In [1]:
# Mount Google Drive at /content/drive; the CSV paths read and written by
# later cells all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import pandas as pd
from statsmodels.tsa.api import VAR

In [3]:
# Read the cleaned, merged dataset and replace every missing value with 0.
df = pd.read_csv(
    "/content/drive/Shareddrives/Hackathon/data/cleaned_data/merged_final_2.csv"
).fillna(0)

In [4]:
# Build `df_pivot`: an hourly, wide-format table with one column per
# (entity, metric) pair, indexed by the hour-floored DEB_TIME.

# Keep only the timestamp, the entity label and the four metrics being pivoted.
df = df[['DEB_TIME', 'ENTITY_DESCRIPTION_SHORT', 'WAIT_TIME_MAX', 'NB_UNITS', 'CAPACITY', 'GUEST_CARRIED']]

# Long -> wide: one row per DEB_TIME, one column per (metric, entity) pair.
# aggfunc='first' keeps the first value when a (DEB_TIME, entity) pair repeats.
df_pivot = df.pivot_table(
    index='DEB_TIME',
    columns='ENTITY_DESCRIPTION_SHORT',
    values=['WAIT_TIME_MAX', 'NB_UNITS', 'CAPACITY', 'GUEST_CARRIED'],
    aggfunc='first',
)

# Flatten the two-level (metric, entity) columns into '<entity><<metric>>' labels,
# e.g. 'Bumper Cars<CAPACITY>'.
df_pivot.columns = [f'{entity}<{metric}>' for metric, entity in df_pivot.columns]

# Sort by timestamp, then expose DEB_TIME as a regular column so it can be parsed.
df_pivot = df_pivot.sort_index().reset_index()

# Parse the timestamps and round each one down to the start of its hour.
# The lowercase 'h' alias is used because the uppercase 'H' alias is deprecated
# and triggers a warning on this call.
df_pivot['DEB_TIME'] = pd.to_datetime(df_pivot['DEB_TIME']).dt.floor('h')

# Only numeric columns are averaged (this leaves the datetime column out).
numeric_cols = df_pivot.select_dtypes(include=['number']).columns.tolist()

# Average all rows that fall in the same hour. groupby() already returns
# DEB_TIME as the index, so no reset_index()/set_index() round-trip is needed.
df_pivot = df_pivot.groupby('DEB_TIME')[numeric_cols].mean()

print(df_pivot.head())

                     Bumper Cars<CAPACITY>  Bungee Jump<CAPACITY>  \
DEB_TIME                                                            
2018-06-01 09:00:00                    0.0                 224.95   
2018-06-01 10:00:00                    0.0                 306.75   
2018-06-01 11:00:00                    0.0                 306.75   
2018-06-01 12:00:00                    0.0                 306.75   
2018-06-01 13:00:00                    0.0                 306.75   

                     Circus Train<CAPACITY>  Crazy Dance<CAPACITY>  \
DEB_TIME                                                             
2018-06-01 09:00:00                     0.0              120.83375   
2018-06-01 10:00:00                   350.0              250.00100   
2018-06-01 11:00:00                   350.0              250.00100   
2018-06-01 12:00:00                   350.0              250.00100   
2018-06-01 13:00:00                   350.0              250.00100   

                     Dizz

  df_pivot['DEB_TIME'] = df_pivot['DEB_TIME'].dt.floor('H')


In [None]:
# Add calendar features, fill gaps, and pick the VAR lag order by AIC.

# Calendar features taken from the DatetimeIndex, cast to float like the
# other model columns.
# NOTE(review): these become endogenous series in the VAR below, since the
# whole frame is passed to VAR(); confirm that is intended.
df_pivot['day_of_week'] = df_pivot.index.dayofweek.astype(float)
df_pivot['month'] = df_pivot.index.month.astype(float)

# Order chronologically, then fill gaps: forward fill first, backward fill for
# anything still missing at the start. DataFrame.ffill()/bfill() replace the
# deprecated fillna(method=...) form, which emits a warning.
df_pivot = df_pivot.sort_index().ffill().bfill()

# Exclude every column whose name contains 'Vertical Drop' from the model.
df_pivot = df_pivot.drop(columns=df_pivot.filter(like='Vertical Drop').columns)

# 2️⃣ Train the VAR Model
model = VAR(df_pivot)

# Select the lag order that minimises AIC, testing up to 60 lags.
lag_selection = model.select_order(maxlags=60)
optimal_lag = lag_selection.aic
print(f"Optimal Lag Order: {optimal_lag}")

  df_pivot.fillna(method='ffill', inplace=True)  # Forward fill
  df_pivot.fillna(method='bfill', inplace=True)  # Backward fill
  self._init_dates(dates, freq)


In [None]:
# Fit the VAR with the selected lag order and forecast the next week of
# opening hours for every modelled column.

# Fit the model
var_results = model.fit(optimal_lag)

# 3️⃣ Forecast future values (including WAIT_TIME_MAX)
# The forecast is seeded with the last `optimal_lag` observed rows.
lag_input = df_pivot.values[-optimal_lag:]

# Forecast horizon: 7 days x 14 hourly slots per day (09:00 through 22:00).
forecast_days = 7
open_hours = range(9, 23)

# Generate every hour of the next `forecast_days` days first and only then keep
# the opening hours. Any 24 consecutive hourly stamps contain each hour of the
# day exactly once, so this yields exactly forecast_days * 14 timestamps.
# (Generating only 7*14 consecutive hours and filtering afterwards would leave
# far fewer steps than intended.)
all_hours = pd.date_range(
    start=df_pivot.index[-1] + pd.Timedelta(hours=1),  # start from the next hour
    periods=forecast_days * 24,
    freq="h",  # lowercase alias; the uppercase 'H' alias is deprecated
)
forecast_dates = all_hours[all_hours.hour.isin(open_hours)]

# Predict one step per retained timestamp
var_forecast = var_results.forecast(lag_input, steps=len(forecast_dates))

# Convert the forecast to a DataFrame labelled with the future timestamps
var_forecast_df = pd.DataFrame(var_forecast, index=forecast_dates, columns=df_pivot.columns)

# Ensure non-negative predictions
var_forecast_df = var_forecast_df.clip(lower=0)

# 4️⃣ Display Results
print(var_forecast_df.head())

In [None]:
# TODO: predict NB_UNITS and adjust CAPACITY as well (idea: use Prophet).
# Write the VAR forecast to the shared drive as CSV.
var_forecast_df.to_csv(
    path_or_buf="/content/drive/Shareddrives/Hackathon/model/prediction_prashant.csv"
)

In [None]:
# Rebind `df` to the merged dataset, replacing every missing value with 0.
df = pd.read_csv(
    "/content/drive/Shareddrives/Hackathon/data/merged_df.csv"
).fillna(0)

In [None]:
# Show the frame's dimensions and a bounded preview instead of printing the
# whole DataFrame.
print(df.shape)
print(df.head())