In [1]:
import pandas as pd

In [2]:
# Load the processed hotel sales table. "Date" is parsed to datetime64 at load
# time so that later ordering, differencing and forecasting operate on real
# timestamps instead of plain strings.
# NOTE(review): the file also carries an "Unnamed: 0" column, which looks like
# an index saved by an earlier to_csv call -- confirm before dropping it, since
# it is currently propagated into the per-hotel CSV files.
df = pd.read_csv('data/processed_hotel_sales.csv', parse_dates=['Date'])
df

Unnamed: 0,Date,Hotel,Sales
0,2016-01-01,21,497433
1,2016-01-01,22,507554
2,2016-01-01,23,756729
3,2016-01-01,24,715910
4,2016-01-01,25,1158284
...,...,...,...
424,2018-03-26,27,984828
425,2018-03-26,28,3671528
426,2018-03-26,29,1888454
427,2018-03-26,30,671403


In [13]:
# Split the sales table by the "Hotel" column
grouped_df = df.groupby("Hotel")

# One DataFrame per hotel, keyed by the hotel identifier
datasets = {hotel: data for hotel, data in grouped_df}

# Persist every per-hotel table as its own CSV file (row index omitted)
for hotel, data in datasets.items():
    csv_path = f'data/hotel_{hotel}.csv'
    data.to_csv(csv_path, index=False)


In [15]:
from statsmodels.tsa.stattools import adfuller

def check_stationarity(timeseries, alpha=0.05):
    """Classify a series as stationary or not using the Augmented Dickey-Fuller test.

    Parameters
    ----------
    timeseries : array-like
        Observations to test. Missing values are dropped before testing, so a
        differenced series can be passed in directly.
    alpha : float, default 0.05
        Significance level. The series is reported as stationary when the
        test's p-value is strictly below this threshold.

    Returns
    -------
    str
        "Stationnary" when the p-value is below ``alpha``, otherwise
        "Non Stationnary". The spelling is kept exactly as it was, because
        these strings are the function's established return values.
    """
    # Drop missing values (e.g. the leading NaN that .diff() produces) so that
    # they never reach the test.
    observations = pd.Series(timeseries).dropna()

    # Perform the Dickey-Fuller test; the lag order is selected by AIC.
    dftest = adfuller(observations, autolag='AIC')
    pvalue = dftest[1]  # second element of the result tuple is the p-value

    # Compare the p-value against the chosen significance level
    if pvalue < alpha:
        return "Stationnary"
    return "Non Stationnary"

# Report, hotel by hotel, whether the raw "Sales" series tests as stationary
for hotel, data in datasets.items():
    print(f"Hotel: {hotel}")
    sales_verdict = check_stationarity(data['Sales'])
    print(sales_verdict)
    print("\n")

Hotel: 21
Non Stationnary


Hotel: 22
Non Stationnary


Hotel: 23
Stationnary


Hotel: 24
Non Stationnary


Hotel: 25
Non Stationnary


Hotel: 26
Non Stationnary


Hotel: 27
Stationnary


Hotel: 28
Stationnary


Hotel: 29
Non Stationnary


Hotel: 30
Stationnary


Hotel: 59
Stationnary




In [16]:
# Make the time series stationary by first-order differencing of "Sales".
# NOTE(review): .diff() subtracts the previous row, so this assumes each
# per-hotel table is already in chronological order -- confirm.
for hotel, data in datasets.items():
    # Build a new frame with assign() instead of adding a column in place to a
    # frame that came out of a groupby split; in-place assignment there can
    # trigger pandas' chained-assignment warning. The result is written back
    # explicitly so later cells can read "Sales_diff" from `datasets`.
    # Replacing the value of an existing key is safe while iterating a dict.
    data = data.assign(Sales_diff=data['Sales'].diff())
    datasets[hotel] = data

    # Remove the missing value created by differencing. Only the differenced
    # series is cleaned, so rows are never discarded because of NaNs in
    # unrelated columns. The stored frame keeps its first row (with NaN), which
    # also keeps this cell idempotent when it is re-run.
    sales_diff = data['Sales_diff'].dropna()

    print(f"Hotel: {hotel}")
    print(check_stationarity(sales_diff))
    print("\n")

Hotel: 21
Stationnary


Hotel: 22
Stationnary


Hotel: 23
Stationnary


Hotel: 24
Stationnary


Hotel: 25
Stationnary


Hotel: 26
Stationnary


Hotel: 27
Stationnary


Hotel: 28
Stationnary


Hotel: 29
Stationnary


Hotel: 30
Stationnary


Hotel: 59
Stationnary




In [17]:
# The package was renamed from "fbprophet" to "prophet" in its 1.0 release.
# Try the current name first and fall back to the legacy one so the cell runs
# with either installation.
try:
    from prophet import Prophet
except ImportError:
    from fbprophet import Prophet

# Forecast frames for every hotel, keyed by hotel identifier, so that results
# for all hotels remain available after the loop (not just the last one).
forecasts = {}

# Loop over each hotel
for hotel, data in datasets.items():
    # Prepare the data: Prophet expects the timestamp column to be named "ds"
    # and the value to forecast to be named "y". Here "y" is the differenced
    # sales column, which must already exist in the per-hotel frame.
    data = data.reset_index().rename(columns={'Date': 'ds', 'Sales_diff': 'y'})

    # Create a Prophet instance with default settings
    model = Prophet()

    # Fit the model
    model.fit(data)

    # Create a DataFrame extending 365 periods past the end of the history.
    # NOTE(review): make_future_dataframe defaults to daily frequency --
    # confirm that this matches the spacing of the observations.
    future = model.make_future_dataframe(periods=365)

    # Make predictions and keep them for later inspection
    forecast = model.predict(future)
    forecasts[hotel] = forecast

    # Plot the forecast and label the figure so the hotels can be told apart
    fig = model.plot(forecast)
    fig.axes[0].set_title(f"Hotel {hotel}")

ModuleNotFoundError: No module named 'fbprophet'