In [18]:
import pandas as pd
import numpy as np
from statsmodels.tsa.api import ExponentialSmoothing
from plotly import graph_objects as go

#Let's get the training and test datasets using the urls
def load_data(url_train, url_test):
    """Loads and preprocesses data."""
    train_data = pd.read_csv(url_train, parse_dates=['Timestamp'], index_col='Timestamp')
    test_data = pd.read_csv(url_test, parse_dates=['Timestamp'], index_col='Timestamp')

    #If there is missing trip data, remove it
    train_data.dropna(subset=['trips'], inplace=True)

    # Now, let's break the dates into days, hours, and months, since the model uses those
    train_data['hour'] = train_data.index.hour
    train_data['day'] = train_data.index.day
    train_data['month'] = train_data.index.month

    test_data['hour'] = test_data.index.hour
    test_data['day'] = test_data.index.day
    test_data['month'] = test_data.index.month

    return train_data, test_data

#Now let's actually build a model using the exponential smoothing function. We pass the database and parameters to it.
def create_model(train_data, **kwargs):
    """Creates and fits the Exponential Smoothing model."""
    try:
        model = ExponentialSmoothing(
            train_data['trips'],
            seasonal_periods=24,  # Assuming daily seasonality
            trend='add',
            seasonal='add'# Additive seasonality (can be 'add', 'mul', or None)
        )
        modelFit = model.fit(**kwargs) # Pass any smoothing_level parameters to the fit function

        return model, modelFit

    except Exception as e:
        print(f"Something happened! {e}")
        return None, None

#Now let's use the model to make predictions with the parameters given
def make_predictions(modelFit, test_data):
    """Generates predictions on the test data."""
    # Create forecast

    if modelFit is None:
        print("Uh Oh, we cannot make predictions because model fitting is not complete.")
        return None

    predictions = modelFit.forecast(steps=len(test_data))
    return predictions

#Once we have our predictions, let's plot them so that we can see what is being generated.
def visualize_predictions(test_data, predictions):
    """Visualizes predictions vs. actual values."""
    if predictions is None:
        print("There are no predictions to show. Please check code.")
        return

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=test_data.index, y=test_data['trips'], mode='lines', name='Real Life Values'))
    fig.add_trace(go.Scatter(x=test_data.index, y=predictions, mode='lines', name='Model Output'))
    fig.update_layout(title='Taxi Trip Prediction')
    fig.show()


if __name__ == '__main__':
    # Where can we find our data?
    url_train = "https://github.com/dustywhite7/econ8310-assignment1/raw/main/assignment_data_train.csv"
    url_test = "https://github.com/dustywhite7/econ8310-assignment1/raw/main/assignment_data_test.csv"

    # Let's use the load_data function from earlier!
    train_data, test_data = load_data(url_train, url_test)

    # Create and train our model! The most important part of our assignment.
    model, modelFit = create_model(train_data, smoothing_level=0.2, optimized=False, smoothing_trend=0.5, smoothing_seasonal=0.5) #You can adjust smoothing level parameters

    # Now that we have our model, let's use it to predict taxi trips
    pred = make_predictions(modelFit, test_data)

    if pred is not None:
        # Set our results from model in the dataframe
        test_data['trips'] = pred

        #Let's print our beautiful model output to get a good grade!
        visualize_predictions(test_data, pred)

        #Print dataframe with results in case it needs to be assessed
        print(test_data)
    else:
        print("Uh Oh, prediction failed. Something must be wrong with the model.")


No frequency information was provided, so inferred frequency h will be used.



                     year  month  day  hour         trips
Timestamp                                                
2019-01-01 00:00:00  2019      1    1     0 -4.682392e+44
2019-01-01 01:00:00  2019      1    1     1 -5.218196e+44
2019-01-01 02:00:00  2019      1    1     2 -5.701575e+44
2019-01-01 03:00:00  2019      1    1     3 -6.033089e+44
2019-01-01 04:00:00  2019      1    1     4 -6.120671e+44
...                   ...    ...  ...   ...           ...
2019-01-31 19:00:00  2019      1   31    19  7.966261e+46
2019-01-31 20:00:00  2019      1   31    20  7.975203e+46
2019-01-31 21:00:00  2019      1   31    21  7.979228e+46
2019-01-31 22:00:00  2019      1   31    22  7.978651e+46
2019-01-31 23:00:00  2019      1   31    23  7.947866e+46

[744 rows x 5 columns]
