In [67]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [68]:
# Load and preprocess dataset
df = pd.read_csv('traffic.csv', parse_dates=['Date'])
df.sort_values('Date', inplace=True)

print(df)

           Date  Visits
0    01-01-2020    1554
1    01-02-2020    2820
2    01-03-2020    2970
3    01-04-2020    2111
4    01-05-2020    2393
..          ...     ...
227   8/15/2020    2221
228   8/16/2020    2724
229   8/17/2020    3456
230   8/18/2020    3581
231   8/19/2020    2064

[232 rows x 2 columns]


In [69]:
# Normalize Visits
scaler = MinMaxScaler()
df['Visits'] = scaler.fit_transform(df[['Visits']])

print(df)

           Date    Visits
0    01-01-2020  0.000000
1    01-02-2020  0.247992
2    01-03-2020  0.277375
3    01-04-2020  0.109109
4    01-05-2020  0.164349
..          ...       ...
227   8/15/2020  0.130656
228   8/16/2020  0.229187
229   8/17/2020  0.372576
230   8/18/2020  0.397062
231   8/19/2020  0.099902

[232 rows x 2 columns]


In [70]:
# Create lag features
def create_lag_features(data, lag=7):
    for i in range(1, lag + 1):
        data[f'lag_{i}'] = data['Visits'].shift(i)
    return data

lag_count = 7
df = create_lag_features(df, lag_count)
df.dropna(inplace=True)

print(df)

           Date    Visits     lag_1     lag_2     lag_3     lag_4     lag_5  \
7    01-08-2020  0.419980  0.432125  0.421156  0.164349  0.109109  0.277375   
8    01-09-2020  0.526738  0.419980  0.432125  0.421156  0.164349  0.109109   
9    01-10-2020  0.317924  0.526738  0.419980  0.432125  0.421156  0.164349   
10   01-11-2020  0.158080  0.317924  0.526738  0.419980  0.432125  0.421156   
11   01-12-2020  0.236631  0.158080  0.317924  0.526738  0.419980  0.432125   
..          ...       ...       ...       ...       ...       ...       ...   
227   8/15/2020  0.130656  0.277571  0.404897  0.284231  0.361019  0.460137   
228   8/16/2020  0.229187  0.130656  0.277571  0.404897  0.284231  0.361019   
229   8/17/2020  0.372576  0.229187  0.130656  0.277571  0.404897  0.284231   
230   8/18/2020  0.397062  0.372576  0.229187  0.130656  0.277571  0.404897   
231   8/19/2020  0.099902  0.397062  0.372576  0.229187  0.130656  0.277571   

        lag_6     lag_7  
7    0.247992  0.000000  

In [71]:
# Define inputs and outputs
X = df.drop(columns=['Date', 'Visits'])
y = df['Visits']

# Define splits
splits = {'70/30': 0.7, '80/20': 0.8, '90/10': 0.9}
results = {}

print(X,y)

        lag_1     lag_2     lag_3     lag_4     lag_5     lag_6     lag_7
7    0.432125  0.421156  0.164349  0.109109  0.277375  0.247992  0.000000
8    0.419980  0.432125  0.421156  0.164349  0.109109  0.277375  0.247992
9    0.526738  0.419980  0.432125  0.421156  0.164349  0.109109  0.277375
10   0.317924  0.526738  0.419980  0.432125  0.421156  0.164349  0.109109
11   0.158080  0.317924  0.526738  0.419980  0.432125  0.421156  0.164349
..        ...       ...       ...       ...       ...       ...       ...
227  0.277571  0.404897  0.284231  0.361019  0.460137  0.438981  0.479334
228  0.130656  0.277571  0.404897  0.284231  0.361019  0.460137  0.438981
229  0.229187  0.130656  0.277571  0.404897  0.284231  0.361019  0.460137
230  0.372576  0.229187  0.130656  0.277571  0.404897  0.284231  0.361019
231  0.397062  0.372576  0.229187  0.130656  0.277571  0.404897  0.284231

[225 rows x 7 columns] 7      0.419980
8      0.526738
9      0.317924
10     0.158080
11     0.236631
        

In [72]:
# Loop through splits
for split_name, train_ratio in splits.items():
    X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=train_ratio, shuffle=False)

    # Define neural network
    model = Sequential([
        Input(shape=(X_train.shape[1],)),  # input_shape - number of features
        Dense(64, activation='relu'), 
        Dense(32, activation='relu'),
        Dense(1)  # Predict the target
    ])
    
    model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')
    
    # Train the model
    model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)
    
    # Predictions
    y_pred = model.predict(X_test)

    y_pred_denormalized = scaler.inverse_transform(y_pred.reshape(-1, 1)).flatten()
    y_test_denormalized = scaler.inverse_transform(y_test.values.reshape(-1, 1)).flatten()


    y3 = pd.DataFrame(y_pred_denormalized)
    y3.to_csv('y_pred_denormalized.csv', index=False)
    
    y4 = pd.DataFrame(y_test_denormalized)
    y4.to_csv('y_test_denormalized.csv', index=False)
    
    # Calculate metrics
    mae = mean_absolute_error(y_test_denormalized, y_pred_denormalized)
    rmse = np.sqrt(mean_squared_error(y_test_denormalized, y_pred_denormalized))
    mape = np.mean(np.abs((y_test_denormalized - y_pred_denormalized.flatten()) / y_test_denormalized)) * 100
    
    results[split_name] = {'MAE': mae, 'RMSE': rmse, 'MAPE': mape}

[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step 
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step


In [73]:
# Display results
for split, metrics in results.items():
    print(f"{split}:")
    for metric, value in metrics.items():
        print(f"  {metric}: {value:.4f}")

70/30:
  MAE: 397.2704
  RMSE: 519.4570
  MAPE: 11.3107
80/20:
  MAE: 480.7700
  RMSE: 634.7644
  MAPE: 15.7250
90/10:
  MAE: 441.7253
  RMSE: 577.0600
  MAPE: 15.5746
