In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

from evaluator import Evaluator

In [None]:
class MLP(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(MLP, self).__init__()
        self.model = nn.Sequential(
            nn.Linear(input_size, hidden_size), # 6
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size), # 32
            nn.ReLU(),
            nn.Linear(hidden_size, output_size) # 1
        )
    
    def forward(self, x):
        return self.model(x)


In [3]:
data = pd.read_csv('coffee_shop_revenue.csv')
data.head()

Unnamed: 0,Number_of_Customers_Per_Day,Average_Order_Value,Operating_Hours_Per_Day,Number_of_Employees,Marketing_Spend_Per_Day,Location_Foot_Traffic,Daily_Revenue
0,152,6.74,14,4,106.62,97,1547.81
1,485,4.5,12,8,57.83,744,2084.68
2,398,9.09,6,6,91.76,636,3118.39
3,320,8.48,17,4,462.63,770,2912.2
4,156,7.44,17,2,412.52,232,1663.42


In [None]:
# Load the CSV file into a DataFrame
data = pd.read_csv('coffee_shop_revenue.csv')

X = data.drop('Daily_Revenue', axis=1).values.astype('float32')
y = data['Daily_Revenue'].values.astype('float32').reshape(-1, 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train)
X_test = scaler_X.transform(X_test)

scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train)
y_test = scaler_y.transform(y_test)

# Convert numpy arrays to torch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Create a TensorDataset and DataLoader for the training data
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=len(train_dataset), shuffle=True)


In [5]:
input_size = X_train.shape[1]  # number of features
hidden_size = 32
output_size = 1  # predicting Daily_Revenue
model = MLP(input_size, hidden_size, output_size)

# Loss function and optimizer
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [None]:
# Train the model
epochs = 1500
model.train()
for epoch in range(epochs):
    running_loss = 0.0
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * inputs.size(0)
    epoch_loss = running_loss / len(train_dataset)
    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

Epoch 1/1500, Loss: 1.1460
Epoch 2/1500, Loss: 1.1441
Epoch 3/1500, Loss: 1.1421
Epoch 4/1500, Loss: 1.1401
Epoch 5/1500, Loss: 1.1381
Epoch 6/1500, Loss: 1.1362
Epoch 7/1500, Loss: 1.1342
Epoch 8/1500, Loss: 1.1322
Epoch 9/1500, Loss: 1.1303
Epoch 10/1500, Loss: 1.1283
Epoch 11/1500, Loss: 1.1264
Epoch 12/1500, Loss: 1.1245
Epoch 13/1500, Loss: 1.1225
Epoch 14/1500, Loss: 1.1206
Epoch 15/1500, Loss: 1.1187
Epoch 16/1500, Loss: 1.1168
Epoch 17/1500, Loss: 1.1149
Epoch 18/1500, Loss: 1.1130
Epoch 19/1500, Loss: 1.1111
Epoch 20/1500, Loss: 1.1092
Epoch 21/1500, Loss: 1.1073
Epoch 22/1500, Loss: 1.1054
Epoch 23/1500, Loss: 1.1035
Epoch 24/1500, Loss: 1.1016
Epoch 25/1500, Loss: 1.0998
Epoch 26/1500, Loss: 1.0979
Epoch 27/1500, Loss: 1.0960
Epoch 28/1500, Loss: 1.0942
Epoch 29/1500, Loss: 1.0923
Epoch 30/1500, Loss: 1.0905
Epoch 31/1500, Loss: 1.0886
Epoch 32/1500, Loss: 1.0868
Epoch 33/1500, Loss: 1.0849
Epoch 34/1500, Loss: 1.0831
Epoch 35/1500, Loss: 1.0813
Epoch 36/1500, Loss: 1.0794
E

In [7]:
model.eval()
with torch.no_grad():
    predictions_scaled = model(X_test_tensor).numpy()

# Inverse transform the scaled predictions and true targets
predictions = scaler_y.inverse_transform(predictions_scaled)
y_test_actual = scaler_y.inverse_transform(y_test)

# Create a DataFrame with true and predicted values
results_df = pd.DataFrame({
    'True_Revenue': y_test_actual.flatten(),
    'Predicted_Revenue': predictions.flatten()
})

print(results_df)

Evaluator.eval_regression(y_pred=results_df["Predicted_Revenue"], y_true=results_df["True_Revenue"], n_features=len(data.columns)-1, regressor_name="MLP", plot=False)

     True_Revenue  Predicted_Revenue
0     3443.689941        3484.574219
1     1021.080017        1265.657593
2     2384.419922        2210.681396
3     2198.350098        1895.490479
4     1988.890015        1822.373535
..            ...                ...
395   1622.310059        2100.376953
396   2347.610107        2476.613281
397    976.489990         873.208252
398   1349.270020        1465.869751
399   2503.250000        2309.937256

[400 rows x 2 columns]
MAE: 170.97571 
 MSE: 43963.33 
 RMSE: 209.67433 
 R2: 0.9529486298561096 
 MAPE: 13.068821
Mean: 170.97571
Variance: 14730.645
Standard Deviation: 121.369865
r2_adjusted: 0.9522302883271953


Unnamed: 0,bin_label,mean,variance,std_dev,max_error,min_error,n_sample
0,"(1000.8160125732422, 0.0]",176.462036,16401.25,128.067368,587.342773,2.783691,83
1,"(2001.6320251464845, 1000.8160125732422]",160.407486,11819.399414,108.717064,478.066895,3.221436,154
2,"(3002.448037719727, 2001.6320251464845]",184.347992,16878.25,129.916321,477.290283,1.96875,104
3,"(4003.264050292969, 3002.448037719727]",155.332184,13147.96582,114.664581,450.22998,5.822266,49


In [None]:
LR = LinearRegression()

# Train the model using the training data
LR.fit(X_train, y_train)

# Make predictions on the test set
y_pred = LR.predict(X_test)

results_df = pd.DataFrame({
    'True_Revenue': y_test.flatten(),
    'Predicted_Revenue': y_pred.flatten()
})

Evaluator.eval_regression(y_pred=results_df["Predicted_Revenue"], y_true=results_df["True_Revenue"], n_features=len(data.columns)-1, regressor_name="Lineal Regression", plot=False)

MAE: 244.20966 
 MSE: 97569.88 
 RMSE: 312.3618 
 R2: 0.8955766558647156 
 MAPE: 19.424444
Mean: 244.20966
Variance: 37931.51
Standard Deviation: 194.76013
r2_adjusted: 0.8939824063359326


Unnamed: 0,bin_label,mean,variance,std_dev,max_error,min_error,n_sample
0,"(1000.8160202026368, 0.0]",320.283875,62736.597656,250.472748,873.593567,1.954346,83
1,"(2001.6320404052735, 1000.8160202026368]",226.004791,25845.792969,160.766266,678.058105,2.22229,154
2,"(3002.4480606079105, 2001.6320404052735]",174.690903,18490.84375,135.981049,661.899414,4.383301,104
3,"(4003.264080810547, 3002.4480606079105]",243.600052,25517.410156,159.741699,676.123291,7.402832,49


In [9]:
#Evaluator.save(name="result")