# Budget prediction

In [1]:
import pickle

import pandas as pd
import torch
import torch.nn as nn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
# Load the raw trip records from the CSV file into a DataFrame.
csv_path = 'trip.csv'
data = pd.read_csv(csv_path)

In [3]:
# Parse both date columns into pandas datetime values so that date
# arithmetic can be done on them afterwards.
for date_column in ('StartDate', 'EndDate'):
    data[date_column] = pd.to_datetime(data[date_column])

In [4]:
# Convert dates to numerical values (days since the earliest start date).
# Series.min() is vectorised and skips NaT; the builtin min() walks the Series
# element by element and its result depends on where a NaT happens to sit.
earliest_date = data['StartDate'].min()
# Both columns are measured from the same origin, so EndDate - StartDate still
# equals the trip length in days.
data['StartDate'] = (data['StartDate'] - earliest_date).dt.days
data['EndDate'] = (data['EndDate'] - earliest_date).dt.days

In [5]:
# Standardise the budget column with StandardScaler; the fitted scaler is kept
# so predictions can be mapped back to the original scale later.
# NOTE(review): the scaler is fitted on every row before the train/test split
# performed further down, so test rows contribute to the statistics — confirm
# this is acceptable.
budget_values = data['Budget'].values.reshape(-1, 1)  # sklearn expects a 2-D array
scaler = StandardScaler()
data['Budget'] = scaler.fit_transform(budget_values)

In [6]:
# Build float32 PyTorch tensors: X holds the two date features per row,
# y holds the scaled budget as a single-column target.
# NOTE(review): the features remain raw day offsets (values in the thousands in
# the recorded X_train output) while the target is standardised, and the
# recorded training loss stays near 1.0 — consider scaling the inputs too.
feature_matrix = data[['StartDate', 'EndDate']].values
target_values = data['Budget'].values
X = torch.tensor(feature_matrix, dtype=torch.float32)
y = torch.tensor(target_values, dtype=torch.float32).unsqueeze(1)

In [9]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical between runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [10]:
# Display the training feature tensor (columns: StartDate, EndDate as day offsets).
X_train

tensor([[8892., 8914.],
        [ 870.,  892.],
        [8784., 8794.],
        ...,
        [4568., 4581.],
        [4368., 4378.],
        [7334., 7353.]])

In [11]:
# Display the training target tensor (budget values after StandardScaler).
y_train

tensor([[ 1.4001],
        [-0.0661],
        [ 1.2668],
        ...,
        [-0.1994],
        [ 1.0003],
        [-0.5993]])

In [12]:
# Define the deep learning model
class BudgetPredictor(nn.Module):
    """Small fully connected regressor: 2 inputs -> 16 -> 8 -> 1 output.

    ReLU is applied after the first two linear layers; the last layer is
    left linear so the output is an unbounded regression value.
    """

    def __init__(self):
        super().__init__()
        # Layer attribute names double as state_dict key prefixes.
        self.fc1 = nn.Linear(in_features=2, out_features=16)
        self.fc2 = nn.Linear(in_features=16, out_features=8)
        self.fc3 = nn.Linear(in_features=8, out_features=1)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch of shape (N, 2) to predictions of shape (N, 1)."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

In [13]:
# Initialize the model
# Seed PyTorch's global RNG first so the randomly initialised layer weights are
# the same on every fresh kernel run; 42 mirrors the random_state used for the
# train/test split.
torch.manual_seed(42)
model = BudgetPredictor()


In [14]:
# Mean squared error as the regression loss, optimised with Adam over all
# of the model's parameters.
learning_rate = 0.001
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [19]:
# Training loop: every epoch is one full-batch gradient step over X_train.
num_epochs = 4000
for epoch in range(num_epochs):
    # Drop the gradients left over from the previous step.
    optimizer.zero_grad()

    # Forward pass and loss on the whole training set.
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    # Backpropagate and update the weights.
    loss.backward()
    optimizer.step()

    # Report only on every 100th epoch (1-based count).
    if (epoch + 1) % 100 != 0:
        continue
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [100/4000], Loss: 1.0064
Epoch [200/4000], Loss: 1.0694
Epoch [300/4000], Loss: 1.0077
Epoch [400/4000], Loss: 1.0943
Epoch [500/4000], Loss: 1.0408
Epoch [600/4000], Loss: 1.0142
Epoch [700/4000], Loss: 1.0292
Epoch [800/4000], Loss: 1.0038
Epoch [900/4000], Loss: 1.1597
Epoch [1000/4000], Loss: 1.4577
Epoch [1100/4000], Loss: 1.2070
Epoch [1200/4000], Loss: 1.4372
Epoch [1300/4000], Loss: 1.4617
Epoch [1400/4000], Loss: 1.0147
Epoch [1500/4000], Loss: 1.0038
Epoch [1600/4000], Loss: 1.1002
Epoch [1700/4000], Loss: 1.0164
Epoch [1800/4000], Loss: 1.0088
Epoch [1900/4000], Loss: 1.0053
Epoch [2000/4000], Loss: 1.0865
Epoch [2100/4000], Loss: 1.0129
Epoch [2200/4000], Loss: 1.0081
Epoch [2300/4000], Loss: 2.8495
Epoch [2400/4000], Loss: 1.0140
Epoch [2500/4000], Loss: 1.0092
Epoch [2600/4000], Loss: 3.6651
Epoch [2700/4000], Loss: 1.0152
Epoch [2800/4000], Loss: 1.0103
Epoch [2900/4000], Loss: 17.8283
Epoch [3000/4000], Loss: 1.0159
Epoch [3100/4000], Loss: 1.0107
Epoch [3200/4000

In [27]:
# Testing
# NOTE(review): this print looks like leftover debugging output — confirm it
# is still wanted.
print(X_train)

# Evaluation mode plus no_grad: inference only, no gradient tracking.
model.eval()
with torch.no_grad():
    predicted_train, predicted_test = (model(split) for split in (X_train, X_test))

# Last expression of the cell, so the notebook displays it.
predicted_train

tensor([[8892., 8914.],
        [ 870.,  892.],
        [8784., 8794.],
        ...,
        [4568., 4581.],
        [4368., 4378.],
        [7334., 7353.]])


tensor([[0.2213],
        [0.1553],
        [0.0442],
        ...,
        [0.0537],
        [0.0080],
        [0.1643]])

In [21]:
# Map the scaled predictions back to the original budget scale.
# This rebinds both names from tensors to NumPy arrays, so the cell cannot be
# re-run without first re-running the prediction cell (an ndarray has no
# .numpy() method).
predicted_train, predicted_test = (
    scaler.inverse_transform(scaled.numpy())
    for scaled in (predicted_train, predicted_test)
)

In [23]:
# Print sample predictions
print('Sample predictions:')
# Undo the target scaling once, instead of re-transforming the whole test set
# on every loop iteration.
actual_test = scaler.inverse_transform(y_test.numpy())
# Slicing (rather than range(10)) stays safe when the test split has fewer
# than 10 rows.
for actual_row, predicted_row in zip(actual_test[:10], predicted_test[:10]):
    print(f'Actual: {actual_row[0]:.2f}, Predicted: {predicted_row[0]:.2f}')

Sample predictions:
Actual: 9000.00, Predicted: 17772.70
Actual: 18000.00, Predicted: 16796.13
Actual: 25000.00, Predicted: 17769.15
Actual: 24000.00, Predicted: 19552.45
Actual: 8000.00, Predicted: 17552.46
Actual: 10000.00, Predicted: 19265.46
Actual: 6000.00, Predicted: 16957.08
Actual: 11000.00, Predicted: 16947.92
Actual: 18000.00, Predicted: 19755.30
Actual: 29000.00, Predicted: 17705.86


In [24]:
# Persist only the learned parameters (the state_dict), not the whole module
# object, to 'budget_predictor_model.pth'.
trained_weights = model.state_dict()
torch.save(trained_weights, 'budget_predictor_model.pth')

In [35]:
# Pull the fitted statistics off the budget scaler: mean_ is the per-feature
# mean and scale_ the per-feature scaling factor used by StandardScaler.
scaler_mean, scaler_std = scaler.mean_, scaler.scale_

In [36]:
# Save the scaling parameters as a (mean, scale) tuple so that predictions can
# be mapped back to the original budget scale without refitting the scaler.
# `pickle` is imported in the notebook's top import cell.
with open('scaler_params.pkl', 'wb') as f:
    pickle.dump((scaler_mean, scaler_std), f)
