# MOVIE BOX OFFICE REVENUE PREDICTION

## Bawandeep Singh Saimbi


In [14]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score




In [15]:
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')



In [16]:
y = train_data['revenue'].values
train_data = train_data.drop(['revenue'], axis=1)


In [17]:
combined_data = pd.concat([train_data, test_data], ignore_index=True)

numeric_features = combined_data.select_dtypes(include=[np.number])
numeric_features.fillna(numeric_features.mean(), inplace=True)
scaler = StandardScaler()
numeric_features = scaler.fit_transform(numeric_features)



In [18]:
X = torch.Tensor(numeric_features)
X_train, X_val, y_train, y_val = train_test_split(X[:len(y)], y, test_size=0.2, random_state=42)



In [19]:
class GRUModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers):
        super(GRUModel, self).__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, 1)

    def forward(self, x):
        out, _ = self.gru(x.unsqueeze(1))  
        out = self.fc(out[:, -1, :]) 
        return out



In [20]:
input_size = X_train.shape[1]
hidden_size = 128
num_layers = 2

gru_model = GRUModel(input_size, hidden_size, num_layers)

criterion = nn.MSELoss()
optimizer_gru = optim.Adam(gru_model.parameters(), lr=0.001)

num_epochs = 100
batch_size = 64

train_dataset = TensorDataset(X_train, torch.Tensor(y_train))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(num_epochs):
    gru_model.train()
    for inputs, targets in train_loader:
        optimizer_gru.zero_grad()
        outputs = gru_model(inputs)
        loss = criterion(outputs, targets.view(-1, 1))
        loss.backward()
        optimizer_gru.step()

    gru_model.eval()
    with torch.no_grad():
        val_outputs = gru_model(X_val)
        val_loss = criterion(val_outputs, torch.Tensor(y_val).view(-1, 1))

    print(f'GRU Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}, Validation Loss: {val_loss.item()}')





GRU Epoch [1/100], Loss: 7886220306677760.0, Validation Loss: 2.186726549435187e+16
GRU Epoch [2/100], Loss: 2.143102566609715e+16, Validation Loss: 2.1867263346868224e+16
GRU Epoch [3/100], Loss: 1.5370255580790784e+16, Validation Loss: 2.1867263346868224e+16
GRU Epoch [4/100], Loss: 7393231612411904.0, Validation Loss: 2.1867263346868224e+16
GRU Epoch [5/100], Loss: 7775573896069120.0, Validation Loss: 2.186725905190093e+16
GRU Epoch [6/100], Loss: 9823153920409600.0, Validation Loss: 2.186725905190093e+16
GRU Epoch [7/100], Loss: 3434005668036608.0, Validation Loss: 2.186725905190093e+16
GRU Epoch [8/100], Loss: 8968814595145728.0, Validation Loss: 2.186725690441728e+16
GRU Epoch [9/100], Loss: 1.900677647302656e+16, Validation Loss: 2.186725690441728e+16
GRU Epoch [10/100], Loss: 2.374541603818701e+16, Validation Loss: 2.186725690441728e+16
GRU Epoch [11/100], Loss: 3.0544922645889024e+16, Validation Loss: 2.186725690441728e+16
GRU Epoch [12/100], Loss: 1.990407889302323e+16, Valid

GRU Epoch [95/100], Loss: 8210783469043712.0, Validation Loss: 2.1867198922358784e+16
GRU Epoch [96/100], Loss: 6591310215512064.0, Validation Loss: 2.1867198922358784e+16
GRU Epoch [97/100], Loss: 2.541507168960512e+16, Validation Loss: 2.186719247990784e+16
GRU Epoch [98/100], Loss: 1.593444677976064e+16, Validation Loss: 2.186719247990784e+16
GRU Epoch [99/100], Loss: 1.3837188479320064e+16, Validation Loss: 2.186719247990784e+16
GRU Epoch [100/100], Loss: 1.903751770144768e+16, Validation Loss: 2.186719247990784e+16


In [21]:
with torch.no_grad():
    val_outputs_gru = gru_model(X_val)

val_predictions_gru = val_outputs_gru.numpy().flatten()

mse_gru = mean_squared_error(y_val, val_predictions_gru)
r2_gru = r2_score(y_val, val_predictions_gru)

print('Validation Metrics for GRU:')
print(f'MSE: {mse_gru}')
print(f'R-squared (R2): {r2_gru}')

Validation Metrics for GRU:
MSE: 2.1867195180431204e+16
R-squared (R2): -0.30007910607467414
