# MOVIE BOX OFFICE REVENUE PREDICTION

## Bawandeep Singh Saimbi



In [4]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score



In [5]:
train_data = pd.read_csv('train.csv')
test_data = pd.read_csv('test.csv')

y = train_data['revenue'].values
train_data = train_data.drop(['revenue'], axis=1)
combined_data = pd.concat([train_data, test_data], ignore_index=True)
numeric_features = combined_data.select_dtypes(include=[np.number])
numeric_features.fillna(numeric_features.mean(), inplace=True)
scaler = StandardScaler()
numeric_features = scaler.fit_transform(numeric_features)
X = torch.Tensor(numeric_features)
X_train, X_val, y_train, y_val = train_test_split(X[:len(y)], y, test_size=0.2, random_state=42)

In [6]:
class FNN(nn.Module):
    def __init__(self, input_size):
        super(FNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        x = self.fc3(x)
        return x

input_size = X_train.shape[1]
model = FNN(input_size)

criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)
num_epochs = 100
batch_size = 64

train_dataset = TensorDataset(X_train, torch.Tensor(y_train))
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

for epoch in range(num_epochs):
    model.train()
    for inputs, targets in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, targets.view(-1, 1))
        loss.backward()
        optimizer.step()
    
    # Validation
    model.eval()
    with torch.no_grad():
        val_outputs = model(X_val)
        val_loss = criterion(val_outputs, torch.Tensor(y_val).view(-1, 1))
    
    print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item()}, Validation Loss: {val_loss.item()}')



Epoch [1/100], Loss: 4.322501042844467e+16, Validation Loss: 2.186726549435187e+16
Epoch [2/100], Loss: 7772679624982528.0, Validation Loss: 2.186725905190093e+16
Epoch [3/100], Loss: 1.849829529485312e+16, Validation Loss: 2.186724401951539e+16
Epoch [4/100], Loss: 3.517725532802253e+16, Validation Loss: 2.186721395474432e+16
Epoch [5/100], Loss: 1.2451876842766336e+16, Validation Loss: 2.186716026765312e+16
Epoch [6/100], Loss: 1.6711376298835968e+16, Validation Loss: 2.1867067925856256e+16
Epoch [7/100], Loss: 4.118973280105267e+16, Validation Loss: 2.186692619193549e+16
Epoch [8/100], Loss: 1.4270991987376128e+16, Validation Loss: 2.1866726475956224e+16
Epoch [9/100], Loss: 1.33265050566656e+16, Validation Loss: 2.186645374553293e+16
Epoch [10/100], Loss: 1.2761219714777088e+16, Validation Loss: 2.1866092968280064e+16
Epoch [11/100], Loss: 1.997389358641971e+16, Validation Loss: 2.1865663471550464e+16
Epoch [12/100], Loss: 3547915180048384.0, Validation Loss: 2.186512015818752e+16


In [7]:
val_predictions = val_outputs.numpy().flatten()
mse = mean_squared_error(y_val, val_predictions)
r2 = r2_score(y_val, val_predictions)

print(f'Validation MSE: {mse}')
print(f'Validation R-squared (R2): {r2}')


Validation MSE: 2.1010213814055344e+16
Validation R-squared (R2): -0.2491286499449541
