In [28]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer

In [29]:
df = pd.read_csv('imdb-movies-dataset.csv')

In [30]:
df['combined_features'] = (
    df['Title'] + ' ' + df['Director'] + ' ' + df['Description'] + ' ' + df['Cast']
)

In [31]:
df['combined_features'] = df['combined_features'].fillna('')


In [33]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

In [34]:
vectorizer = TfidfVectorizer(max_features=5000)  # Limit to top 5000 features
feature_vectors = vectorizer.fit_transform(df['combined_features'])

In [46]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
df['Rating'] = scaler.fit_transform(df[['Rating']])


In [47]:
X_train, X_test, y_train, y_test = train_test_split(
    feature_vectors, df['Rating'], test_size=0.2, random_state=42
)

In [48]:
X_train_tensor = torch.tensor(X_train.toarray(), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test.toarray(), dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).view(-1, 1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).view(-1, 1)

In [49]:
X_train_tensor = torch.nan_to_num(X_train_tensor, nan=0.0, posinf=1e6, neginf=-1e6)
y_train_tensor = torch.nan_to_num(y_train_tensor, nan=0.0, posinf=1e6, neginf=-1e6)


In [50]:
class MovieRatingPredictor(nn.Module):
    def __init__(self, input_size):
        super(MovieRatingPredictor, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1) 
        )

    def forward(self, x):
        return self.fc(x)

# Initialize the model
input_size = X_train_tensor.shape[1]
model = MovieRatingPredictor(input_size)

In [77]:
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
epochs = 20
for epoch in range(epochs):
    model.train()
    optimizer.zero_grad()
    predictions = model(X_train_tensor)
    loss = criterion(predictions, y_train_tensor)
    loss.backward()
    optimizer.step()
    
    print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")
        

Epoch 1/20, Loss: 0.0112
Epoch 2/20, Loss: 0.0128
Epoch 3/20, Loss: 0.0109
Epoch 4/20, Loss: 0.0116
Epoch 5/20, Loss: 0.0118
Epoch 6/20, Loss: 0.0110
Epoch 7/20, Loss: 0.0105
Epoch 8/20, Loss: 0.0106
Epoch 9/20, Loss: 0.0108
Epoch 10/20, Loss: 0.0106
Epoch 11/20, Loss: 0.0102
Epoch 12/20, Loss: 0.0099
Epoch 13/20, Loss: 0.0100
Epoch 14/20, Loss: 0.0100
Epoch 15/20, Loss: 0.0098
Epoch 16/20, Loss: 0.0096
Epoch 17/20, Loss: 0.0094
Epoch 18/20, Loss: 0.0093
Epoch 19/20, Loss: 0.0093
Epoch 20/20, Loss: 0.0092


In [78]:
X_test_tensor = torch.nan_to_num(X_test_tensor)
y_test_tensor = torch.nan_to_num(y_test_tensor)


model.eval()
with torch.no_grad():
    test_predictions = model(X_test_tensor)
    test_loss = criterion(test_predictions, y_test_tensor)
    print(f"Test Loss: {test_loss.item():.4f}")

Test Loss: 0.0353


In [79]:
myData = [['Iron Man', 'Jon Favreau', 'A billionaire industrialist and genius inventor, Tony Stark (Robert Downey Jr.), is conducting weapons tests overseas, but terrorists kidnap him to force him to build a devastating weapon. Instead, he builds an armored suit and upends his captors. Returning to America, Stark refines the suit and uses it to combat crime and terrorism.', 'Robert Downey Jr, Dante Vargas, Daniel Marin']]
myData_combined = [
    ' '.join(myData[0])  # Combine the features into a single string
]

# Transform the text into feature vectors using the pre-trained vectorizer
myData_transformed = vectorizer.transform(myData_combined)

# Convert to a PyTorch tensor
myData_tensor = torch.tensor(myData_transformed.toarray(), dtype=torch.float32)

# Make predictions
model.eval()
with torch.no_grad():
    predictions = model(myData_tensor)
print(f"Predicted Rating: {predictions.item()*10:.1f}")


Predicted Rating: 6.5


In [80]:
myData = [['Titanic', 'James Cameron', 'A seventeen-year-old aristocrat falls in love with a kind but poor artist aboard the luxurious,', 'Leonardo DiCaprio, Kate Winslet, Billy Zane, Kathy Bates']]
myData_combined = [
    ' '.join(myData[0])  # Combine the features into a single string
]

# Transform the text into feature vectors using the pre-trained vectorizer
myData_transformed = vectorizer.transform(myData_combined)

# Convert to a PyTorch tensor
myData_tensor = torch.tensor(myData_transformed.toarray(), dtype=torch.float32)

# Make predictions
model.eval()
with torch.no_grad():
    predictions = model(myData_tensor)
print(f"Predicted Rating: {predictions.item()*10:.1f}")

Predicted Rating: 6.9


In [82]:
def movie_rater(myData):
    myData_combined = [
        ' '.join(myData[0])  # Combine the features into a single string
    ]

    # Transform the text into feature vectors using the pre-trained vectorizer
    myData_transformed = vectorizer.transform(myData_combined)

    # Convert to a PyTorch tensor
    myData_tensor = torch.tensor(myData_transformed.toarray(), dtype=torch.float32)

    # Make predictions
    model.eval()
    with torch.no_grad():
        predictions = model(myData_tensor)
    return (predictions.item()*10)