In [162]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from pytorch_tabnet.tab_model import TabNetRegressor
from sklearn.metrics import mean_absolute_error
import torch

df = pd.read_csv("/Users/chandrus/development/imdb_top_1000.csv")
print(df.head())  # check if the data loaded properly


                                         Poster_Link  \
0  https://m.media-amazon.com/images/M/MV5BMDFkYT...   
1  https://m.media-amazon.com/images/M/MV5BM2MyNj...   
2  https://m.media-amazon.com/images/M/MV5BMTMxNT...   
3  https://m.media-amazon.com/images/M/MV5BMWMwMG...   
4  https://m.media-amazon.com/images/M/MV5BMWU4N2...   

               Series_Title Released_Year Certificate  Runtime  \
0  The Shawshank Redemption          1994           A  142 min   
1             The Godfather          1972           A  175 min   
2           The Dark Knight          2008          UA  152 min   
3    The Godfather: Part II          1974           A  202 min   
4              12 Angry Men          1957           U   96 min   

                  Genre  IMDB_Rating  \
0                 Drama          9.3   
1          Crime, Drama          9.2   
2  Action, Crime, Drama          9.0   
3          Crime, Drama          9.0   
4          Crime, Drama          9.0   

                         

In [163]:
# Select relevant columns for training
features = ["Runtime", "Meta_score", "No_of_Votes"]  # Adjust as needed
target = "IMDB_Rating"

In [164]:
# Drop rows with missing values in selected features
df = df.dropna(subset=features + [target])


In [165]:
# Convert categorical values if needed (e.g., encoding genres)
df["Runtime"] = df["Runtime"].str.replace(" min", "").astype(float)

In [166]:
# Prepare data
X = df[features]
y = df[target]

In [167]:
# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [168]:
# Scale numerical features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [169]:
# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)


In [170]:
# Initialize and train TabNet model
model = TabNetRegressor()
model.fit(
    X_train.numpy(), y_train.numpy(),
    eval_set=[(X_test.numpy(), y_test.numpy())],
    max_epochs=50,
    patience=5,
    batch_size=128,
)



epoch 0  | loss: 57.29309| val_0_mse: 44.62435|  0:00:00s
epoch 1  | loss: 33.95709| val_0_mse: 26.79542|  0:00:00s
epoch 2  | loss: 18.30399| val_0_mse: 16.16301|  0:00:00s
epoch 3  | loss: 6.35244 | val_0_mse: 10.09499|  0:00:00s
epoch 4  | loss: 1.40312 | val_0_mse: 11.42473|  0:00:00s
epoch 5  | loss: 1.51623 | val_0_mse: 6.05874 |  0:00:00s
epoch 6  | loss: 0.50561 | val_0_mse: 4.33107 |  0:00:00s
epoch 7  | loss: 0.43337 | val_0_mse: 3.16185 |  0:00:00s
epoch 8  | loss: 0.32142 | val_0_mse: 2.21593 |  0:00:00s
epoch 9  | loss: 0.19215 | val_0_mse: 2.34194 |  0:00:00s
epoch 10 | loss: 0.14244 | val_0_mse: 2.1272  |  0:00:00s
epoch 11 | loss: 0.13814 | val_0_mse: 2.17937 |  0:00:00s
epoch 12 | loss: 0.14797 | val_0_mse: 2.08265 |  0:00:00s
epoch 13 | loss: 0.12244 | val_0_mse: 1.47505 |  0:00:00s
epoch 14 | loss: 0.10968 | val_0_mse: 1.6403  |  0:00:00s
epoch 15 | loss: 0.11336 | val_0_mse: 1.36553 |  0:00:00s
epoch 16 | loss: 0.09484 | val_0_mse: 1.25484 |  0:00:00s
epoch 17 | los



In [171]:
from sklearn.preprocessing import StandardScaler
import joblib

# Train the scaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)

# Save the fitted scaler
joblib.dump(scaler, "scaler.pkl")
print("Scaler saved successfully!")


Scaler saved successfully!


In [172]:
# Predictions
y_pred = model.predict(X_test.numpy()).flatten()


In [173]:
# Evaluate performance
mae = mean_absolute_error(y_test.numpy(), y_pred)
print(f"Mean Absolute Error: {mae:.3f}")


Mean Absolute Error: 0.356


In [174]:
# Save model
model.save_model("tabnet_imdb")

Successfully saved model at tabnet_imdb.zip


'tabnet_imdb.zip'

In [175]:
from pytorch_tabnet.tab_model import TabNetRegressor
import torch
import pickle

# Assuming you already trained the model
model = TabNetRegressor()

# Load the trained model from its saved state (if it exists)
model.load_model("tabnet_imdb.zip")

# Save the trained model to a .pkl file
with open("tabnet_model.pkl", "wb") as f:
    pickle.dump(model, f)

print("✅ Model saved successfully as 'tabnet_model.pkl'")


✅ Model saved successfully as 'tabnet_model.pkl'




In [176]:
# In your notebook:
# Prepare data
X = df[features]  # ["Runtime", "Meta_score", "No_of_Votes"]
y = df[target]

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train and save scaler
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Save scaler BEFORE converting to tensor
joblib.dump(scaler, "scaler.pkl")

# Convert to tensor after scaling
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)