# Movies (SVR)

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, root_mean_squared_error, mean_squared_error, mean_absolute_percentage_error

import matplotlib.pyplot as plt
import plotly.express as px

import pickle

In [2]:
df = pd.read_parquet("data/movies-clean.parquet")
df.shape

(8168, 23823)

In [3]:
df.head()

Unnamed: 0,RATING,RunTime,TYPE,YEAR_FROM,YEAR_TO,Genre_Action,Genre_Adventure,Genre_Animation,Genre_Biography,Genre_Comedy,...,Actor_Özge Borak,Actor_Özge Özpirinçci,Actor_Özgür Emre Yildirim,Actor_Özgür Ozan,Actor_Özkan Ugur,Actor_Özz Nûjen,Actor_Úrsula Corberó,Actor_Úrsula Pruneda,Actor_Ülkü Duru,Actor_Þorsteinn Bachmann
0,6.1,121.0,Movie,2021,2021,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,5.0,25.0,Series,2021,2021,1,1,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,8.2,44.0,Series,2010,2022,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,9.2,23.0,Series,2013,2013,0,1,1,0,1,...,0,0,0,0,0,0,0,0,0,0
5,7.6,50.0,Series,2020,2020,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


## Encoding Type

In [4]:
df["TYPE"] = np.where(df["TYPE"] == "Movie", 1, 0)
df.dtypes

RATING                      float64
RunTime                     float64
TYPE                          int32
YEAR_FROM                     Int32
YEAR_TO                       Int32
                             ...   
Actor_Özz Nûjen               int64
Actor_Úrsula Corberó          int64
Actor_Úrsula Pruneda          int64
Actor_Ülkü Duru               int64
Actor_Þorsteinn Bachmann      int64
Length: 23823, dtype: object

## Feature / Target Selection

In [5]:
X = df.drop(columns=["RATING"])
y = df["RATING"]

## Standard Scaler

In [6]:
scaler = StandardScaler()
X[X.columns] = scaler.fit_transform(X[X.columns])
X.head()

Unnamed: 0,RunTime,TYPE,YEAR_FROM,YEAR_TO,Genre_Action,Genre_Adventure,Genre_Animation,Genre_Biography,Genre_Comedy,Genre_Crime,...,Actor_Özge Borak,Actor_Özge Özpirinçci,Actor_Özgür Emre Yildirim,Actor_Özgür Ozan,Actor_Özkan Ugur,Actor_Özz Nûjen,Actor_Úrsula Corberó,Actor_Úrsula Pruneda,Actor_Ülkü Duru,Actor_Þorsteinn Bachmann
0,1.216328,0.933637,0.681572,0.625929,1.851891,-0.442409,-0.455794,-0.184185,-0.648667,-0.450298,...,-0.011065,0.0,-0.011065,0.0,-0.011065,-0.011065,-0.011065,-0.011065,-0.011065,-0.011065
1,-0.934956,-1.071081,0.681572,0.625929,1.851891,2.260354,2.193974,-0.184185,-0.648667,-0.450298,...,-0.011065,0.0,-0.011065,0.0,-0.011065,-0.011065,-0.011065,-0.011065,-0.011065,-0.011065
2,-0.509181,-1.071081,-0.772734,0.762476,-0.539989,-0.442409,-0.455794,-0.184185,-0.648667,-0.450298,...,-0.011065,0.0,-0.011065,0.0,-0.011065,-0.011065,-0.011065,-0.011065,-0.011065,-0.011065
3,-0.979774,-1.071081,-0.376105,-0.466446,-0.539989,2.260354,2.193974,-0.184185,1.541623,-0.450298,...,-0.011065,0.0,-0.011065,0.0,-0.011065,-0.011065,-0.011065,-0.011065,-0.011065,-0.011065
5,-0.374726,-1.071081,0.549363,0.489382,1.851891,-0.442409,-0.455794,-0.184185,-0.648667,2.220751,...,-0.011065,0.0,-0.011065,0.0,-0.011065,-0.011065,-0.011065,-0.011065,-0.011065,-0.011065


## Train/Test Split

In [7]:
X_train, X_test, y_train, y_test = train_test_split(
                                    X, y, test_size=0.20, random_state=42)
X_train.shape, X_test.shape

((6534, 23822), (1634, 23822))

## Training

In [8]:
%%time
model = SVR(kernel="rbf")
model.fit(X_train, y_train)

CPU times: total: 36min 26s
Wall time: 9min 27s


## Evaluation

### Test Evaluation

In [None]:
predictions = model.predict(X_test)

mae = mean_absolute_error(y_test, predictions)
mse = mean_squared_error(y_test, predictions)
rmse = root_mean_squared_error(y_test, predictions)
mape = mean_absolute_percentage_error(y_test, predictions)

print(f"""
MAE: {mae:0.5f}
MSE: {mse:0.5f}
RMSE: {rmse:0.5f}
MAPE: {mape:0.5f}
""")

### Train Evaluation

In [None]:
predictions = model.predict(X_train)

mae = mean_absolute_error(y_train, predictions)
mse = mean_squared_error(y_train, predictions)
rmse = root_mean_squared_error(y_train, predictions)
mape = mean_absolute_percentage_error(y_train, predictions)

print(f"""
MAE: {mae:0.5f}
MSE: {mse:0.5f}
RMSE: {rmse:0.5f}
MAPE: {mape:0.5f}
""")