# Movie Recommender System

## Data Preprocessing

In [12]:
import os
import pandas as pd
import numpy as np
from sklearn.pipeline import make_pipeline, make_union, Pipeline
from sklearn.compose import make_column_transformer, make_column_selector, ColumnTransformer
from sklearn.preprocessing import StandardScaler, FunctionTransformer, OneHotEncoder

In [2]:
# Resolve the project root. The notebook normally runs from "<root>/notebooks",
# so step up one level in that case; otherwise assume we are already at the root.
# (str.strip('/notebooks') must not be used here: it strips a *set of characters*
# from both ends and can eat trailing letters of the parent directory name.)
_cwd = os.getcwd()
base_directory = os.path.dirname(_cwd) if os.path.basename(_cwd) == 'notebooks' else _cwd

# Load the cleaned movie table produced by the earlier cleaning step.
movies_df = pd.read_csv(os.path.join(base_directory, 'clean', 'clean_df.csv'))


In [3]:
# Inspect the dtype of every column in the loaded frame.
movies_df.dtypes

tconst             object
titleType          object
primaryTitle       object
isAdult             int64
startYear           int64
runtimeMinutes      int64
genres             object
averageRating     float64
numVotes          float64
Director           object
dtype: object

In [4]:
# Preview the first rows of the loaded data.
movies_df.head()

Unnamed: 0,tconst,titleType,primaryTitle,isAdult,startYear,runtimeMinutes,genres,averageRating,numVotes,Director
0,tt0013274,movie,Istoriya grazhdanskoy voyny,0,2021,94,Documentary,6.8,63.0,
1,tt0015724,movie,Dama de noche,0,1993,102,Drama,6.1,28.0,Eva López Sánchez
2,tt0028248,movie,Shipmates o' Mine,0,2022,87,Musical,4.2,42.0,Oswald Mitchell
3,tt0035423,movie,Kate & Leopold,0,2001,118,Comedy,6.4,88385.0,James Mangold
4,tt0036606,movie,"Another Time, Another Place",0,1983,118,Drama,6.4,350.0,Michael Radford


### Categorical Pipeline

In [13]:
# categorical encoding
#cat_transformer = make_pipeline(genre_splitter,OneHotEncoder(handle_unknown='ignore'), StandardScaler(with_mean=False))
cat_transformer = Pipeline([
    ('Encoder',OneHotEncoder(handle_unknown='ignore', sparse=False)),
    ('Scaler', StandardScaler())
    ])
cat_transformer

### Numerical Pipeline

In [15]:
# Numeric features only need standardisation (zero mean, unit variance).
num_transformer = Pipeline(
    steps=[
        ('Scaler', StandardScaler()),
    ],
)
num_transformer

### Final Pipeline

In [16]:
# Columns routed to the categorical pipeline.
cat_columns = ['genres']

# Columns routed to the numerical pipeline.
num_columns = [
    'isAdult',
    'startYear',
    'runtimeMinutes',
    'averageRating',
    'numVotes',
]

In [21]:
# Route each column group through its own pipeline; every column not listed
# in cat_columns / num_columns is passed through unchanged.
preprocessor = ColumnTransformer(
    transformers=[
        ('cat_transformer', cat_transformer, cat_columns),
        ('num_transformer', num_transformer, num_columns),
    ],
    remainder='passthrough',
)
preprocessor

In [22]:
# Fit the preprocessor on the full table and rebuild a labelled DataFrame.
# Because the passthrough columns are strings, the transformer returns a
# mixed-type array, which would leave every column (including the scaled
# features) as `object`. infer_objects() restores proper numeric dtypes
# for the feature columns while leaving the text columns untouched.
movies_df_transformed = pd.DataFrame(
    preprocessor.fit_transform(movies_df),
    columns=preprocessor.get_feature_names_out(),
    index=movies_df.index,  # keep rows aligned with the source frame
).infer_objects()
movies_df_transformed.head()

Unnamed: 0,cat_transformer__genres_Action,cat_transformer__genres_Adult,cat_transformer__genres_Adventure,cat_transformer__genres_Animation,cat_transformer__genres_Biography,cat_transformer__genres_Comedy,cat_transformer__genres_Crime,cat_transformer__genres_Documentary,cat_transformer__genres_Drama,cat_transformer__genres_Family,...,cat_transformer__genres_Western,num_transformer__isAdult,num_transformer__startYear,num_transformer__runtimeMinutes,num_transformer__averageRating,num_transformer__numVotes,remainder__tconst,remainder__titleType,remainder__primaryTitle,remainder__Director
0,-0.316929,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,1.742084,-0.584714,-0.084477,...,-0.034338,-0.140078,1.070565,0.021476,0.551474,-0.135717,tt0013274,movie,Istoriya grazhdanskoy voyny,
1,-0.316929,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,1.710239,-0.084477,...,-0.034338,-0.140078,-0.941441,0.078123,-0.051514,-0.136711,tt0015724,movie,Dama de noche,Eva López Sánchez
2,-0.316929,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,1.142422,-0.028089,-1.688196,-0.136313,tt0028248,movie,Shipmates o' Mine,Oswald Mitchell
3,-0.316929,-0.137674,-0.181492,-0.107035,-0.197118,2.21695,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,-0.366582,0.191416,0.206909,2.374289,tt0035423,movie,Kate & Leopold,James Mangold
4,-0.316929,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,1.710239,-0.084477,...,-0.034338,-0.140078,-1.660015,0.191416,0.206909,-0.12756,tt0036606,movie,"Another Time, Another Place",Michael Radford


In [23]:
# Give the passthrough columns short names by dropping the "remainder__"
# prefix added by the ColumnTransformer. The frame is modified in place.
movies_df_transformed.rename(
    mapper={
        'remainder__tconst': 'id',
        'remainder__titleType': 'Type',
        'remainder__primaryTitle': 'Title',
        'remainder__Director': 'Director',
    },
    axis='columns',
    inplace=True,
)

## Searching Methods

Practicing search methods for finding movies (to be applied later in the UI).

In [27]:
# Prototype of the title search: keep every row whose Title contains the query.
filter_test = 'Batma'
# regex=False -> the query is matched as literal text, so characters such as
#                "(" or "+" typed by a user cannot raise a regex error.
# na=False    -> rows with a missing Title count as non-matches, keeping the
#                mask strictly boolean so it can be used for indexing.
mask = movies_df_transformed['Title'].str.contains(filter_test, regex=False, na=False)
movies_df_transformed[mask]

Unnamed: 0,cat_transformer__genres_Action,cat_transformer__genres_Adult,cat_transformer__genres_Adventure,cat_transformer__genres_Animation,cat_transformer__genres_Biography,cat_transformer__genres_Comedy,cat_transformer__genres_Crime,cat_transformer__genres_Documentary,cat_transformer__genres_Drama,cat_transformer__genres_Family,...,cat_transformer__genres_Western,num_transformer__isAdult,num_transformer__startYear,num_transformer__runtimeMinutes,num_transformer__averageRating,num_transformer__numVotes,id,Type,Title,Director
18396,3.155279,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,-1.228871,0.248062,1.154462,11.217111,tt0096895,movie,Batman,Tim Burton
22117,3.155279,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,-1.013299,0.248062,0.809897,9.060209,tt0103776,movie,Batman Returns,Tim Burton
23473,3.155279,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,-0.941441,-0.105978,1.412885,1.447409,tt0106364,movie,Batman: Mask of the Phantasm,
26429,3.155279,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,-0.797727,0.212658,-0.654502,7.371393,tt0112462,movie,Batman Forever,Joel Schumacher
29293,3.155279,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,-0.654012,0.240981,-2.032761,7.411521,tt0118688,movie,Batman & Robin,Joel Schumacher
30608,3.155279,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,-1.085156,0.269305,-0.912926,-0.132761,tt0121067,movie,Alyas Batman en Robin,Tony Y. Reyes
70938,3.155279,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,-0.079153,0.347193,1.75745,44.056511,tt0372784,movie,Batman Begins,Christopher Nolan
98259,3.155279,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,0.92685,-0.134302,-0.051514,-0.097948,tt10327712,movie,Lego DC Batman: Family Matters,Matt Peters
120277,-0.316929,-0.137674,-0.181492,9.342744,-0.197118,-0.45107,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,0.998707,-0.212191,-0.031385,-0.074092,tt12410244,movie,Superfan Dogs: Batman and Superman Comics,
123543,-0.316929,-0.137674,-0.181492,-0.107035,5.073098,-0.45107,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,0.998707,-0.049332,0.637615,-0.136257,tt12736322,movie,Batman and Me,Michael Wayne


## PCA

In [24]:
# Preview the preprocessed frame at the start of the PCA section.
movies_df_transformed.head()

Unnamed: 0,cat_transformer__genres_Action,cat_transformer__genres_Adult,cat_transformer__genres_Adventure,cat_transformer__genres_Animation,cat_transformer__genres_Biography,cat_transformer__genres_Comedy,cat_transformer__genres_Crime,cat_transformer__genres_Documentary,cat_transformer__genres_Drama,cat_transformer__genres_Family,...,cat_transformer__genres_Western,num_transformer__isAdult,num_transformer__startYear,num_transformer__runtimeMinutes,num_transformer__averageRating,num_transformer__numVotes,id,Type,Title,Director
0,-0.316929,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,1.742084,-0.584714,-0.084477,...,-0.034338,-0.140078,1.070565,0.021476,0.551474,-0.135717,tt0013274,movie,Istoriya grazhdanskoy voyny,
1,-0.316929,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,1.710239,-0.084477,...,-0.034338,-0.140078,-0.941441,0.078123,-0.051514,-0.136711,tt0015724,movie,Dama de noche,Eva López Sánchez
2,-0.316929,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,1.142422,-0.028089,-1.688196,-0.136313,tt0028248,movie,Shipmates o' Mine,Oswald Mitchell
3,-0.316929,-0.137674,-0.181492,-0.107035,-0.197118,2.21695,-0.190734,-0.574025,-0.584714,-0.084477,...,-0.034338,-0.140078,-0.366582,0.191416,0.206909,2.374289,tt0035423,movie,Kate & Leopold,James Mangold
4,-0.316929,-0.137674,-0.181492,-0.107035,-0.197118,-0.45107,-0.190734,-0.574025,1.710239,-0.084477,...,-0.034338,-0.140078,-1.660015,0.191416,0.206909,-0.12756,tt0036606,movie,"Another Time, Another Place",Michael Radford


## K Means