# Movie Recommendation

In [92]:
from sklearn.manifold import TSNE
import pandas as pd
import numpy as np

In [93]:
movies = pd.read_csv('Data/norm_sample_movies.csv')
title = pd.read_csv('Data/title.csv')


In [94]:
movies_features = movies.drop(columns=['primaryTitle', 'tconst'])

## t-SNE

In [95]:
# movies_np = movies.to_numpy(dtype='uint8')
movies_np = np.array(movies_features)


In [96]:
tsne_data = TSNE(n_components=1, learning_rate='auto', init='random').fit_transform(movies_np)


In [97]:
movies['tsne_score'] = tsne_data

In [98]:
movies.sort_values(by=['tsne_score'], inplace=True)
movies

Unnamed: 0,tconst,primaryTitle,startYear,averageRating,numVotes,War,Romance,Animation,Music,Drama,...,Talk-Show,Biography,Family,Mystery,Comedy,Action,History,News,Documentary,tsne_score
363,tt0070735,The Sting,-0.290170,0.189544,0.045979,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,-133.915802
356,tt0070510,Paper Moon,-0.290170,0.165447,-0.035811,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,-133.912109
353,tt0070334,The Long Goodbye,-0.290170,0.093158,-0.042073,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,-133.885086
569,tt0085794,The King of Comedy,-0.201935,0.129303,-0.014691,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,-133.817230
863,tt0097223,Time of the Gypsies,-0.143111,0.165447,-0.042279,0,0,0,0,1,...,0,0,0,0,1,0,0,0,0,-133.759521
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1523,tt0119951,Private Parts,-0.054876,0.020869,-0.039675,0,0,0,0,1,...,0,1,0,0,1,0,0,0,0,133.671768
1215,tt0110877,The Postman,-0.084288,0.117255,-0.040043,0,0,0,0,1,...,0,1,0,0,1,0,0,0,0,133.715988
1184,tt0109707,Ed Wood,-0.084288,0.129303,0.013612,0,0,0,0,1,...,0,1,0,0,1,0,0,0,0,133.716049
1029,tt0103939,Chaplin,-0.103895,0.093158,-0.031845,0,0,0,0,1,...,0,1,0,0,1,0,0,0,0,133.732178


In [99]:
movies_sorted = movies[['tconst', 'primaryTitle']].copy()
movies_sorted

Unnamed: 0,tconst,primaryTitle
363,tt0070735,The Sting
356,tt0070510,Paper Moon
353,tt0070334,The Long Goodbye
569,tt0085794,The King of Comedy
863,tt0097223,Time of the Gypsies
...,...,...
1523,tt0119951,Private Parts
1215,tt0110877,The Postman
1184,tt0109707,Ed Wood
1029,tt0103939,Chaplin


In [100]:
movies_sorted.reset_index(drop=True, inplace=True)
movies_sorted

Unnamed: 0,tconst,primaryTitle
0,tt0070735,The Sting
1,tt0070510,Paper Moon
2,tt0070334,The Long Goodbye
3,tt0085794,The King of Comedy
4,tt0097223,Time of the Gypsies
...,...,...
5692,tt0119951,Private Parts
5693,tt0110877,The Postman
5694,tt0109707,Ed Wood
5695,tt0103939,Chaplin


In [101]:
movies_sorted.to_csv('tsne_sorted_move.csv')

### t-SNE recommend processing 

In [102]:
movies_sorted.shape[0]

5697

In [103]:
def tSNE_recommend(title, sorted_movies_df, n=10):
    movie_index = sorted_movies_df.index[sorted_movies_df['primaryTitle'] == title].tolist()
    
    if movie_index:
        movie_index = movie_index[0]

        len = movies_sorted.shape[0]
        if n > len-2:
            return 'Range out of list'

        front = movie_index - n//2 
        back = movie_index + n//2 

        if front < 0:
            back += -front
            front = 0
        elif back > len-1:
            front -= back - (len-1)
            back = len-1
        print('Recommend Movies:')
        for i in range(front, back):
            if i != movie_index:            
                print(sorted_movies_df.at[i, 'primaryTitle'])
    
    else:
        return 'Can\'t Find this Movie'




In [104]:
searching_movie = 'Avengers: Age of Ultron'

tSNE_recommend(searching_movie, movies_sorted)

Recommend Movies:
Iron Man
Star Wars: Episode VII - The Force Awakens
Captain America: The Winter Soldier
The Hunger Games
Captain America: Civil War
X-Men: Days of Future Past
Iron Man 3
Edge of Tomorrow
Jurassic Park
