# Movie Recommender Model using KNN Algorithm

## 1. Import Modules 

In [1]:
import pickle
import json
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors

## 2. Download data 

In [2]:
# Fetch the two raw TMDB 5000 CSV files from GitHub: the movies table first,
# then the credits table. Each URL is read into its own DataFrame.
movies, credits = (
    pd.read_csv(csv_url)
    for csv_url in (
        'https://raw.githubusercontent.com/4GeeksAcademy/gperdrizet-k-nearest-neighbors/refs/heads/main/data/raw/tmdb_5000_movies.csv',
        'https://raw.githubusercontent.com/4GeeksAcademy/gperdrizet-k-nearest-neighbors/refs/heads/main/data/raw/tmdb_5000_credits.csv',
    )
)

In [3]:
# Preview the first five movie rows, transposed so each column reads as a row.
movies.head(5).transpose()

Unnamed: 0,0,1,2,3,4
budget,237000000,300000000,245000000,250000000,260000000
genres,"[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 14, ""...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam...","[{""id"": 28, ""name"": ""Action""}, {""id"": 80, ""nam...","[{""id"": 28, ""name"": ""Action""}, {""id"": 12, ""nam..."
homepage,http://www.avatarmovie.com/,http://disney.go.com/disneypictures/pirates/,http://www.sonypictures.com/movies/spectre/,http://www.thedarkknightrises.com/,http://movies.disney.com/john-carter
id,19995,285,206647,49026,49529
keywords,"[{""id"": 1463, ""name"": ""culture clash""}, {""id"":...","[{""id"": 270, ""name"": ""ocean""}, {""id"": 726, ""na...","[{""id"": 470, ""name"": ""spy""}, {""id"": 818, ""name...","[{""id"": 849, ""name"": ""dc comics""}, {""id"": 853,...","[{""id"": 818, ""name"": ""based on novel""}, {""id"":..."
original_language,en,en,en,en,en
original_title,Avatar,Pirates of the Caribbean: At World's End,Spectre,The Dark Knight Rises,John Carter
overview,"In the 22nd century, a paraplegic Marine is di...","Captain Barbossa, long believed to be dead, ha...",A cryptic message from Bond’s past sends him o...,Following the death of District Attorney Harve...,"John Carter is a war-weary, former military ca..."
popularity,150.437577,139.082615,107.376788,112.31295,43.926995
production_companies,"[{""name"": ""Ingenious Film Partners"", ""id"": 289...","[{""name"": ""Walt Disney Pictures"", ""id"": 2}, {""...","[{""name"": ""Columbia Pictures"", ""id"": 5}, {""nam...","[{""name"": ""Legendary Pictures"", ""id"": 923}, {""...","[{""name"": ""Walt Disney Pictures"", ""id"": 2}]"


In [4]:
# Preview the first five credits rows, transposed so each column reads as a row.
credits.head(5).transpose()

Unnamed: 0,0,1,2,3,4
movie_id,19995,285,206647,49026,49529
title,Avatar,Pirates of the Caribbean: At World's End,Spectre,The Dark Knight Rises,John Carter
cast,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c..."
crew,"[{""credit_id"": ""52fe48009251416c750aca23"", ""de...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


## 3. Prepare Data Frame
### 3.1 Prepare credits for merge 

In [5]:
# Give the credits key the same name as the movies key (`movie_id` -> `id`)
# so the two frames can be merged on a shared column.
# `rename` returns a new frame; rebinding the name (rather than inplace=True)
# keeps the call chainable, and re-running the cell is a harmless no-op because
# a missing `movie_id` label is ignored by default.
credits = credits.rename(columns={'movie_id': 'id'})
credits.head().T


Unnamed: 0,0,1,2,3,4
id,19995,285,206647,49026,49529
title,Avatar,Pirates of the Caribbean: At World's End,Spectre,The Dark Knight Rises,John Carter
cast,"[{""cast_id"": 242, ""character"": ""Jake Sully"", ""...","[{""cast_id"": 4, ""character"": ""Captain Jack Spa...","[{""cast_id"": 1, ""character"": ""James Bond"", ""cr...","[{""cast_id"": 2, ""character"": ""Bruce Wayne / Ba...","[{""cast_id"": 5, ""character"": ""John Carter"", ""c..."
crew,"[{""credit_id"": ""52fe48009251416c750aca23"", ""de...","[{""credit_id"": ""52fe4232c3a36847f800b579"", ""de...","[{""credit_id"": ""54805967c3a36829b5002c41"", ""de...","[{""credit_id"": ""52fe4781c3a36847f81398c3"", ""de...","[{""credit_id"": ""52fe479ac3a36847f813eaa3"", ""de..."


### 3.2 Merge the movies and credits DataFrames

In [6]:
# Join the movies table with the credits table on the shared `id` column.
# - how='outer' keeps ids that appear in only one of the two frames.
# - validate='one_to_one' makes pandas raise MergeError if `id` is duplicated
#   in either frame, instead of silently multiplying rows in the result.
# Column names present in both frames (other than `id`) come out with the
# default `_x` / `_y` suffixes.
data_df = pd.merge(movies, credits, on='id', how='outer', validate='one_to_one')
data_df.head().T

Unnamed: 0,0,1,2,3,4
budget,4000000,11000000,94000000,55000000,15000000
genres,"[{""id"": 80, ""name"": ""Crime""}, {""id"": 35, ""name...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 28, ""...","[{""id"": 16, ""name"": ""Animation""}, {""id"": 10751...","[{""id"": 35, ""name"": ""Comedy""}, {""id"": 18, ""nam...","[{""id"": 18, ""name"": ""Drama""}]"
homepage,,http://www.starwars.com/films/star-wars-episod...,http://movies.disney.com/finding-nemo,,http://www.dreamworks.com/ab/
id,5,11,12,13,14
keywords,"[{""id"": 612, ""name"": ""hotel""}, {""id"": 613, ""na...","[{""id"": 803, ""name"": ""android""}, {""id"": 4270, ...","[{""id"": 494, ""name"": ""father son relationship""...","[{""id"": 422, ""name"": ""vietnam veteran""}, {""id""...","[{""id"": 255, ""name"": ""male nudity""}, {""id"": 29..."
original_language,en,en,en,en,en
original_title,Four Rooms,Star Wars,Finding Nemo,Forrest Gump,American Beauty
overview,It's Ted the Bellhop's first night on the job....,Princess Leia is captured and held hostage by ...,"Nemo, an adventurous young clownfish, is unexp...",A man with a low IQ has accomplished great thi...,"Lester Burnham, a depressed suburban father in..."
popularity,22.87623,126.393695,85.688789,138.133331,80.878605
production_companies,"[{""name"": ""Miramax Films"", ""id"": 14}, {""name"":...","[{""name"": ""Lucasfilm"", ""id"": 1}, {""name"": ""Twe...","[{""name"": ""Pixar Animation Studios"", ""id"": 3}]","[{""name"": ""Paramount Pictures"", ""id"": 4}]","[{""name"": ""DreamWorks SKG"", ""id"": 27}, {""name""..."


### 3.3 Drop unnecessary features 

In [7]:
# Remove the suffixed `title_x` / `title_y` columns left over from the merge.
# Rebinding with errors='ignore' (instead of an in-place drop) makes this cell
# safe to re-run: a second in-place drop raises KeyError because the columns
# are already gone.
data_df = data_df.drop(columns=['title_x', 'title_y'], errors='ignore')


In [8]:
# Print the column names, non-null counts and dtypes of the merged frame.
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   budget                4803 non-null   int64  
 1   genres                4803 non-null   object 
 2   homepage              1712 non-null   object 
 3   id                    4803 non-null   int64  
 4   keywords              4803 non-null   object 
 5   original_language     4803 non-null   object 
 6   original_title        4803 non-null   object 
 7   overview              4800 non-null   object 
 8   popularity            4803 non-null   float64
 9   production_companies  4803 non-null   object 
 10  production_countries  4803 non-null   object 
 11  release_date          4802 non-null   object 
 12  revenue               4803 non-null   int64  
 13  runtime               4801 non-null   float64
 14  spoken_languages      4803 non-null   object 
 15  status               

### 3.4 Rename `original_title` to `title`

In [18]:
# Expose `original_title` under the shorter name `title`.
# Rebinding the result (rather than inplace=True) keeps the call chainable;
# re-running is a no-op because a missing `original_title` label is ignored
# by default.
# NOTE(review): presumably `original_title` is the title in the film's original
# language -- confirm it is the title users are expected to search by.
data_df = data_df.rename(columns={'original_title': 'title'})
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4803 entries, 0 to 4802
Data columns (total 21 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   budget                4803 non-null   int64  
 1   genres                4803 non-null   object 
 2   homepage              1712 non-null   object 
 3   id                    4803 non-null   int64  
 4   keywords              4803 non-null   object 
 5   original_language     4803 non-null   object 
 6   title                 4803 non-null   object 
 7   overview              4800 non-null   object 
 8   popularity            4803 non-null   float64
 9   production_companies  4803 non-null   object 
 10  production_countries  4803 non-null   object 
 11  release_date          4802 non-null   object 
 12  revenue               4803 non-null   int64  
 13  runtime               4801 non-null   float64
 14  spoken_languages      4803 non-null   object 
 15  status               

In [20]:
# Preview the first five rows of the prepared frame, one column per displayed row.
data_df.head(5).transpose()

Unnamed: 0,0,1,2,3,4
budget,4000000,11000000,94000000,55000000,15000000
genres,"[{""id"": 80, ""name"": ""Crime""}, {""id"": 35, ""name...","[{""id"": 12, ""name"": ""Adventure""}, {""id"": 28, ""...","[{""id"": 16, ""name"": ""Animation""}, {""id"": 10751...","[{""id"": 35, ""name"": ""Comedy""}, {""id"": 18, ""nam...","[{""id"": 18, ""name"": ""Drama""}]"
homepage,,http://www.starwars.com/films/star-wars-episod...,http://movies.disney.com/finding-nemo,,http://www.dreamworks.com/ab/
id,5,11,12,13,14
keywords,"[{""id"": 612, ""name"": ""hotel""}, {""id"": 613, ""na...","[{""id"": 803, ""name"": ""android""}, {""id"": 4270, ...","[{""id"": 494, ""name"": ""father son relationship""...","[{""id"": 422, ""name"": ""vietnam veteran""}, {""id""...","[{""id"": 255, ""name"": ""male nudity""}, {""id"": 29..."
original_language,en,en,en,en,en
title,Four Rooms,Star Wars,Finding Nemo,Forrest Gump,American Beauty
overview,It's Ted the Bellhop's first night on the job....,Princess Leia is captured and held hostage by ...,"Nemo, an adventurous young clownfish, is unexp...",A man with a low IQ has accomplished great thi...,"Lester Burnham, a depressed suburban father in..."
popularity,22.87623,126.393695,85.688789,138.133331,80.878605
production_companies,"[{""name"": ""Miramax Films"", ""id"": 14}, {""name"":...","[{""name"": ""Lucasfilm"", ""id"": 1}, {""name"": ""Twe...","[{""name"": ""Pixar Animation Studios"", ""id"": 3}]","[{""name"": ""Paramount Pictures"", ""id"": 4}]","[{""name"": ""DreamWorks SKG"", ""id"": 27}, {""name""..."


## 4. Encode features 

In [23]:
# Look at the raw `cast` value stored for the row labelled 0 to see its format
# before encoding.
data_df.loc[0, 'cast']

'[{"cast_id": 42, "character": "Ted the Bellhop", "credit_id": "52fe420dc3a36847f80001b7", "gender": 2, "id": 3129, "name": "Tim Roth", "order": 0}, {"cast_id": 31, "character": "Man", "credit_id": "52fe420dc3a36847f800018b", "gender": 2, "id": 3131, "name": "Antonio Banderas", "order": 1}, {"cast_id": 29, "character": "Angela", "credit_id": "52fe420dc3a36847f8000183", "gender": 1, "id": 3130, "name": "Jennifer Beals", "order": 2}, {"cast_id": 25, "character": "Elspeth", "credit_id": "52fe420dc3a36847f8000173", "gender": 1, "id": 3125, "name": "Madonna", "order": 3}, {"cast_id": 41, "character": "Margaret", "credit_id": "52fe420dc3a36847f80001b3", "gender": 1, "id": 3141, "name": "Marisa Tomei", "order": 4}, {"cast_id": 43, "character": "Leo", "credit_id": "52fe420dc3a36847f80001bb", "gender": 2, "id": 62, "name": "Bruce Willis", "order": 5}, {"cast_id": 38, "character": "Chester Rush", "credit_id": "52fe420dc3a36847f80001a7", "gender": 2, "id": 138, "name": "Quentin Tarantino", "order