# K-nearest-neighbour

In [21]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [22]:
!gdown 'https://drive.google.com/uc?id=10Ls4AOmNg-ACgzoAb5eUG4CVvoeE0ljn'
column_names = ['user_id', 'item_id', 'ratings', 'timestamp']
df = pd.read_csv('ratings.dat', sep = '::', names = column_names, engine='python')

Downloading...
From: https://drive.google.com/uc?id=10Ls4AOmNg-ACgzoAb5eUG4CVvoeE0ljn
To: /content/ratings.dat
100% 24.6M/24.6M [00:00<00:00, 40.6MB/s]


In [23]:
# Preview the first rows of the ratings table (user_id, item_id, ratings, timestamp).
df.head()

Unnamed: 0,user_id,item_id,ratings,timestamp
0,1,1193,5,978300760
1,1,661,3,978302109
2,1,914,3,978301968
3,1,3408,4,978300275
4,1,2355,5,978824291


In [24]:
!gdown 'https://drive.google.com/uc?id=1n8ixAV7Uq1A4zmTM3A3qOdrdQPsnqpHh'
column_names = ['id', 'title', 'genre']
movies = pd.read_csv('movies.dat', sep='::', names=column_names, engine='python', encoding='latin1')

Downloading...
From: https://drive.google.com/uc?id=1n8ixAV7Uq1A4zmTM3A3qOdrdQPsnqpHh
To: /content/movies.dat
  0% 0.00/171k [00:00<?, ?B/s]100% 171k/171k [00:00<00:00, 69.1MB/s]


In [25]:
# Preview the first rows of the movie catalogue (id, title, genre).
movies.head()

Unnamed: 0,id,title,genre
0,1,Toy Story (1995),Animation|Children's|Comedy
1,2,Jumanji (1995),Adventure|Children's|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama
4,5,Father of the Bride Part II (1995),Comedy


In [26]:
# Attach every rating to its movie: the movie 'id' (left) is matched against the
# rating 'item_id' (right). merge() defaults to an inner join, so only movies
# that have at least one rating are kept.
# NOTE: `movies` is overwritten here, so this cell should run exactly once after
# the catalogue is loaded; re-running it merges the ratings a second time.
movies = pd.merge(movies, df, left_on='id', right_on='item_id')
movies.head()

Unnamed: 0,id,title,genre,user_id,item_id,ratings,timestamp
0,1,Toy Story (1995),Animation|Children's|Comedy,1,1,5,978824268
1,1,Toy Story (1995),Animation|Children's|Comedy,6,1,4,978237008
2,1,Toy Story (1995),Animation|Children's|Comedy,8,1,4,978233496
3,1,Toy Story (1995),Animation|Children's|Comedy,9,1,5,978225952
4,1,Toy Story (1995),Animation|Children's|Comedy,10,1,5,978226474


In [27]:
# Relevant data: keep only the movie id and title, the rating user and the rating itself
movies = movies.loc[:, ['id', 'title', 'user_id', 'ratings']]
movies.head()

Unnamed: 0,id,title,user_id,ratings
0,1,Toy Story (1995),1,5
1,1,Toy Story (1995),6,4
2,1,Toy Story (1995),8,4
3,1,Toy Story (1995),9,5
4,1,Toy Story (1995),10,5


In [28]:
# Number of ratings each title received, most-rated titles first (top five shown)
(
    movies
    .groupby('title')['ratings']
    .count()
    .sort_values(ascending=False)
    .head()
)

Unnamed: 0_level_0,ratings
title,Unnamed: 1_level_1
American Beauty (1999),3428
Star Wars: Episode IV - A New Hope (1977),2991
Star Wars: Episode V - The Empire Strikes Back (1980),2990
Star Wars: Episode VI - Return of the Jedi (1983),2883
Jurassic Park (1993),2672


In [40]:
# Ratings per title as a one-column DataFrame named 'count', indexed by title
count = movies.groupby('title')['ratings'].count().to_frame(name='count')
count

Unnamed: 0_level_0,count
title,Unnamed: 1_level_1
"$1,000,000 Duck (1971)",37
'Night Mother (1986),70
'Til There Was You (1997),52
"'burbs, The (1989)",303
...And Justice for All (1979),199
...,...
"Zed & Two Noughts, A (1985)",29
Zero Effect (1998),301
Zero Kelvin (Kjærlighetens kjøtere) (1995),2
Zeus and Roxanne (1997),23


In [30]:
# Attach the per-title rating count to every rating row.
# Any 'count' column left over from a previous run is dropped first so the cell
# is idempotent: without this, re-running the cell merges a second time and
# pandas produces 'count_x' / 'count_y' columns instead of a single 'count'.
movies = (
    movies
    .drop(columns='count', errors='ignore')
    .merge(count, left_on='title', right_on='title')
)
movies

Unnamed: 0,id,title,user_id,ratings,count
0,1,Toy Story (1995),1,5,2077
1,1,Toy Story (1995),6,4,2077
2,1,Toy Story (1995),8,4,2077
3,1,Toy Story (1995),9,5,2077
4,1,Toy Story (1995),10,5,2077
...,...,...,...,...,...
1000204,3952,"Contender, The (2000)",5812,4,388
1000205,3952,"Contender, The (2000)",5831,3,388
1000206,3952,"Contender, The (2000)",5837,4,388
1000207,3952,"Contender, The (2000)",5927,1,388


In [31]:
# Pivot table: one row per user, one column per movie title, cells hold the rating.
# pivot_table averages duplicate (user, title) pairs by default; titles a user
# has not rated come out as NaN and are replaced with 0.
movie_matrix = pd.pivot_table(
    movies,
    values='ratings',
    index='user_id',
    columns='title',
).fillna(0)
movie_matrix.shape

(6040, 3706)

In [32]:
# Display the user x title rating matrix (0.0 marks a title the user has not rated).
movie_matrix

title,"$1,000,000 Duck (1971)",'Night Mother (1986),'Til There Was You (1997),"'burbs, The (1989)",...And Justice for All (1979),1-900 (1994),10 Things I Hate About You (1999),101 Dalmatians (1961),101 Dalmatians (1996),12 Angry Men (1957),...,"Young Poisoner's Handbook, The (1995)",Young Sherlock Holmes (1985),Young and Innocent (1937),Your Friends and Neighbors (1998),Zachariah (1971),"Zed & Two Noughts, A (1985)",Zero Effect (1998),Zero Kelvin (Kjærlighetens kjøtere) (1995),Zeus and Roxanne (1997),eXistenZ (1999)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6036,0.0,3.0,0.0,0.0,0.0,0.0,2.0,4.0,0.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0
6037,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6038,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6039,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [33]:
# Sparse (CSR) representation of the user x title rating matrix
movie_features_matrix = csr_matrix(movie_matrix.to_numpy())

# Cosine-distance neighbour search. n_neighbors=5 is only the model default;
# the queries further down pass their own n_neighbors to kneighbors().
model_knn = NearestNeighbors(metric='cosine', n_neighbors=5)

# Transposing puts movies on the rows, so each movie is described by its vector of user ratings
model_knn.fit(movie_features_matrix.T)

In [34]:
# (number of users, number of movie titles)
movie_matrix.shape

(6040, 3706)

In [35]:
# Column position of 'Toy Story (1995)' in movie_matrix, which is also its row in
# the transposed matrix the model was fitted on
movie_index = movie_matrix.columns.tolist().index('Toy Story (1995)')

# Ask for 11 neighbours: the closest hit is expected to be the movie itself,
# which leaves 10 other movies to report
distances, indices = model_knn.kneighbors(
    movie_features_matrix.T[movie_index].reshape(1, -1),
    n_neighbors=11,
)

In [36]:
# Report the neighbours with a 1-based rank, skipping position 0 (taken to be the query movie itself)
print("Nearest neighbors of Toy Story (1995) - ")
for rank, (column, distance) in enumerate(zip(indices[0][1:], distances[0][1:]), start=1):
    print(f"{rank}. {movie_matrix.columns[column]}, Distance: {distance}")

Nearest neighbors of Toy Story (1995) - 
1. Toy Story 2 (1999), Distance: 0.3668962584412051
2. Groundhog Day (1993), Distance: 0.38917384416450684
3. Aladdin (1992), Distance: 0.39415088991282676
4. Bug's Life, A (1998), Distance: 0.42061846377095635
5. Back to the Future (1985), Distance: 0.42987458138056645
6. Babe (1995), Distance: 0.4363629086034405
7. Star Wars: Episode V - The Empire Strikes Back (1980), Distance: 0.4471444659756979
8. Men in Black (1997), Distance: 0.4476378517643327
9. Forrest Gump (1994), Distance: 0.4489660370084352
10. Matrix, The (1999), Distance: 0.4497059825033908


In [37]:
def recommend(movie, n_recommendations=10):
    """Print the movies whose rating vectors are closest to ``movie``.

    The title is looked up among the columns of ``movie_matrix``; the matching
    row of the transposed ``movie_features_matrix`` is then used to query
    ``model_knn``. One header line and one ranked line per neighbour
    (title and distance) are printed. Nothing is returned.

    Parameters
    ----------
    movie : str
        Title exactly as it appears in ``movie_matrix.columns``.
    n_recommendations : int, default 10
        Maximum number of neighbours to print.

    Raises
    ------
    ValueError
        If ``n_recommendations`` is smaller than 1 or ``movie`` is not a
        column of ``movie_matrix``.
    """
    if n_recommendations < 1:
        raise ValueError(f"n_recommendations must be >= 1, got {n_recommendations}")

    titles = list(movie_matrix.columns)
    try:
        movie_index = titles.index(movie)
    except ValueError:
        # Same exception type as before, but with a message that names the problem.
        raise ValueError(f"Movie {movie!r} was not found in movie_matrix.columns") from None

    # Request one extra neighbour because the query movie is part of the fitted
    # data and normally comes back as its own nearest neighbour; never request
    # more neighbours than there are movies.
    n_query = min(n_recommendations + 1, len(titles))
    distances, indices = model_knn.kneighbors(
        movie_features_matrix.T[movie_index].reshape(1, -1),
        n_neighbors=n_query,
    )

    # Remove the query movie by index instead of assuming it sits at position 0:
    # when another movie has an identical rating vector (distance 0) the query
    # may be returned after it, and slicing off position 0 would then list the
    # query movie as its own recommendation.
    neighbours = [
        (idx, dist)
        for idx, dist in zip(indices[0], distances[0])
        if idx != movie_index
    ][:n_recommendations]

    print(f"Nearest neighbors of {movie} - ")
    for rank, (idx, dist) in enumerate(neighbours, start=1):
        print(f"{rank}. {titles[idx]}, Distance: {dist}")

In [38]:
# Print the nearest neighbours of 'Jurassic Park (1993)'.
recommend('Jurassic Park (1993)')

Nearest neighbors of Jurassic Park (1993) - 
1. Men in Black (1997), Distance: 0.2713809505181499
2. Terminator 2: Judgment Day (1991), Distance: 0.290139469264282
3. Matrix, The (1999), Distance: 0.3216407138843622
4. Total Recall (1990), Distance: 0.3273559577116679
5. Independence Day (ID4) (1996), Distance: 0.3623598942755136
6. Star Wars: Episode IV - A New Hope (1977), Distance: 0.36386738414329933
7. Star Wars: Episode V - The Empire Strikes Back (1980), Distance: 0.36447674184413903
8. Star Wars: Episode VI - Return of the Jedi (1983), Distance: 0.36905938270670924
9. Star Wars: Episode I - The Phantom Menace (1999), Distance: 0.3754476846791901
10. Braveheart (1995), Distance: 0.3755410970493265


In [39]:
# Print the nearest neighbours of 'Dark Half, The (1993)'.
recommend('Dark Half, The (1993)')

Nearest neighbors of Dark Half, The (1993) - 
1. Sleepwalkers (1992), Distance: 0.6112050577254104
2. Lord of Illusions (1995), Distance: 0.6118692024121054
3. Candyman (1992), Distance: 0.6156226576802627
4. Needful Things (1993), Distance: 0.6239082646900627
5. Exorcist III, The (1990), Distance: 0.6399736348752996
6. Graveyard Shift (1990), Distance: 0.6400039888533708
7. Vampires (1998), Distance: 0.6409728745050918
8. Halloween: H20 (1998), Distance: 0.6465163816017214
9. Wes Craven's New Nightmare (1994), Distance: 0.6598665383683449
10. Scream 2 (1997), Distance: 0.6662488855316913
