In [68]:
import pandas as pd
import numpy as np

import sklearn
from sklearn.decomposition import TruncatedSVD

In [69]:
#preparing the data
columns=['user_id','item_id','rating','timestamp']
frame=pd.read_csv('u.data',sep='\t',names=columns)
frame.head()

Unnamed: 0,user_id,item_id,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [70]:
columns = ['item_id', 'movie title', 'release date', 'video release date', 'IMDb URL', 'unknown', 'Action', 'Adventure',
          'Animation', 'Childrens', 'Comedy', 'Crime', 'Documentary', 'Drama', 'Fantasy', 'Film-Noir', 'Horror',
          'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Thriller', 'War', 'Western']

movies = pd.read_csv('u.item', sep='|', names=columns, encoding='latin-1')
movie_names = movies[['item_id', 'movie title']]
movie_names.head()

Unnamed: 0,item_id,movie title
0,1,Toy Story (1995)
1,2,GoldenEye (1995)
2,3,Four Rooms (1995)
3,4,Get Shorty (1995)
4,5,Copycat (1995)


In [71]:
combined_movies_data = pd.merge(frame, movie_names, on='item_id')
combined_movies_data.head()

Unnamed: 0,user_id,item_id,rating,timestamp,movie title
0,196,242,3,881250949,Kolya (1996)
1,63,242,3,875747190,Kolya (1996)
2,226,242,5,883888671,Kolya (1996)
3,154,242,3,879138235,Kolya (1996)
4,306,242,5,876503793,Kolya (1996)


In [72]:
combined_movies_data.groupby('item_id')['rating'].count().sort_values(ascending=False).head()

item_id
50     583
258    509
100    508
181    507
294    485
Name: rating, dtype: int64

In [73]:
filter = combined_movies_data['item_id']==11
combined_movies_data[filter]['movie title'].unique()

array(['Seven (Se7en) (1995)'], dtype=object)

In [74]:
rating_crosstab = combined_movies_data.pivot_table(values='rating', index='user_id', columns='movie title', fill_value=0)
rating_crosstab.head()

movie title,'Til There Was You (1997),1-900 (1994),101 Dalmatians (1996),12 Angry Men (1957),187 (1997),2 Days in the Valley (1996),"20,000 Leagues Under the Sea (1954)",2001: A Space Odyssey (1968),3 Ninjas: High Noon At Mega Mountain (1998),"39 Steps, The (1935)",...,Yankee Zulu (1994),Year of the Horse (1997),You So Crazy (1994),Young Frankenstein (1974),Young Guns (1988),Young Guns II (1990),"Young Poisoner's Handbook, The (1995)",Zeus and Roxanne (1997),unknown,Á köldum klaka (Cold Fever) (1994)
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0,0,2,5,0,0,3,4,0,0,...,0,0,0,5,3,0,0,0,4,0
2,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,2,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,0,0,2,0,0,0,0,4,0,0,...,0,0,0,4,0,0,0,0,4,0


In [75]:
rating_crosstab.shape

(943, 1664)

In [76]:
X = rating_crosstab.T
X.shape

(1664, 943)

In [77]:
SVD = TruncatedSVD(n_components=12, random_state=17)

resultant_matrix = SVD.fit_transform(X)

resultant_matrix.shape

(1664, 12)

In [78]:
corr_mat = np.corrcoef(resultant_matrix)
corr_mat.shape

(1664, 1664)

In [79]:
movie_names = rating_crosstab.columns
movies_list = list(movie_names)

seven= movies_list.index('Seven (Se7en) (1995)')
seven

1301

In [80]:
seven = corr_mat[1398]
seven.shape

(1664,)

In [81]:
list(movie_names[(seven<1.0) & (seven >0.7)])


['12 Angry Men (1957)',
 '2 Days in the Valley (1996)',
 '2001: A Space Odyssey (1968)',
 'Abyss, The (1989)',
 'Akira (1988)',
 'Aladdin (1992)',
 'Alien (1979)',
 'Alien 3 (1992)',
 'Aliens (1986)',
 'Amadeus (1984)',
 'Apocalypse Now (1979)',
 'Apollo 13 (1995)',
 'Army of Darkness (1993)',
 'Arrival, The (1996)',
 'Austin Powers: International Man of Mystery (1997)',
 'Babe (1995)',
 'Back to the Future (1985)',
 'Bad Boys (1995)',
 'Bad Company (1995)',
 'Basic Instinct (1992)',
 'Batman & Robin (1997)',
 'Batman (1989)',
 'Batman Forever (1995)',
 'Batman Returns (1992)',
 'Beans of Egypt, Maine, The (1994)',
 'Beauty and the Beast (1991)',
 'Beavis and Butt-head Do America (1996)',
 'Ben-Hur (1959)',
 'Birdcage, The (1996)',
 'Blade Runner (1982)',
 'Blues Brothers, The (1980)',
 'Boot, Das (1981)',
 'Braveheart (1995)',
 'Brazil (1985)',
 'Bridge on the River Kwai, The (1957)',
 'Broken Arrow (1996)',
 'Butch Cassidy and the Sundance Kid (1969)',
 'Casablanca (1942)',
 'Casino 

In [82]:
list(movie_names[(seven<1.0) & (seven> 0.96)])

['Return of the Jedi (1983)']

In [85]:

last_10_movies = movies_list[-10:]

# Display the last 10 elements
last_10_movies


['Yankee Zulu (1994)',
 'Year of the Horse (1997)',
 'You So Crazy (1994)',
 'Young Frankenstein (1974)',
 'Young Guns (1988)',
 'Young Guns II (1990)',
 "Young Poisoner's Handbook, The (1995)",
 'Zeus and Roxanne (1997)',
 'unknown',
 'Á köldum klaka (Cold Fever) (1994)']

In [89]:
# Calculate average ratings for each movie
average_ratings = frame.groupby('item_id')['rating'].mean().sort_values(ascending=False)

# Display the top 10 movies with the highest average ratings
top_10_highly_rated_movies = average_ratings.head(10)
print(top_10_highly_rated_movies)



item_id
814     5.0
1599    5.0
1201    5.0
1122    5.0
1653    5.0
1293    5.0
1500    5.0
1189    5.0
1536    5.0
1467    5.0
Name: rating, dtype: float64


In [92]:
# Merge 'movies' with 'average_ratings' on 'item_id'
merged_data = pd.merge(movies, average_ratings, on='item_id')

# Display the top 10 movies with the highest average ratings along with their names
top_10_movies_with_names = merged_data.sort_values(by='rating', ascending=False).head(10)
print(top_10_movies_with_names[['item_id', 'movie title', 'rating']])


      item_id                                        movie title  rating
813       814                      Great Day in Harlem, A (1994)     5.0
1598     1599                      Someone Else's America (1995)     5.0
1200     1201         Marlene Dietrich: Shadow and Light (1996)      5.0
1121     1122                     They Made Me a Criminal (1939)     5.0
1652     1653  Entertaining Angels: The Dorothy Day Story (1996)     5.0
1292     1293                                    Star Kid (1997)     5.0
1499     1500                          Santa with Muscles (1996)     5.0
1188     1189                                 Prefontaine (1997)     5.0
1535     1536                               Aiqing wansui (1994)     5.0
1466     1467               Saint of Fort Washington, The (1993)     5.0
