## **Data Loading and Preprocessing**

In [3]:
# Importing Libraries
import pandas as pd

# Load the MovieLens 1M Dataset
!wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
!unzip ml-1m.zip

ratings = pd.read_csv('ml-1m/ratings.dat', sep='::', engine='python',
                      names=['UserID', 'MovieID', 'Rating', 'Timestamp'], encoding='latin1')
movies = pd.read_csv('ml-1m/movies.dat', sep='::', engine='python',
                     names=['MovieID', 'Title', 'Genres'], encoding='latin1')
users = pd.read_csv('ml-1m/users.dat', sep='::', engine='python',
                    names=['UserID', 'Gender', 'Age', 'Occupation', 'Zip-code'], encoding='latin1')

# Merge the tables
merged_data = pd.merge(pd.merge(ratings, movies, on='MovieID'), users, on='UserID')

merged_data.head()



--2024-04-09 13:17:53--  http://files.grouplens.org/datasets/movielens/ml-1m.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5917549 (5.6M) [application/zip]
Saving to: ‘ml-1m.zip.1’


2024-04-09 13:17:55 (6.26 MB/s) - ‘ml-1m.zip.1’ saved [5917549/5917549]

Archive:  ml-1m.zip
replace ml-1m/movies.dat? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace ml-1m/ratings.dat? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace ml-1m/README? [y]es, [n]o, [A]ll, [N]one, [r]ename: n
replace ml-1m/users.dat? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


Unnamed: 0,UserID,MovieID,Rating,Timestamp,Title,Genres,Gender,Age,Occupation,Zip-code
0,1,1193,5,978300760,One Flew Over the Cuckoo's Nest (1975),Drama,F,1,10,48067
1,1,661,3,978302109,James and the Giant Peach (1996),Animation|Children's|Musical,F,1,10,48067
2,1,914,3,978301968,My Fair Lady (1964),Musical|Romance,F,1,10,48067
3,1,3408,4,978300275,Erin Brockovich (2000),Drama,F,1,10,48067
4,1,2355,5,978824291,"Bug's Life, A (1998)",Animation|Children's|Comedy,F,1,10,48067


## **Preparing Data for Surprise**

In [5]:
!pip install surprise


Collecting surprise
  Using cached surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise (from surprise)
  Using cached scikit-surprise-1.1.3.tar.gz (771 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.3-cp310-cp310-linux_x86_64.whl size=3163006 sha256=bd97c8004bfd3fa28feaf75971a56f80b60bf524e1eaa1c491e27fdeccdaaece
  Stored in directory: /root/.cache/pip/wheels/a5/ca/a8/4e28def53797fdc4363ca4af740db15a9c2f1595ebc51fb445
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.3 surprise-0.1


In [6]:
from surprise import Dataset, Reader
from surprise.model_selection import train_test_split

# Preparing Data for Surprise
# Surprise consumes (user, item, rating) triples in exactly this column order.
rating_triples = merged_data[['UserID', 'MovieID', 'Rating']]

# Ratings are declared to lie on a 1-5 scale.
reader = Reader(rating_scale=(1, 5))
data = Dataset.load_from_df(rating_triples, reader)

# Hold out 20% of the ratings for evaluation; the split is seeded.
trainset, testset = train_test_split(
    data,
    test_size=0.2,
    random_state=42,
)

trainset

<surprise.trainset.Trainset at 0x7f9b8bc18cd0>

## **Training Collaborative Filtering Algorithm**

In [7]:
from surprise import SVD
from surprise import Dataset

# Training Collaborative Filtering Algorithm
# SVD is initialised and optimised stochastically; seeding it makes the fitted
# model (and therefore the RMSE reported below) reproducible across runs,
# matching the seeded train/test split.
algo = SVD(random_state=42)
algo.fit(trainset)


<surprise.prediction_algorithms.matrix_factorization.SVD at 0x7f9b7adb5990>

## **Making Predictions and Calculating RMSE**

In [8]:
from sklearn.metrics import mean_squared_error
from math import sqrt

# Making Predictions
# Score every held-out rating in the test set with the trained model.
predictions = algo.test(testset)

# Calculating RMSE
# Compare the true ratings (r_ui) against the model's estimates (est).
true_ratings = [pred.r_ui for pred in predictions]
estimated_ratings = [pred.est for pred in predictions]
rmse = sqrt(mean_squared_error(true_ratings, estimated_ratings))
print("RMSE:", rmse)



RMSE: 0.8732657113634642
