In [1]:
import pandas as pd
import numpy as np
import torch
from torch.autograd import variable
from tqdm import tqdm_notebook as tqdm
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader 
from sklearn.cluster import KMeans


In [2]:
class MatrixFactorization(torch.nn.Module):
    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        # create user embeddings
        self.user_factors = torch.nn.Embedding(n_users, n_factors) # think of this as a lookup table for the input.
        # create item embeddings
        self.item_factors = torch.nn.Embedding(n_items, n_factors) # think of this as a lookup table for the input.
        self.user_factors.weight.data.uniform_(0, 0.05)
        self.item_factors.weight.data.uniform_(0, 0.05)
        
    def forward(self, data):
        # matrix multiplication
        users, items = data[:,0], data[:,1]
        return (self.user_factors(users)*self.item_factors(items)).sum(1)
    # def forward(self, user, item):
    # 	# matrix multiplication
    #     return (self.user_factors(user)*self.item_factors(item)).sum(1)
    
    def predict(self, user, item):
        return self.forward(user, item)

In [3]:
class Loader(Dataset):
    def __init__(self):
        self.ratings = ratings_df.copy()
        
        # Extract all user IDs and movie IDs
        users = ratings_df.userId.unique()
        movies = ratings_df.movieId.unique()
        
        #--- Producing new continuous IDs for users and movies ---
        
        # Unique values : index
        self.userid2idx = {o:i for i,o in enumerate(users)}
        self.movieid2idx = {o:i for i,o in enumerate(movies)}
        
        # Obtained continuous ID for users and movies
        self.idx2userid = {i:o for o,i in self.userid2idx.items()}
        self.idx2movieid = {i:o for o,i in self.movieid2idx.items()}
        
        # return the id from the indexed values as noted in the lambda function down below.
        self.ratings.movieId = ratings_df.movieId.apply(lambda x: self.movieid2idx[x])
        self.ratings.userId = ratings_df.userId.apply(lambda x: self.userid2idx[x])
        
        
        self.x = self.ratings.drop(['rating', 'timestamp'], axis=1).values
        self.y = self.ratings['rating'].values
        self.x, self.y = torch.tensor(self.x), torch.tensor(self.y) # Transforms the data to tensors (ready for torch models.)

    def __getitem__(self, index):
        return (self.x[index], self.y[index])

    def __len__(self):
        return len(self.ratings)

In [46]:
class recommender():
    def __init__(self, n_users, n_items,  num_epochs= 128, lr= 1e-3, n_factors= 8):
        self.num_epochs = num_epochs
        self.lr = lr
        self.n_users = n_users
        self.n_items = n_items
        self.n_factors = n_factors

    def clusters(self):
        cuda = torch.cuda.is_available()

        print("Is running on GPU:", cuda)
        
        model = MatrixFactorization(self.n_users, self.n_items, self.n_factors)
        print(model)
        for name, param in model.named_parameters():
            if param.requires_grad:
                print(name, param.data)
        # GPU enable if you have a GPU...
        if cuda:
            model = model.cuda()
        
        # MSE loss
        loss_fn = torch.nn.MSELoss()
        
        # ADAM optimizier
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        
        # Train data
        train_set = Loader()
        train_loader = DataLoader(train_set, 128, shuffle=True)

        for it in tqdm(range(self.num_epochs)):
            losses = []
            for x, y in train_loader:
                if cuda:
                    x, y = x.cuda(), y.cuda()
                else:
                    x, y = x.cpu(), y.cpu()
                    
                optimizer.zero_grad()
                outputs = model(x)
                loss = loss_fn(outputs.squeeze(), y.type(torch.float32))
                losses.append(loss.item())
                loss.backward()
                optimizer.step()
                
            print("iter #{}".format(it), "Loss:", sum(losses) / len(losses))

        # By training the model, we will have tuned latent factors for movies and users.
        c = 0
        uw = 0
        iw = 0 
        for name, param in model.named_parameters():
            if param.requires_grad:
                print(name, param.data)
                if c == 0:
                  uw = param.data
                  c +=1
                else:
                  iw = param.data
                #print('param_data', param_data)
        
        trained_movie_embeddings = model.item_factors.weight.data.cpu().numpy()
        
        kmeans = KMeans(n_clusters=10, random_state=0).fit(trained_movie_embeddings)

        sets = []
        for cluster in range(10):
          #print("Cluster #{}".format(cluster))
          movs = []
          now = []    
          for movidx in np.where(kmeans.labels_ == cluster)[0]:
            movid = train_set.idx2movieid[movidx]
            rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
            movs.append((movie_names[movid], rat_count))
          for mov in sorted(movs, key=lambda tup: tup[1], reverse=True)[:10]:
              
            print("\t", mov[0])
            now.append(mov[0])
          sets.append(now)
        return sets  
    

In [64]:
class user():
    def __init__(self, user_id):
        self.user_id = user_id

    def recommend(self, sets, product_id, timing):
        if timing > 1:
            for i in range(len(sets)):
                for j in range(len(sets[0])):
                    if product_id == sets[i][j]:
                        return sets[i]
        return  

In [25]:
! curl http://files.grouplens.org/datasets/movielens/ml-latest-small.zip -o ml-latest-small.zip

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  955k  100  955k    0     0   118k      0  0:00:08  0:00:08 --:--:--  150k


In [26]:
import zipfile
with zipfile.ZipFile('ml-latest-small.zip', 'r') as zip_ref:
    zip_ref.extractall('data')

In [27]:
movies_df = pd.read_csv('data/ml-latest-small/movies.csv')
ratings_df = pd.read_csv('data/ml-latest-small/ratings.csv')

In [28]:
print('The dimensions of movies dataframe are:', movies_df.shape,'\nThe dimensions of ratings dataframe are:', ratings_df.shape)

The dimensions of movies dataframe are: (9742, 3) 
The dimensions of ratings dataframe are: (100836, 4)


In [29]:
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [30]:
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [31]:
movie_names = movies_df.set_index('movieId')['title'].to_dict()
n_users = len(ratings_df.userId.unique())
n_items = len(ratings_df.movieId.unique())
print("Number of unique users:", n_users)
print("Number of unique movies:", n_items)
print("The full rating matrix will have:", n_users*n_items, 'elements.')
print('----------')
print("Number of ratings:", len(ratings_df))
print("Therefore: ", len(ratings_df) / (n_users*n_items) * 100, '% of the matrix is filled.')

Number of unique users: 610
Number of unique movies: 9724
The full rating matrix will have: 5931640 elements.
----------
Number of ratings: 100836
Therefore:  1.6999683055613624 % of the matrix is filled.


In [48]:
object1 = recommender(n_users,n_items)


In [49]:
sets = object1.clusters()
#print(sets)

Is running on GPU: False
MatrixFactorization(
  (user_factors): Embedding(610, 8)
  (item_factors): Embedding(9724, 8)
)
user_factors.weight tensor([[0.0172, 0.0129, 0.0217,  ..., 0.0429, 0.0428, 0.0493],
        [0.0228, 0.0348, 0.0351,  ..., 0.0117, 0.0435, 0.0385],
        [0.0285, 0.0370, 0.0307,  ..., 0.0453, 0.0014, 0.0046],
        ...,
        [0.0201, 0.0469, 0.0467,  ..., 0.0367, 0.0130, 0.0441],
        [0.0084, 0.0029, 0.0461,  ..., 0.0366, 0.0391, 0.0483],
        [0.0294, 0.0465, 0.0079,  ..., 0.0307, 0.0028, 0.0370]])
item_factors.weight tensor([[0.0420, 0.0450, 0.0007,  ..., 0.0021, 0.0477, 0.0445],
        [0.0482, 0.0034, 0.0193,  ..., 0.0494, 0.0009, 0.0278],
        [0.0203, 0.0317, 0.0021,  ..., 0.0202, 0.0129, 0.0477],
        ...,
        [0.0319, 0.0049, 0.0257,  ..., 0.0079, 0.0024, 0.0036],
        [0.0410, 0.0341, 0.0013,  ..., 0.0480, 0.0344, 0.0219],
        [0.0117, 0.0404, 0.0136,  ..., 0.0398, 0.0299, 0.0059]])


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for it in tqdm(range(self.num_epochs)):


  0%|          | 0/128 [00:00<?, ?it/s]

iter #0 Loss: 11.061484868151283
iter #1 Loss: 4.742324513227201
iter #2 Loss: 2.4730383328677434
iter #3 Loss: 1.71963187053724
iter #4 Loss: 1.344490702382199
iter #5 Loss: 1.1278049754461057
iter #6 Loss: 0.9907427002784565
iter #7 Loss: 0.8999291462492822
iter #8 Loss: 0.8370689717192336
iter #9 Loss: 0.7919690196738025
iter #10 Loss: 0.7592638654984193
iter #11 Loss: 0.7346916911641354
iter #12 Loss: 0.7163302797805234
iter #13 Loss: 0.7015832112676601
iter #14 Loss: 0.6904263845101226
iter #15 Loss: 0.6815674520159131
iter #16 Loss: 0.6749473458468006
iter #17 Loss: 0.6699332412199925
iter #18 Loss: 0.665657105255248
iter #19 Loss: 0.6627240513030648
iter #20 Loss: 0.6605145968307699
iter #21 Loss: 0.6590090983258892
iter #22 Loss: 0.6578649883796721
iter #23 Loss: 0.6570145747988357
iter #24 Loss: 0.6561804579447974
iter #25 Loss: 0.6549935326509669
iter #26 Loss: 0.6545170225619059
iter #27 Loss: 0.6540286281160291
iter #28 Loss: 0.6530741579868466
iter #29 Loss: 0.652232675458

  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Star Wars: Episode I - The Phantom Menace (1999)
	 Pretty Woman (1990)
	 Waterworld (1995)
	 Net, The (1995)
	 Four Weddings and a Funeral (1994)
	 Star Wars: Episode II - Attack of the Clones (2002)
	 Nutty Professor, The (1996)
	 Star Wars: Episode III - Revenge of the Sith (2005)
	 Game, The (1997)
	 Charlie's Angels (2000)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Toy Story (1995)
	 Aladdin (1992)
	 Back to the Future (1985)
	 Beauty and the Beast (1991)
	 Mrs. Doubtfire (1993)
	 Babe (1995)
	 E.T. the Extra-Terrestrial (1982)
	 Willy Wonka & the Chocolate Factory (1971)
	 Home Alone (1990)
	 Ghost (1990)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Batman & Robin (1997)
	 Godzilla (1998)
	 Super Mario Bros. (1993)
	 Nutty Professor II: The Klumps (2000)
	 Karate Kid, Part III, The (1989)
	 Flintstones in Viva Rock Vegas, The (2000)
	 Stop! Or My Mom Will Shoot (1992)
	 Queen of the Damned (2002)
	 Jane Austen's Mafia! (1998)
	 Undercover Brother (2002)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Before Sunrise (1995)
	 Doctor Zhivago (1965)
	 Dogville (2003)
	 Ran (1985)
	 Kelly's Heroes (1970)
	 Malcolm X (1992)
	 Brotherhood of the Wolf (Pacte des loups, Le) (2001)
	 Twelfth Night (1996)
	 No Man's Land (2001)
	 Inside Job (2010)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Independence Day (a.k.a. ID4) (1996)
	 True Lies (1994)
	 Twister (1996)
	 Sleepless in Seattle (1993)
	 Outbreak (1995)
	 While You Were Sleeping (1995)
	 Matrix Reloaded, The (2003)
	 Armageddon (1998)
	 Broken Arrow (1996)
	 Matrix Revolutions, The (2003)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Forrest Gump (1994)
	 Shawshank Redemption, The (1994)
	 Silence of the Lambs, The (1991)
	 Matrix, The (1999)
	 Star Wars: Episode IV - A New Hope (1977)
	 Jurassic Park (1993)
	 Braveheart (1995)
	 Terminator 2: Judgment Day (1991)
	 Schindler's List (1993)
	 Fight Club (1999)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Pulp Fiction (1994)
	 Usual Suspects, The (1995)
	 American Beauty (1999)
	 Godfather, The (1972)
	 Fargo (1996)
	 Memento (2000)
	 One Flew Over the Cuckoo's Nest (1975)
	 Reservoir Dogs (1992)
	 Eternal Sunshine of the Spotless Mind (2004)
	 Godfather: Part II, The (1974)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Ace Ventura: Pet Detective (1994)
	 Mask, The (1994)
	 Dumb & Dumber (Dumb and Dumber) (1994)
	 Kill Bill: Vol. 1 (2003)
	 Austin Powers: The Spy Who Shagged Me (1999)
	 Kill Bill: Vol. 2 (2004)
	 Star Trek: First Contact (1996)
	 Ace Ventura: When Nature Calls (1995)
	 Mummy, The (1999)
	 Demolition Man (1993)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Mission: Impossible (1996)
	 Batman Forever (1995)
	 Clear and Present Danger (1994)
	 Jumanji (1995)
	 Crimson Tide (1995)
	 Firm, The (1993)
	 Back to the Future Part III (1990)
	 Back to the Future Part II (1989)
	 Top Gun (1986)
	 Contact (1997)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Speed (1994)
	 Die Hard: With a Vengeance (1995)
	 Stargate (1994)
	 GoldenEye (1995)
	 Star Trek: Generations (1994)
	 Cliffhanger (1993)
	 Happy Gilmore (1996)
	 Addams Family Values (1993)
	 Santa Clause, The (1994)
	 Liar Liar (1997)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

In [50]:
print(sets)

[['Star Wars: Episode I - The Phantom Menace (1999)', 'Pretty Woman (1990)', 'Waterworld (1995)', 'Net, The (1995)', 'Four Weddings and a Funeral (1994)', 'Star Wars: Episode II - Attack of the Clones (2002)', 'Nutty Professor, The (1996)', 'Star Wars: Episode III - Revenge of the Sith (2005)', 'Game, The (1997)', "Charlie's Angels (2000)"], ['Toy Story (1995)', 'Aladdin (1992)', 'Back to the Future (1985)', 'Beauty and the Beast (1991)', 'Mrs. Doubtfire (1993)', 'Babe (1995)', 'E.T. the Extra-Terrestrial (1982)', 'Willy Wonka & the Chocolate Factory (1971)', 'Home Alone (1990)', 'Ghost (1990)'], ['Batman & Robin (1997)', 'Godzilla (1998)', 'Super Mario Bros. (1993)', 'Nutty Professor II: The Klumps (2000)', 'Karate Kid, Part III, The (1989)', 'Flintstones in Viva Rock Vegas, The (2000)', 'Stop! Or My Mom Will Shoot (1992)', 'Queen of the Damned (2002)', "Jane Austen's Mafia! (1998)", 'Undercover Brother (2002)'], ['Before Sunrise (1995)', 'Doctor Zhivago (1965)', 'Dogville (2003)', 'R

In [51]:
print(len(sets))

10


In [65]:
object2 = user(12)

In [66]:
object2.recommend(sets,'Speed (1994)',1.2)

['Speed (1994)',
 'Die Hard: With a Vengeance (1995)',
 'Stargate (1994)',
 'GoldenEye (1995)',
 'Star Trek: Generations (1994)',
 'Cliffhanger (1993)',
 'Happy Gilmore (1996)',
 'Addams Family Values (1993)',
 'Santa Clause, The (1994)',
 'Liar Liar (1997)']