In [1]:
import pandas as pd
import numpy as np
import torch
from torch.autograd import variable
from tqdm import tqdm_notebook as tqdm
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader 
from sklearn.cluster import KMeans


In [2]:
class MatrixFactorization(torch.nn.Module):
    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        # create user embeddings
        self.user_factors = torch.nn.Embedding(n_users, n_factors) # think of this as a lookup table for the input.
        # create item embeddings
        self.item_factors = torch.nn.Embedding(n_items, n_factors) # think of this as a lookup table for the input.
        self.user_factors.weight.data.uniform_(0, 0.05)
        self.item_factors.weight.data.uniform_(0, 0.05)
        
    def forward(self, data):
        # matrix multiplication
        users, items = data[:,0], data[:,1]
        return (self.user_factors(users)*self.item_factors(items)).sum(1)
    # def forward(self, user, item):
    # 	# matrix multiplication
    #     return (self.user_factors(user)*self.item_factors(item)).sum(1)
    
    def predict(self, user, item):
        return self.forward(user, item)

In [3]:
class Loader(Dataset):
    def __init__(self, ratings_df):
        self.ratings = ratings_df.copy()
        
        # Extract all user IDs and movie IDs
        users = ratings_df.userId.unique()
        movies = ratings_df.movieId.unique()
        
        #--- Producing new continuous IDs for users and movies ---
        
        # Unique values : index
        self.userid2idx = {o:i for i,o in enumerate(users)}
        self.movieid2idx = {o:i for i,o in enumerate(movies)}
        
        # Obtained continuous ID for users and movies
        self.idx2userid = {i:o for o,i in self.userid2idx.items()}
        self.idx2movieid = {i:o for o,i in self.movieid2idx.items()}
        
        # return the id from the indexed values as noted in the lambda function down below.
        self.ratings.movieId = ratings_df.movieId.apply(lambda x: self.movieid2idx[x])
        self.ratings.userId = ratings_df.userId.apply(lambda x: self.userid2idx[x])
        
        
        self.x = self.ratings.drop(['rating', 'timestamp'], axis=1).values
        self.y = self.ratings['rating'].values
        self.x, self.y = torch.tensor(self.x), torch.tensor(self.y) # Transforms the data to tensors (ready for torch models.)

    def __getitem__(self, index):
        return (self.x[index], self.y[index])

    def __len__(self):
        return len(self.ratings)

In [4]:
class recommender():
    def __init__(self,ratings_df, n_users, n_items,  num_epochs= 128, lr= 1e-3, n_factors= 8):
        self.num_epochs = num_epochs
        self.lr = lr
        self.n_users = n_users
        self.n_items = n_items
        self.n_factors = n_factors

    def clusters(self):
        cuda = torch.cuda.is_available()

        print("Is running on GPU:", cuda)
        
        model = MatrixFactorization(self.n_users, self.n_items, self.n_factors)
        print(model)
        for name, param in model.named_parameters():
            if param.requires_grad:
                print(name, param.data)
        # GPU enable if you have a GPU...
        if cuda:
            model = model.cuda()
        
        # MSE loss
        loss_fn = torch.nn.MSELoss()
        
        # ADAM optimizier
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
        
        # Train data
        train_set = Loader(ratings_df)
        train_loader = DataLoader(train_set, 128, shuffle=True)

        for it in tqdm(range(self.num_epochs)):
            losses = []
            for x, y in train_loader:
                if cuda:
                    x, y = x.cuda(), y.cuda()
                else:
                    x, y = x.cpu(), y.cpu()
                    
                optimizer.zero_grad()
                outputs = model(x)
                loss = loss_fn(outputs.squeeze(), y.type(torch.float32))
                losses.append(loss.item())
                loss.backward()
                optimizer.step()
                
            print("iter #{}".format(it), "Loss:", sum(losses) / len(losses))

        # By training the model, we will have tuned latent factors for movies and users.
        c = 0
        uw = 0
        iw = 0 
        for name, param in model.named_parameters():
            if param.requires_grad:
                print(name, param.data)
                if c == 0:
                  uw = param.data
                  c +=1
                else:
                  iw = param.data
                #print('param_data', param_data)
        
        trained_movie_embeddings = model.item_factors.weight.data.cpu().numpy()
        
        kmeans = KMeans(n_clusters=10, random_state=0).fit(trained_movie_embeddings)

        sets = []
        for cluster in range(10):
          #print("Cluster #{}".format(cluster))
          movs = []
          now = []    
          for movidx in np.where(kmeans.labels_ == cluster)[0]:
            movid = train_set.idx2movieid[movidx]
            rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
            movs.append((movie_names[movid], rat_count))
          for mov in sorted(movs, key=lambda tup: tup[1], reverse=True)[:10]:
              
            print("\t", mov[0])
            now.append(mov[0])
          sets.append(now)
        return sets  
    

In [5]:
class user():
    def __init__(self, user_id):
        self.user_id = user_id

    def recommend(self, sets, product_id, timing):
        if timing > 1:
            for i in range(len(sets)):
                for j in range(len(sets[0])):
                    if product_id == sets[i][j]:
                        return sets[i]
        return  

In [6]:
#! curl http://files.grouplens.org/datasets/movielens/ml-latest-small.zip -o ml-latest-small.zip

In [7]:
import zipfile
with zipfile.ZipFile('ml-latest-small.zip', 'r') as zip_ref:
    zip_ref.extractall('data')

In [8]:
movies_df = pd.read_csv('data/ml-latest-small/movies.csv')
ratings_df = pd.read_csv('data/ml-latest-small/ratings.csv')

In [9]:
print('The dimensions of movies dataframe are:', movies_df.shape,'\nThe dimensions of ratings dataframe are:', ratings_df.shape)

The dimensions of movies dataframe are: (9742, 3) 
The dimensions of ratings dataframe are: (100836, 4)


In [10]:
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [11]:
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [12]:
movie_names = movies_df.set_index('movieId')['title'].to_dict()
n_users = len(ratings_df.userId.unique())
n_items = len(ratings_df.movieId.unique())
print("Number of unique users:", n_users)
print("Number of unique movies:", n_items)
print("The full rating matrix will have:", n_users*n_items, 'elements.')
print('----------')
print("Number of ratings:", len(ratings_df))
print("Therefore: ", len(ratings_df) / (n_users*n_items) * 100, '% of the matrix is filled.')

Number of unique users: 610
Number of unique movies: 9724
The full rating matrix will have: 5931640 elements.
----------
Number of ratings: 100836
Therefore:  1.6999683055613624 % of the matrix is filled.


In [13]:
object1 = recommender(ratings_df,n_users,n_items)


In [14]:
sets = object1.clusters()
#print(sets)

Is running on GPU: False
MatrixFactorization(
  (user_factors): Embedding(610, 8)
  (item_factors): Embedding(9724, 8)
)
user_factors.weight tensor([[0.0156, 0.0247, 0.0106,  ..., 0.0300, 0.0418, 0.0451],
        [0.0010, 0.0169, 0.0455,  ..., 0.0017, 0.0382, 0.0051],
        [0.0114, 0.0332, 0.0454,  ..., 0.0023, 0.0165, 0.0045],
        ...,
        [0.0215, 0.0148, 0.0279,  ..., 0.0128, 0.0199, 0.0084],
        [0.0385, 0.0477, 0.0190,  ..., 0.0190, 0.0191, 0.0034],
        [0.0377, 0.0140, 0.0346,  ..., 0.0160, 0.0272, 0.0278]])
item_factors.weight tensor([[0.0001, 0.0018, 0.0465,  ..., 0.0242, 0.0030, 0.0209],
        [0.0122, 0.0476, 0.0093,  ..., 0.0158, 0.0133, 0.0487],
        [0.0330, 0.0213, 0.0445,  ..., 0.0034, 0.0424, 0.0429],
        ...,
        [0.0376, 0.0451, 0.0484,  ..., 0.0446, 0.0362, 0.0064],
        [0.0295, 0.0270, 0.0124,  ..., 0.0264, 0.0288, 0.0017],
        [0.0099, 0.0024, 0.0438,  ..., 0.0432, 0.0046, 0.0236]])


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for it in tqdm(range(self.num_epochs)):


  0%|          | 0/128 [00:00<?, ?it/s]

iter #0 Loss: 11.063829560570305
iter #1 Loss: 4.739663501378849
iter #2 Loss: 2.4729934670598372
iter #3 Loss: 1.7196876176722764
iter #4 Loss: 1.345562351763551
iter #5 Loss: 1.1284031658457014
iter #6 Loss: 0.9911790757161106
iter #7 Loss: 0.9001027992382873
iter #8 Loss: 0.8370348143244758
iter #9 Loss: 0.7922862972024128
iter #10 Loss: 0.7593225390475413
iter #11 Loss: 0.7344365471283797
iter #12 Loss: 0.7159448192658158
iter #13 Loss: 0.7015106977166863
iter #14 Loss: 0.6905065145589373
iter #15 Loss: 0.6818178763168717
iter #16 Loss: 0.6748230669099062
iter #17 Loss: 0.6696978923935576
iter #18 Loss: 0.6658463807502374
iter #19 Loss: 0.6628732982701456
iter #20 Loss: 0.6605902155341231
iter #21 Loss: 0.6589259695885751
iter #22 Loss: 0.6577078273075486
iter #23 Loss: 0.6564887007556591
iter #24 Loss: 0.6558567144468351
iter #25 Loss: 0.6549704925436054
iter #26 Loss: 0.6543171046878481
iter #27 Loss: 0.6529870180762963
iter #28 Loss: 0.6521942840161057
iter #29 Loss: 0.651198779

  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Ace Ventura: Pet Detective (1994)
	 Mrs. Doubtfire (1993)
	 Die Hard: With a Vengeance (1995)
	 Dumb & Dumber (Dumb and Dumber) (1994)
	 Home Alone (1990)
	 Interview with the Vampire: The Vampire Chronicles (1994)
	 Indiana Jones and the Temple of Doom (1984)
	 Outbreak (1995)
	 Happy Gilmore (1996)
	 Ace Ventura: When Nature Calls (1995)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Forrest Gump (1994)
	 Shawshank Redemption, The (1994)
	 Silence of the Lambs, The (1991)
	 Matrix, The (1999)
	 Star Wars: Episode IV - A New Hope (1977)
	 Braveheart (1995)
	 Terminator 2: Judgment Day (1991)
	 Star Wars: Episode V - The Empire Strikes Back (1980)
	 Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)
	 Star Wars: Episode VI - Return of the Jedi (1983)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Jurassic Park (1993)
	 Toy Story (1995)
	 Apollo 13 (1995)
	 Batman (1989)
	 Aladdin (1992)
	 Lion King, The (1994)
	 Speed (1994)
	 Gladiator (2000)
	 Shrek (2001)
	 Men in Black (a.k.a. MIB) (1997)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Schindler's List (1993)
	 One Flew Over the Cuckoo's Nest (1975)
	 Willy Wonka & the Chocolate Factory (1971)
	 Crouching Tiger, Hidden Dragon (Wo hu cang long) (2000)
	 2001: A Space Odyssey (1968)
	 Four Weddings and a Funeral (1994)
	 Casablanca (1942)
	 Nightmare Before Christmas, The (1993)
	 Wizard of Oz, The (1939)
	 Natural Born Killers (1994)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Dances with Wolves (1990)
	 Titanic (1997)
	 Pretty Woman (1990)
	 Ghost (1990)
	 Sleepless in Seattle (1993)
	 Clueless (1995)
	 As Good as It Gets (1997)
	 Back to the Future Part III (1990)
	 Dead Poets Society (1989)
	 American President, The (1995)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Godzilla (1998)
	 Next Karate Kid, The (1994)
	 Shark Tale (2004)
	 Stepford Wives, The (2004)
	 Kazaam (1996)
	 Dukes of Hazzard, The (2005)
	 Problem Child (1990)
	 Queen of the Damned (2002)
	 Alexander (2004)
	 Freddy Got Fingered (2001)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Pulp Fiction (1994)
	 Fight Club (1999)
	 Usual Suspects, The (1995)
	 American Beauty (1999)
	 Seven (a.k.a. Se7en) (1995)
	 Lord of the Rings: The Fellowship of the Ring, The (2001)
	 Godfather, The (1972)
	 Lord of the Rings: The Return of the King, The (2003)
	 Fargo (1996)
	 Twelve Monkeys (a.k.a. 12 Monkeys) (1995)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Independence Day (a.k.a. ID4) (1996)
	 True Lies (1994)
	 Pirates of the Caribbean: The Curse of the Black Pearl (2003)
	 Stargate (1994)
	 Star Wars: Episode I - The Phantom Menace (1999)
	 X-Men (2000)
	 Twister (1996)
	 Spider-Man (2002)
	 Batman Begins (2005)
	 Clear and Present Danger (1994)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Matrix Reloaded, The (2003)
	 Star Wars: Episode II - Attack of the Clones (2002)
	 Meet the Parents (2000)
	 Hunt for Red October, The (1990)
	 Airplane! (1980)
	 Starship Troopers (1997)
	 Matrix Revolutions, The (2003)
	 In the Line of Fire (1993)
	 Scream (1996)
	 Scarface (1983)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Mission: Impossible (1996)
	 Mask, The (1994)
	 Batman Forever (1995)
	 Rock, The (1996)
	 Waterworld (1995)
	 Net, The (1995)
	 Jumanji (1995)
	 Star Trek: Generations (1994)
	 Crimson Tide (1995)
	 Cliffhanger (1993)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

In [15]:
print(sets)

[['Ace Ventura: Pet Detective (1994)', 'Mrs. Doubtfire (1993)', 'Die Hard: With a Vengeance (1995)', 'Dumb & Dumber (Dumb and Dumber) (1994)', 'Home Alone (1990)', 'Interview with the Vampire: The Vampire Chronicles (1994)', 'Indiana Jones and the Temple of Doom (1984)', 'Outbreak (1995)', 'Happy Gilmore (1996)', 'Ace Ventura: When Nature Calls (1995)'], ['Forrest Gump (1994)', 'Shawshank Redemption, The (1994)', 'Silence of the Lambs, The (1991)', 'Matrix, The (1999)', 'Star Wars: Episode IV - A New Hope (1977)', 'Braveheart (1995)', 'Terminator 2: Judgment Day (1991)', 'Star Wars: Episode V - The Empire Strikes Back (1980)', 'Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)', 'Star Wars: Episode VI - Return of the Jedi (1983)'], ['Jurassic Park (1993)', 'Toy Story (1995)', 'Apollo 13 (1995)', 'Batman (1989)', 'Aladdin (1992)', 'Lion King, The (1994)', 'Speed (1994)', 'Gladiator (2000)', 'Shrek (2001)', 'Men in Black (a.k.a. MIB) (1997)'], ["Schindler's L

In [16]:
print(len(sets))

10


In [17]:
object2 = user(12)

In [18]:
object2.recommend(sets,'Speed (1994)',1.2)

['Jurassic Park (1993)',
 'Toy Story (1995)',
 'Apollo 13 (1995)',
 'Batman (1989)',
 'Aladdin (1992)',
 'Lion King, The (1994)',
 'Speed (1994)',
 'Gladiator (2000)',
 'Shrek (2001)',
 'Men in Black (a.k.a. MIB) (1997)']