In [3]:
import pandas as pd 


In [4]:
movies_df = pd.read_csv("ml-latest-small/movies.csv")
ratings_df = pd.read_csv("ml-latest-small/ratings.csv")

In [5]:
print(f"Dimension of movies {movies_df.shape}\nDimension of ratings {ratings_df.shape}")

Dimension of movies (9742, 3)
Dimension of ratings (100836, 4)


In [6]:
n_users = len(ratings_df.userId.unique())
n_items = len(ratings_df.movieId.unique())
movie_names = movies_df.set_index('movieId')['title'].to_dict()

In [7]:
ratings_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


In [8]:
import torch
import numpy as np
from torch.autograd import Variable
from tqdm.notebook import tqdm



class MatrixFactorization(torch.nn.Module):
    def __init__(self, n_users, n_items, n_factors=20):
        super().__init__()
        # create user embeddings
        self.user_factors = torch.nn.Embedding(n_users, n_factors) # think of this as a lookup table for the input.
        # create item embeddings
        self.item_factors = torch.nn.Embedding(n_items, n_factors) # think of this as a lookup table for the input.
        self.user_factors.weight.data.uniform_(0, 0.05)
        self.item_factors.weight.data.uniform_(0, 0.05)
        
    def forward(self, data):
        # matrix multiplication
        users, items = data[:,0], data[:,1]
        return (self.user_factors(users)*self.item_factors(items)).sum(1)

    def predict(self, user, item):
        return self.forward(user, item)

In [9]:
# Creating the dataloader (necessary for PyTorch)
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader # package that helps transform your data to machine learning readiness

class Loader(Dataset):
    def __init__(self):
        self.ratings = ratings_df.copy()
        
        # Extract all user IDs and movie IDs
        users = ratings_df.userId.unique()
        movies = ratings_df.movieId.unique()
        
        #--- Producing new continuous IDs for users and movies ---
        
        # Unique values : index
        self.userid2idx = {o:i for i,o in enumerate(users)}
        self.movieid2idx = {o:i for i,o in enumerate(movies)}
        
        # Obtained continuous ID for users and movies
        self.idx2userid = {i:o for o,i in self.userid2idx.items()}
        self.idx2movieid = {i:o for o,i in self.movieid2idx.items()}
        
        # return the id from the indexed values as noted in the lambda function down below.
        self.ratings.movieId = ratings_df.movieId.apply(lambda x: self.movieid2idx[x])
        self.ratings.userId = ratings_df.userId.apply(lambda x: self.userid2idx[x])
        
        
        self.x = self.ratings.drop(['rating', 'timestamp'], axis=1).values
        self.y = self.ratings['rating'].values
        self.x, self.y = torch.tensor(self.x), torch.tensor(self.y) # Transforms the data to tensors (ready for torch models.)

    def __getitem__(self, index):
        return (self.x[index], self.y[index])

    def __len__(self):
        return len(self.ratings)
     

In [10]:
num_epochs = 200
cuda = torch.cuda.is_available()

print("Is running on GPU:", cuda)

model = MatrixFactorization(n_users, n_items, n_factors=8)
print(model)
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data)
# GPU enable if you have a GPU...
if cuda:
    model = model.cuda()

# MSE loss
loss_fn = torch.nn.MSELoss()

# ADAM optimizier
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Train data
train_set = Loader()
train_loader = DataLoader(train_set, 128, shuffle=True)
     

Is running on GPU: False
MatrixFactorization(
  (user_factors): Embedding(610, 8)
  (item_factors): Embedding(9724, 8)
)
user_factors.weight tensor([[0.0339, 0.0424, 0.0033,  ..., 0.0484, 0.0145, 0.0398],
        [0.0292, 0.0480, 0.0374,  ..., 0.0423, 0.0263, 0.0407],
        [0.0481, 0.0307, 0.0074,  ..., 0.0358, 0.0405, 0.0075],
        ...,
        [0.0064, 0.0259, 0.0200,  ..., 0.0187, 0.0278, 0.0334],
        [0.0201, 0.0381, 0.0201,  ..., 0.0323, 0.0032, 0.0180],
        [0.0033, 0.0471, 0.0102,  ..., 0.0003, 0.0454, 0.0389]])
item_factors.weight tensor([[0.0310, 0.0219, 0.0137,  ..., 0.0088, 0.0292, 0.0117],
        [0.0396, 0.0394, 0.0187,  ..., 0.0066, 0.0427, 0.0359],
        [0.0393, 0.0190, 0.0435,  ..., 0.0478, 0.0077, 0.0350],
        ...,
        [0.0035, 0.0269, 0.0001,  ..., 0.0188, 0.0384, 0.0245],
        [0.0100, 0.0341, 0.0128,  ..., 0.0044, 0.0099, 0.0129],
        [0.0235, 0.0180, 0.0110,  ..., 0.0046, 0.0068, 0.0415]])


In [11]:
train_loader

<torch.utils.data.dataloader.DataLoader at 0x12d5d46af50>

In [12]:
for it in tqdm(range(num_epochs)):
    losses = []
    for x, y in train_loader:
         if cuda:
            x, y = x.cuda(), y.cuda()
            optimizer.zero_grad()
            outputs = model(x)
            loss = loss_fn(outputs.squeeze(), y.type(torch.float32))
            losses.append(loss.item())
            loss.backward()
            optimizer.step()

  0%|          | 0/200 [00:00<?, ?it/s]

In [17]:
# By training the model, we will have tuned latent factors for movies and users.
c = 0
uw = 0
iw = 0 
for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data)
        if c == 0:
          uw = param.data
          c +=1
        else:
          iw = param.data
        print('param_data', param.data)

user_factors.weight tensor([[0.0339, 0.0424, 0.0033,  ..., 0.0484, 0.0145, 0.0398],
        [0.0292, 0.0480, 0.0374,  ..., 0.0423, 0.0263, 0.0407],
        [0.0481, 0.0307, 0.0074,  ..., 0.0358, 0.0405, 0.0075],
        ...,
        [0.0064, 0.0259, 0.0200,  ..., 0.0187, 0.0278, 0.0334],
        [0.0201, 0.0381, 0.0201,  ..., 0.0323, 0.0032, 0.0180],
        [0.0033, 0.0471, 0.0102,  ..., 0.0003, 0.0454, 0.0389]])
item_factors.weight tensor([[0.0310, 0.0219, 0.0137,  ..., 0.0088, 0.0292, 0.0117],
        [0.0396, 0.0394, 0.0187,  ..., 0.0066, 0.0427, 0.0359],
        [0.0393, 0.0190, 0.0435,  ..., 0.0478, 0.0077, 0.0350],
        ...,
        [0.0035, 0.0269, 0.0001,  ..., 0.0188, 0.0384, 0.0245],
        [0.0100, 0.0341, 0.0128,  ..., 0.0044, 0.0099, 0.0129],
        [0.0235, 0.0180, 0.0110,  ..., 0.0046, 0.0068, 0.0415]])


In [14]:
trained_movie_embeddings = model.item_factors.weight.data.cpu().numpy()
len(trained_movie_embeddings) # unique movie factor weights

9724

In [15]:
from sklearn.cluster import KMeans
# Fit the clusters based on the movie weights
kmeans = KMeans(n_clusters=10, random_state=0).fit(trained_movie_embeddings)

In [16]:
for cluster in range(10):
  print("Cluster #{}".format(cluster))
  movs = []
  for movidx in np.where(kmeans.labels_ == cluster)[0]:
    movid = train_set.idx2movieid[movidx]
    rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
    movs.append((movie_names[movid], rat_count))
  for mov in sorted(movs, key=lambda tup: tup[1], reverse=True)[:10]:
    print("\t", mov[0])

Cluster #0


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Forrest Gump (1994)
	 American Beauty (1999)
	 Batman (1989)
	 Sixth Sense, The (1999)
	 True Lies (1994)
	 Speed (1994)
	 Dark Knight, The (2008)
	 Groundhog Day (1993)
	 Batman Forever (1995)
	 Rock, The (1996)
Cluster #1


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Star Wars: Episode IV - A New Hope (1977)
	 Braveheart (1995)
	 Lord of the Rings: The Fellowship of the Ring, The (2001)
	 Gladiator (2000)
	 Terminator, The (1984)
	 American History X (1998)
	 Spider-Man (2002)
	 Austin Powers: The Spy Who Shagged Me (1999)
	 Jumanji (1995)
	 Harry Potter and the Sorcerer's Stone (a.k.a. Harry Potter and the Philosopher's Stone) (2001)
Cluster #2


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Silence of the Lambs, The (1991)
	 Jurassic Park (1993)
	 Fugitive, The (1993)
	 Fargo (1996)
	 Twelve Monkeys (a.k.a. 12 Monkeys) (1995)
	 Mission: Impossible (1996)
	 Dumb & Dumber (Dumb and Dumber) (1994)
	 Monsters, Inc. (2001)
	 Twister (1996)
	 E.T. the Extra-Terrestrial (1982)
Cluster #3


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Shawshank Redemption, The (1994)
	 Fight Club (1999)
	 Usual Suspects, The (1995)
	 Saving Private Ryan (1998)
	 Alien (1979)
	 Reservoir Dogs (1992)
	 Kill Bill: Vol. 1 (2003)
	 Eternal Sunshine of the Spotless Mind (2004)
	 Babe (1995)
	 Beautiful Mind, A (2001)
Cluster #4


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Pulp Fiction (1994)
	 Independence Day (a.k.a. ID4) (1996)
	 Apollo 13 (1995)
	 Raiders of the Lost Ark (Indiana Jones and the Raiders of the Lost Ark) (1981)
	 Lion King, The (1994)
	 Beauty and the Beast (1991)
	 Die Hard (1988)
	 Die Hard: With a Vengeance (1995)
	 Princess Bride, The (1987)
	 Good Will Hunting (1997)
Cluster #5


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Schindler's List (1993)
	 Back to the Future (1985)
	 Shrek (2001)
	 Mrs. Doubtfire (1993)
	 Stargate (1994)
	 Indiana Jones and the Last Crusade (1989)
	 Star Wars: Episode I - The Phantom Menace (1999)
	 Godfather: Part II, The (1974)
	 Goodfellas (1990)
	 Blade Runner (1982)
Cluster #6


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Men in Black (a.k.a. MIB) (1997)
	 Pirates of the Caribbean: The Curse of the Black Pearl (2003)
	 Inception (2010)
	 Monty Python and the Holy Grail (1975)
	 Pretty Woman (1990)
	 X-Men (2000)
	 Amelie (Fabuleux destin d'Amélie Poulain, Le) (2001)
	 Bourne Identity, The (2002)
	 Kill Bill: Vol. 2 (2004)
	 2001: A Space Odyssey (1968)
Cluster #7


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Seven (a.k.a. Se7en) (1995)
	 Godfather, The (1972)
	 Lord of the Rings: The Two Towers, The (2002)
	 Lord of the Rings: The Return of the King, The (2003)
	 Titanic (1997)
	 One Flew Over the Cuckoo's Nest (1975)
	 Truman Show, The (1998)
	 Incredibles, The (2004)
	 Fifth Element, The (1997)
	 Green Mile, The (1999)
Cluster #8


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Matrix, The (1999)
	 Terminator 2: Judgment Day (1991)
	 Toy Story (1995)
	 Star Wars: Episode VI - Return of the Jedi (1983)
	 Ace Ventura: Pet Detective (1994)
	 Memento (2000)
	 Mask, The (1994)
	 Finding Nemo (2003)
	 Léon: The Professional (a.k.a. The Professional) (Léon) (1994)
	 Catch Me If You Can (2002)
Cluster #9


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating

	 Star Wars: Episode V - The Empire Strikes Back (1980)
	 Aladdin (1992)
	 Dances with Wolves (1990)
	 GoldenEye (1995)
	 Ghost (1990)
	 Clear and Present Danger (1994)
	 Four Weddings and a Funeral (1994)
	 American Pie (1999)
	 Harry Potter and the Prisoner of Azkaban (2004)
	 Big (1988)


  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = ratings_df.loc[ratings_df['movieId']==movid].count()[0]
  rat_count = rating