In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the two raw tables from the local data directory.
movies, ratings = (
    pd.read_csv("./data/movies.csv"),
    pd.read_csv("./data/ratings.csv"),
)

# A. showing some data

## 5 first movies

In [4]:
movies.head(5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


## 5 last movies

In [5]:
movies.tail(5)

Unnamed: 0,movieId,title,genres
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
9739,193585,Flint (2017),Drama
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation
9741,193609,Andrew Dice Clay: Dice Rules (1991),Comedy


## 5 first ratings

In [6]:
ratings.head(5)

Unnamed: 0,userId,movieId,rating,timestamp
0,1,1,4.0,964982703
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931


## 5 last ratings

In [7]:
ratings.tail(5)

Unnamed: 0,userId,movieId,rating,timestamp
100831,610,166534,4.0,1493848402
100832,610,168248,5.0,1493850091
100833,610,168250,5.0,1494273047
100834,610,168252,5.0,1493846352
100835,610,170875,3.0,1493846415


## movies dataset shape

In [8]:
movies.shape

(9742, 3)

## ratings dataset shape

In [9]:
ratings.shape

(100836, 4)

### Create a `list_index` column in the movies dataset

In [12]:
# Store each movie's row label as a regular column so it survives the merge.
movies["list_index"] = movies.index.to_numpy()

# B. merge the two datasets

In [18]:
# Attach every rating to its movie row, matching on the shared movieId key.
df = movies.merge(ratings, on="movieId")

In [19]:
df.head()

Unnamed: 0,movieId,title,genres,list_index,userId,rating,timestamp
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,1,4.0,964982703
1,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,5,4.0,847434962
2,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,7,4.5,1106635946
3,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,15,2.5,1510577970
4,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,0,17,4.5,1305696483


In [20]:
df.shape


(100836, 7)

# C. Delete the extra columns

The `title`, `genres`, `timestamp` and `list_index` columns are not needed going forward, so they are dropped.

In [21]:
# Keep only the columns used downstream. Selecting them (instead of dropping the
# others) yields the same frame but keeps this cell safe to re-run: a second
# `drop` on the already-reduced `df` would raise KeyError.
df = df[["movieId", "userId", "rating"]]


In [22]:
df.head()

Unnamed: 0,movieId,userId,rating
0,1,1,4.0
1,1,5,4.0
2,1,7,4.5
3,1,15,2.5
4,1,17,4.5


# D. Group by `userId`

In [32]:
# Bucket the ratings by user, then preview the per-user row counts.
groups = df.groupby(by="userId")
groups.count().head(n=5)

Unnamed: 0_level_0,movieId,rating
userId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,232,232
2,29,29
3,39,39
4,216,216
5,44,44


# E. Normalize the ratings

In [24]:
from sklearn import preprocessing

In [25]:
df[["rating"]]

Unnamed: 0,rating
0,4.0
1,4.0
2,4.5
3,2.5
4,4.5
...,...
100831,4.0
100832,3.5
100833,3.5
100834,3.5


In [27]:
# Standardize the rating column: learn its statistics first, then apply them
# to the very same column.
scaler = preprocessing.StandardScaler()
train_x = scaler.fit(df[["rating"]]).transform(df[["rating"]])

In [28]:
train_x

array([[ 0.47811176],
       [ 0.47811176],
       [ 0.95771699],
       ...,
       [-0.00149347],
       [-0.00149347],
       [ 0.47811176]])

In [None]:
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from torch.autograd import Variable
import torch.optim as optim
import torch.nn.functional as F
import torch.nn as nn
import torch.utils.data
import torch
from sklearn import preprocessing
import pandas as pd
import numpy as np

In [None]:

class RBM(nn.Module):
    """Restricted Boltzmann Machine with binary units and k-step Gibbs sampling.

    Args:
        n_vis: Number of visible units (default presumably matches the number
            of distinct movies in the ratings data; confirm against the caller).
        n_hin: Number of hidden units.
        k: Number of Gibbs sampling steps performed by ``forward``.
    """

    def __init__(self,
                 n_vis=9724,
                 n_hin=20,
                 k=5):
        super().__init__()
        # Small random weights; both bias vectors start at zero.
        self.W = nn.Parameter(torch.randn(n_hin, n_vis) * 1e-2)
        self.v_bias = nn.Parameter(torch.zeros(n_vis))
        self.h_bias = nn.Parameter(torch.zeros(n_hin))
        self.k = k

    def sample_from_p(self, p):
        """Draw one Bernoulli sample per entry of ``p``.

        Args:
            p: Tensor of activation probabilities.

        Returns:
            Tensor of 0/1 values with the same shape, dtype and device as ``p``.
        """
        # rand_like keeps the noise on p's device/dtype, so sampling also works
        # when the model has been moved off the default device.
        return (p > torch.rand_like(p)).to(p.dtype)

    def v_to_h(self, v):
        """Compute hidden activation probabilities for ``v`` and sample from them.

        Returns:
            Tuple ``(p_h, sample_h)``: probabilities and their binary sample.
        """
        p_h = torch.sigmoid(F.linear(v, self.W, self.h_bias))
        sample_h = self.sample_from_p(p_h)
        return p_h, sample_h

    def h_to_v(self, h):
        """Compute visible activation probabilities for ``h`` and sample from them.

        Returns:
            Tuple ``(p_v, sample_v)``: probabilities and their binary sample.
        """
        p_v = torch.sigmoid(F.linear(h, self.W.t(), self.v_bias))
        sample_v = self.sample_from_p(p_v)
        return p_v, sample_v

    def forward(self, v):
        """Run ``self.k`` Gibbs steps starting from the visible batch ``v``.

        Args:
            v: Batch of visible vectors.

        Returns:
            Tuple ``(v, v_)``: the unchanged input and the visible sample
            obtained after the last Gibbs step. With ``k == 0`` no step is
            run and ``v_`` is ``v`` itself.
        """
        _p_h, h_ = self.v_to_h(v)

        # Fallback so that k == 0 returns the input instead of failing on an
        # unbound local.
        v_ = v
        for _step in range(self.k):
            _p_v, v_ = self.h_to_v(h_)
            _p_h, h_ = self.v_to_h(v_)

        return v, v_

    def free_energy(self, v):
        """Return the mean free energy of a 2-D batch of visible vectors.

        Args:
            v: 2-D tensor with one visible vector per row.

        Returns:
            Scalar tensor: the batch mean of ``-(v @ v_bias) - sum(softplus(W v + h_bias))``.
        """
        vbias_term = v.mv(self.v_bias)
        wx_b = F.linear(v, self.W, self.h_bias)
        # softplus(x) == log(1 + exp(x)) but does not overflow for large x.
        hidden_term = F.softplus(wx_b).sum(1)
        return (-hidden_term - vbias_term).mean()