In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.utils.data
from torch.autograd import Variable
import pickle
import joblib

In [2]:
course_name = pd.read_csv('Dataset/course_recommendation.csv')
course_ratings = pd.read_csv('Dataset/course_ratings.csv')

In [3]:
course_name.head()

Unnamed: 0,ID,COURSE_NAME
0,1,React
1,2,Vue
2,3,Angular
3,4,Keras
4,5,Redis


In [4]:
course_ratings.head()

Unnamed: 0,user_id,course_id,rating
0,1,1,1
1,1,2,1
2,1,3,1
3,1,4,0
4,1,5,1


In [5]:
training_set = pd.read_csv('Dataset/course_ratings.csv')
#test_set = pd.read_csv('Dataset/ml-100k/u1.test',delimiter='\t',header=None)

In [6]:
training_set.head()

Unnamed: 0,user_id,course_id,rating
0,1,1,1
1,1,2,1
2,1,3,1
3,1,4,0
4,1,5,1


In [7]:
training_set = np.array(training_set,dtype = 'int')

In [8]:
training_set

array([[ 1,  1,  1],
       [ 1,  2,  1],
       [ 1,  3,  1],
       ...,
       [20, 28,  0],
       [20, 29,  1],
       [20, 30,  1]])

In [9]:
nb_users = max(training_set[:,0])
nb_courses = max(training_set[:,1])
print(nb_users,nb_courses)

20 30


In [10]:
def convert(data):
    new_data = []
    for id_users in range(1,nb_users + 1):
        id_courses = data[:,1][data[:,0] == id_users]
        id_ratings = data[:,2][data[:,0] == id_users]
        ratings = np.zeros(nb_courses)
        ratings[id_courses - 1] = id_ratings
        new_data.append(list(ratings))
    return new_data

In [11]:
training_set = convert(training_set)

In [12]:
print(len(training_set))
print(len(training_set[0]))

20
30


In [13]:
training_set = torch.FloatTensor(training_set)

In [14]:
training_set[0]

tensor([ 1.,  1.,  1.,  0.,  1.,  1.,  0.,  0.,  1.,  1.,  0.,  0.,  1., -1.,
         1.,  1.,  1.,  1.,  0.,  0.,  0., -1., -1.,  1., -1., -1.,  1., -1.,
        -1., -1.])

In [15]:
class RBM():
    def __init__(self, nv, nh):
        self.W = torch.randn(nh, nv)
        self.a = torch.randn(1, nh)
        self.b = torch.randn(1, nv)
    def sample_h(self, x):
        wx = torch.mm(x, self.W.t())
        activation = wx + self.a.expand_as(wx)
        p_h_given_v = torch.sigmoid(activation)
        return p_h_given_v, torch.bernoulli(p_h_given_v)
    def sample_v(self, y):
        wy = torch.mm(y, self.W)
        activation = wy + self.b.expand_as(wy)
        p_v_given_h = torch.sigmoid(activation)
        return p_v_given_h, torch.bernoulli(p_v_given_h)
    def train(self, v0, vk, ph0, phk):
        self.W += (torch.mm(v0.t(),ph0) - torch.mm(vk.t(),phk)).t()
        self.b += torch.sum((v0 - vk), 0)
        self.a += torch.sum((ph0 - phk), 0)
    def predict(self, x):
        _, h = self.sample_h(x)
        _, v = self.sample_v(h)
        return v

In [16]:
nv = len(training_set[0])
print(nv)
nh = 100
batch_size = 4
rbm = RBM(nv, nh)

30


In [17]:
nb_epoch = 5
for epoch in range(1, nb_epoch + 1):
    train_loss = 0
    rmse_tr = 0
    s = 0.
    for id_user in range(0, nb_users - batch_size, batch_size):
        vk = training_set[id_user:id_user+batch_size]
        v0 = training_set[id_user:id_user+batch_size]
        ph0,_ = rbm.sample_h(v0)
        for k in range(10):
            _,hk = rbm.sample_h(vk)
            _,vk = rbm.sample_v(hk)
            vk[v0<0] = v0[v0<0]
        phk,_ = rbm.sample_h(vk)
        rbm.train(v0, vk, ph0, phk)
        train_loss += torch.mean(torch.abs(v0[v0>=0] - vk[v0>=0]))
        rmse_tr += np.sqrt(torch.mean((v0[v0>=0] - vk[v0>=0])**2))
        s += 1.
    print('Epoch: '+str(epoch)+' , Training Loss: '+str(train_loss/s))
    print('RMSE: '+str(rmse_tr))

Epoch: 1 , Training Loss: tensor(0.4654)
RMSE: tensor(2.7217)
Epoch: 2 , Training Loss: tensor(0.3443)
RMSE: tensor(2.3449)
Epoch: 3 , Training Loss: tensor(0.3105)
RMSE: tensor(2.2050)
Epoch: 4 , Training Loss: tensor(0.2987)
RMSE: tensor(2.1790)
Epoch: 5 , Training Loss: tensor(0.2121)
RMSE: tensor(1.8378)


In [18]:
course_title = course_name.iloc[:,1:2]

In [19]:
course_title = pd.DataFrame.transpose(course_title)

In [20]:
course_title

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
COURSE_NAME,React,Vue,Angular,Keras,Redis,Materialize,PyTorch,Streamlit,Web Dev,Flutter,...,React Native,NLP,OpenCV,PHP,Recommendor System,Swift,JavaScript,AI,Blockchain,Quantum Computing


In [21]:
training_set[4]

tensor([ 1.,  1.,  0.,  1.,  0.,  0.,  1., -1., -1.,  0.,  0.,  1.,  1.,  0.,
         1.,  1., -1., -1.,  1.,  1.,  0.,  1., -1.,  0.,  1.,  0., -1.,  1.,
         1., -1.])

In [22]:
user_id = 5
user_input = Variable(training_set[user_id - 1]).unsqueeze(0)
user_input

tensor([[ 1.,  1.,  0.,  1.,  0.,  0.,  1., -1., -1.,  0.,  0.,  1.,  1.,  0.,
          1.,  1., -1., -1.,  1.,  1.,  0.,  1., -1.,  0.,  1.,  0., -1.,  1.,
          1., -1.]])

In [23]:
# torch.save(rbm,"rbm_model_torch.pt")
# model = torch.load("rbm_model_torch.pt")
# output1 = model.predict(user_input)
# output1

In [24]:
filename = 'RBM_Model.pkl'
pickle.dump(rbm, open(filename, 'wb'))
 
# load the model from disk
loaded_model = pickle.load(open(filename, 'rb'))

In [25]:
loaded_model

<__main__.RBM at 0x18358944a90>

In [26]:
# filename = 'RBM_Model_joblib.sav'
# joblib.dump(rbm, filename)

# loaded_model_joblib = joblib.load(filename)
# print(loaded_model_joblib)

In [27]:
output = loaded_model.predict(user_input)
output = output.data.numpy()
output

array([[1., 1., 1., 1., 0., 0., 1., 1., 0., 0., 0., 1., 1., 1., 1., 1.,
        1., 0., 1., 1., 0., 1., 1., 0., 1., 0., 0., 1., 1., 0.]],
      dtype=float32)

In [28]:
input_output = np.vstack([course_title, user_input, output])

In [29]:
input_output

array([['React', 'Vue', 'Angular', 'Keras', 'Redis', 'Materialize',
        'PyTorch', 'Streamlit', 'Web Dev', 'Flutter', 'Kotlin', 'Java',
        'Python', 'C/C++', 'DevOps', 'Kubernets', 'Docker', 'PowerBI',
        'ML', 'DL', 'React Native', 'NLP', 'OpenCV', 'PHP',
        'Recommendor System', 'Swift', 'JavaScript', 'AI', 'Blockchain',
        'Quantum Computing'],
       [1.0, 1.0, 0.0, 1.0, 0.0, 0.0, 1.0, -1.0, -1.0, 0.0, 0.0, 1.0,
        1.0, 0.0, 1.0, 1.0, -1.0, -1.0, 1.0, 1.0, 0.0, 1.0, -1.0, 0.0,
        1.0, 0.0, -1.0, 1.0, 1.0, -1.0],
       [1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0,
        1.0, 1.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0,
        0.0, 1.0, 1.0, 0.0]], dtype=object)

In [30]:
input_output = pd.DataFrame(input_output)

In [31]:
input_output

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,20,21,22,23,24,25,26,27,28,29
0,React,Vue,Angular,Keras,Redis,Materialize,PyTorch,Streamlit,Web Dev,Flutter,...,React Native,NLP,OpenCV,PHP,Recommendor System,Swift,JavaScript,AI,Blockchain,Quantum Computing
1,1,1,0,1,0,0,1,-1,-1,0,...,0,1,-1,0,1,0,-1,1,1,-1
2,1,1,1,1,0,0,1,1,0,0,...,0,1,1,0,1,0,0,1,1,0


In [32]:
input_output = pd.DataFrame.transpose(input_output)

In [33]:
input_output

Unnamed: 0,0,1,2
0,React,1,1
1,Vue,1,1
2,Angular,0,1
3,Keras,1,1
4,Redis,0,0
5,Materialize,0,0
6,PyTorch,1,1
7,Streamlit,-1,1
8,Web Dev,-1,0
9,Flutter,0,0


In [34]:
# input_output = input_output[input_output.iloc[:, 2] == 1]
# input_output

In [35]:
input_output = input_output[input_output.iloc[:, 1] == -1]
input_output

Unnamed: 0,0,1,2
7,Streamlit,-1,1
8,Web Dev,-1,0
16,Docker,-1,1
17,PowerBI,-1,0
22,OpenCV,-1,1
26,JavaScript,-1,0
29,Quantum Computing,-1,0


In [36]:
input_output = input_output[input_output.iloc[:, 2] == 1]
input_output

Unnamed: 0,0,1,2
7,Streamlit,-1,1
16,Docker,-1,1
22,OpenCV,-1,1


In [37]:
input_output = input_output.iloc[:, 0:1]
input_output

Unnamed: 0,0
7,Streamlit
16,Docker
22,OpenCV


In [38]:
input_output.columns = ['Recommended Courses']
input_output

Unnamed: 0,Recommended Courses
7,Streamlit
16,Docker
22,OpenCV
