In [1]:
import torch
import pandas as pd
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch import nn
from torch.utils.data import Dataset, DataLoader

In [2]:
import json
with open("RatingData_Train_Network.json", "r") as train_json:
    train_python = json.load(train_json)
with open("RatingData_Test_Network.json", "r") as test_network_json:
    test_network_python = json.load(test_network_json)

temp_student_id_network = test_network_python[0]['student_id']
    
list_to_pd = []

for i in range (0, len(train_python)):
    list_to_pd.append([int(train_python[i]['student_id']), int(train_python[i]['course_id']), train_python[i]['ratings']])

for i in range(0, len(test_network_python)):
    list_to_pd.append([int(train_python[len(train_python)-1]['student_id'])+1, int(test_network_python[i]['course_id']), test_network_python[i]['ratings']])

    
df = pd.DataFrame(list_to_pd, columns=['student_id', 'course_id', 'ratings'])

df

Unnamed: 0,student_id,course_id,ratings
0,1,1,4
1,1,2,4
2,1,3,4
3,1,4,5
4,1,5,3
...,...,...,...
435,11,36,0
436,11,37,0
437,11,38,0
438,11,39,0


In [3]:
list_to_test_network = []

list_zero_score_course_id = []

for i in range(0, len(test_network_python)):
    list_to_test_network.append([int(1), int(test_network_python[i]['course_id']), test_network_python[i]['ratings']])
    if test_network_python[i]['ratings'] == 0 :
        list_zero_score_course_id.append(int(test_network_python[i]['course_id']))

test_network_df = pd.DataFrame(list_to_test_network, columns=['student_id', 'course_id', 'ratings'])

test_network_df

Unnamed: 0,student_id,course_id,ratings
0,1,1,4
1,1,2,4
2,1,3,3
3,1,4,5
4,1,5,3
5,1,6,3
6,1,7,4
7,1,8,3
8,1,9,5
9,1,10,2


In [4]:
class AIRDataSet(Dataset):
    def __init__(self, datapath):
        self.data_pd = datapath
        self.items = torch.LongTensor(self.data_pd['course_id'])
        self.users = torch.LongTensor(self.data_pd['student_id'])
        self.ratings = torch.FloatTensor(self.data_pd['ratings'])
        print(len(self.ratings))
        
    def __len__(self):
        return len(self.ratings)
    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.ratings[idx]
    
    def get_datasize(self):
        return self.users.max() + 1, self.items.max() + 1, len(self.ratings)

train_data = AIRDataSet(df)
test_data_network = AIRDataSet(test_network_df)

batch_size = 40
train_loader = DataLoader(train_data, batch_size = batch_size, shuffle = False)
test_loader_network = DataLoader(test_data_network, batch_size = batch_size, shuffle = False)

440
40


In [5]:
n_users, n_items, n_ratings = train_data.get_datasize()
_, _, n_ratings_test_network = test_data_network.get_datasize()


print(n_users, n_items, n_ratings, n_ratings_test_network)

tensor(12) tensor(41) 440 40


In [6]:
class MF(nn.Module):
    def __init__(self, num_users, num_items, rank=1):
        super().__init__()
        self.U = torch.nn.Parameter(torch.randn(num_users, rank))
        self.V = torch.nn.Parameter(torch.randn(num_items, rank))
        
    def forward(self, users, items):
        ratings = torch.sum(self.U[users]*self.V[items], dim=-1)
        return ratings

In [7]:
mf_model = MF(n_users, n_items, rank=1)
optimizer = torch.optim.Adam(mf_model.parameters(), lr=0.01)
criterion = nn.MSELoss()

for epoch in range(200):
    cost = 0
    for users, items, ratings in train_loader:
        optimizer.zero_grad()
        ratings_pred = mf_model(users, items)
        loss = criterion(ratings_pred, ratings)
        loss.backward()
        optimizer.step()
        cost += loss.item() * len(ratings)
        
    cost /= n_ratings
    
    #print(f"Epoch : {epoch}")
    #print("train cost : {:.6f}".format(cost))
    
for users, items, ratings in test_loader_network:
    cost_test = 0
    ratings_pred = mf_model(users, items)
    loss = criterion(ratings_pred, ratings)
    cost_test += loss.item() * len(ratings)
    print(ratings)
    print(ratings_pred)

tensor([4., 4., 3., 5., 3., 3., 4., 3., 5., 2., 2., 2., 2., 3., 2., 2., 2., 2.,
        3., 5., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.])
tensor([3.9634, 4.0555, 4.0070, 5.1946, 3.1160, 3.0180, 3.2641, 3.0124, 5.1945,
        2.1817, 1.9832, 2.0770, 2.2797, 2.0282, 2.3744, 2.3786, 1.7864, 1.9764,
        2.7182, 5.1942, 0.9927, 1.2830, 1.3914, 0.9925, 1.4929, 0.9930, 4.9589,
        3.6757, 3.7708, 3.7676, 3.7746, 3.4710, 3.8664, 3.8703, 3.6722, 3.8692,
        3.6774, 3.9642, 3.8681, 4.9587], grad_fn=<SumBackward1>)


In [8]:
my_string = ""
loop_start = list_zero_score_course_id[0]
loop_end = list_zero_score_course_id[-1]

for i in list_zero_score_course_id:
    if i == loop_end:
        my_string += '{}"student_id":{},"course_id":{},"grade":{},"major":"software","keywords":"security","ratings":{}{}{}'.format("{", temp_student_id_network, int(train_python[i-1]['course_id']), int(train_python[i-1]['grade']), round(float(ratings_pred[i-1]), 4), "}", "]")
    elif i == loop_start:
        my_string += '{}{}"student_id":{},"course_id":{},"grade":{},"major":"software","keywords":"security","ratings":{}{},'.format("[", "{", temp_student_id_network, int(train_python[i-1]['course_id']), int(train_python[i-1]['grade']), round(float(ratings_pred[i-1]), 4), "}")
    else:
        my_string += '{}"student_id":{},"course_id":{},"grade":{},"major":"software","keywords":"security","ratings":{}{},'.format("{", temp_student_id_network, int(train_python[i-1]['course_id']), int(train_python[i-1]['grade']), round(float(ratings_pred[i-1]), 4), "}")

In [9]:
with open("./Result_Network.json", 'w') as outfile:
    json.dump(json.loads(my_string), outfile, indent=4)