In [1]:
import torch
import pandas as pd
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch import nn
from torch.utils.data import Dataset, DataLoader

In [2]:
import json
with open("RatingData_Train_Security.json", "r") as train_json:
    train_python = json.load(train_json)
with open("RatingData_Test_Security.json", "r") as test_security_json:
    test_security_python = json.load(test_security_json)

list_to_pd = []

temp_student_id_security = test_security_python[0]['student_id']

for i in range (0, len(train_python)):
    list_to_pd.append([int(train_python[i]['student_id']), int(train_python[i]['course_id']), train_python[i]['ratings']])

for i in range(0, len(test_security_python)):
    list_to_pd.append([int(train_python[len(train_python)-1]['student_id'])+1, int(test_security_python[i]['course_id']), test_security_python[i]['ratings']])
    
df = pd.DataFrame(list_to_pd, columns=['student_id', 'course_id', 'ratings'])

df

Unnamed: 0,student_id,course_id,ratings
0,1,1,1
1,1,2,1
2,1,3,1
3,1,4,1
4,1,5,1
...,...,...,...
475,11,36,0
476,11,37,0
477,11,38,0
478,11,39,0


In [3]:
list_to_test_security = []

list_zero_score_course_id = []

for i in range(0, len(test_security_python)):
    list_to_test_security.append([int(1), int(test_security_python[i]['course_id']), test_security_python[i]['ratings']])
    if test_security_python[i]['ratings'] == 0 :
        list_zero_score_course_id.append(int(test_security_python[i]['course_id']))

test_security_df = pd.DataFrame(list_to_test_security, columns=['student_id', 'course_id', 'ratings'])

In [4]:
class AIRDataset(Dataset):
    def __init__(self, datapath):
        self.data_pd = datapath
        self.items = torch.LongTensor(self.data_pd['course_id'])
        self.users = torch.LongTensor(self.data_pd['student_id'])
        self.ratings = torch.FloatTensor(self.data_pd['ratings'])
        
    def __len__(self):
        return len(self.ratings)
    def __getitem__(self, idx):
        return self.users[idx], self.items[idx], self.ratings[idx]
    
    def get_datasize(self):
        return self.users.max() + 1, self.items.max() + 1, len(self.ratings)

train_data = AIRDataset(df)
test_data_security = AIRDataset(test_security_df)

batch_size = 40
train_loader = DataLoader(train_data, batch_size = batch_size, shuffle = False)
test_loader_security = DataLoader(test_data_security, batch_size = batch_size, shuffle = False)

In [5]:
n_users, n_items, n_ratings = train_data.get_datasize()
_, _, n_ratings_test_security = test_data_security.get_datasize()


print(n_users, n_items, n_ratings, n_ratings_test_security)

tensor(12) tensor(41) 480 40


In [6]:
class MF(nn.Module):
    def __init__(self, num_users, num_items, rank=1):
        super().__init__()
        self.U = torch.nn.Parameter(torch.randn(num_users, rank))
        self.V = torch.nn.Parameter(torch.randn(num_items, rank))
        
    def forward(self, users, items):
        ratings = torch.sum(self.U[users]*self.V[items], dim=-1)
        return ratings

In [14]:
mf_model = MF(n_users, n_items, rank=1)
optimizer = torch.optim.Adam(mf_model.parameters(), lr=0.01)
criterion = nn.MSELoss()

for epoch in range(200):
    cost = 0
    for users, items, ratings in train_loader:
        optimizer.zero_grad()
        ratings_pred = mf_model(users, items)
        loss = criterion(ratings_pred, ratings)
        loss.backward()
        optimizer.step()
        cost += loss.item() * len(ratings)
        
    cost /= n_ratings
    
    #print(f"Epoch : {epoch}")
    #print("train cost : {:.6f}".format(cost))
    

for users, items, ratings in test_loader_security:
    cost_test = 0
    ratings_pred = mf_model(users, items)
    loss = criterion(ratings_pred, ratings)
    cost_test += loss.item() * len(ratings)
    print(ratings)
    print(ratings_pred)

tensor([1., 1., 2., 1., 0., 0., 1., 1., 5., 3., 2., 3., 3., 3., 5., 2., 2., 2.,
        1., 2., 2., 3., 2., 2., 2., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.])
tensor([1.3921, 1.4763, 1.5278, 1.5721, 1.2640, 1.5378, 1.4876, 1.5809, 5.1485,
        3.0020, 2.7800, 2.8131, 2.8209, 2.9987, 5.1489, 1.9695, 2.1483, 1.8838,
        1.8403, 2.1489, 1.8766, 2.3746, 2.0565, 2.1554, 1.9712, 2.3354, 2.3256,
        4.9558, 3.8785, 3.6051, 3.7795, 3.5127, 3.9675, 3.7916, 4.9558, 2.8809,
        2.6083, 2.7952, 2.8826, 4.9561], grad_fn=<SumBackward1>)


In [8]:
my_string = ""
loop_start = list_zero_score_course_id[0]
loop_end = list_zero_score_course_id[-1]

for i in list_zero_score_course_id:
    if i == loop_end:
        my_string += '{}"student_id":{},"course_id":{},"grade":{},"major":"software","keywords":"security","ratings":{}{}{}'.format("{", temp_student_id_security, int(train_python[i-1]['course_id']), int(train_python[i-1]['grade']), round(float(ratings_pred[i-1]), 4), "}", "]")
    elif i == loop_start:
        my_string += '{}{}"student_id":{},"course_id":{},"grade":{},"major":"software","keywords":"security","ratings":{}{},'.format("[", "{", temp_student_id_security, int(train_python[i-1]['course_id']), int(train_python[i-1]['grade']), round(float(ratings_pred[i-1]), 4), "}")
    else:
        my_string += '{}"student_id":{},"course_id":{},"grade":{},"major":"software","keywords":"security","ratings":{}{},'.format("{", temp_student_id_security, int(train_python[i-1]['course_id']), int(train_python[i-1]['grade']), round(float(ratings_pred[i-1]), 4), "}")

In [9]:
with open("./Result_Security.json", 'w') as outfile:
    json.dump(json.loads(my_string), outfile, indent=4)