In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split

In [3]:
# Load the table, then record the distinct course and topic labels found in
# it; their counts are used later to size the embedding tables.
df = pd.read_csv("synthetic_data.csv")
courses, course_topics = (
    list(df[column].unique()) for column in ("course", "course_topic")
)

In [4]:
# One independent label encoder per categorical column.
course_encoder, topic_encoder = LabelEncoder(), LabelEncoder()

In [5]:
# Overwrite each categorical column with the integer codes produced by the
# encoder dedicated to it.
for column_name, encoder in (
    ("course", course_encoder),
    ("course_topic", topic_encoder),
):
    df[column_name] = encoder.fit_transform(df[column_name])

In [6]:
# Rescales each fitted column to the [0, 1] range (the scaler's default).
scaler = MinMaxScaler(feature_range=(0, 1))

In [7]:
# Min-max scale the continuous feature columns, writing the result back into
# the same columns. Missing values are replaced by 0 before the scaler is fitted.
# NOTE(review): the scaler is fitted on the whole frame, i.e. before the
# train/validation/test split made further down, so held-out rows contribute
# to the learned min/max - confirm this is acceptable.
scaled_columns = [
    "course_grade",
    "easy_correct",
    "medium_correct",
    "hard_correct",
    "days_to_deadline",
]
df[scaled_columns] = scaler.fit_transform(df[scaled_columns].fillna(0))

In [8]:
# Preview the first rows of the frame after label encoding and scaling.
df.head()

Unnamed: 0,user,course,course_grade,course_topic,easy_correct,medium_correct,hard_correct,upcoming_assignment,days_to_deadline,target
0,1,0,0.800459,60,0.912297,0.51366,0.439371,1,0.393914,0.55345
1,1,0,0.800459,37,0.774239,0.800331,0.447982,1,0.592694,0.478065
2,1,0,0.800459,95,0.269247,0.530319,0.652642,1,0.484431,0.621308
3,1,0,0.800459,101,0.746112,0.332602,0.917636,1,0.943728,0.670455
4,1,0,0.800459,51,0.989983,0.992933,0.39895,0,0.0,0.342731


In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
df.describe()

Unnamed: 0,user,course,course_grade,course_topic,easy_correct,medium_correct,hard_correct,upcoming_assignment,days_to_deadline,target
count,7300.0,7300.0,7300.0,7300.0,7300.0,7300.0,7300.0,7300.0,7300.0,7300.0
mean,25.5,2.746575,0.472724,72.5,0.48025,0.526988,0.551401,0.69137,0.350547,0.471289
std,14.431858,1.691397,0.259415,42.148468,0.269674,0.267965,0.264979,0.46196,0.335135,0.149529
min,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,13.0,1.0,0.258115,36.0,0.253689,0.315366,0.346933,0.0,0.0,0.364784
50%,25.5,3.0,0.457056,72.5,0.474373,0.536311,0.569196,1.0,0.288848,0.483302
75%,38.0,4.0,0.695063,109.0,0.70077,0.752885,0.768345,1.0,0.646978,0.58145
max,50.0,5.0,1.0,145.0,1.0,1.0,1.0,1.0,1.0,0.919125


In [11]:
# Two-stage split: hold out 20% of the rows for testing, then take 25% of the
# remaining rows for validation - roughly 60/20/20 train/validation/test.
split_seed = 42
train_val_df, test_df = train_test_split(
    df, test_size=0.2, random_state=split_seed
)
train_df, val_df = train_test_split(
    train_val_df, test_size=0.25, random_state=split_seed
)

In [12]:
class StudyDataset(torch.utils.data.Dataset):
    """Map-style dataset over a preprocessed study DataFrame.

    Each sample is a pair ``(inputs, target)`` where ``inputs`` is a dict with

    * ``'course'``   - 0-dim ``torch.long`` tensor from the ``course`` column,
    * ``'topic'``    - 0-dim ``torch.long`` tensor from ``course_topic``,
    * ``'features'`` - ``torch.float`` tensor of shape ``(6,)`` holding the
      columns named in ``FEATURE_COLUMNS``, in that order,

    and ``target`` is a 0-dim ``torch.float`` tensor from the ``target`` column.

    The columns are converted to tensors once, in ``__init__``, instead of
    building a pandas Series and several new tensors on every ``__getitem__``
    call. Consequently the dataset is a snapshot of ``df`` at construction
    time, and the tensors returned by ``__getitem__`` are views into the
    precomputed storage: treat them as read-only.
    """

    # Dense feature columns, in the order they appear in the 'features' tensor.
    FEATURE_COLUMNS = [
        'course_grade',
        'easy_correct',
        'medium_correct',
        'hard_correct',
        'upcoming_assignment',
        'days_to_deadline',
    ]

    def __init__(self, df):
        """Store ``df`` and precompute one tensor per model input.

        Args:
            df: DataFrame containing the ``course``, ``course_topic``,
                ``target`` and ``FEATURE_COLUMNS`` columns with numeric values.

        Raises:
            KeyError: if any required column is missing from ``df``.
        """
        # Kept so existing code that reads ``dataset.df`` keeps working.
        self.df = df

        # torch.tensor copies the NumPy data, so the tensors do not alias df.
        self._courses = torch.tensor(df['course'].to_numpy(), dtype=torch.long)
        self._topics = torch.tensor(df['course_topic'].to_numpy(), dtype=torch.long)
        self._features = torch.tensor(
            df[self.FEATURE_COLUMNS].to_numpy(), dtype=torch.float
        )
        self._targets = torch.tensor(df['target'].to_numpy(), dtype=torch.float)

    def __len__(self):
        """Return the number of samples (rows captured at construction)."""
        return self._targets.shape[0]

    def __getitem__(self, idx):
        """Return ``(inputs, target)`` for the row at position ``idx``.

        Raises:
            IndexError: if ``idx`` is out of range.
        """
        inputs = {
            'course': self._courses[idx],
            'topic': self._topics[idx],
            'features': self._features[idx],
        }

        return inputs, self._targets[idx]

In [16]:
class StudyRecommenderModel(nn.Module):
    """Embedding + MLP regressor that emits one sigmoid-squashed score per sample.

    The course and topic indices are each looked up in their own embedding
    table; the two embeddings are concatenated with 6 dense features and
    passed through two ReLU layers (each followed by dropout, p=0.2) and a
    final single-unit sigmoid layer.

    Args:
        n_courses: number of rows in the course embedding table.
        n_topics: number of rows in the topic embedding table.
        emb_dim: width of each embedding vector.
        hidden_size: width of the first fully connected layer.
    """

    def __init__(self, n_courses, n_topics, emb_dim, hidden_size):
        super().__init__()

        # Lookup tables for the two categorical inputs.
        self.course_embedding = nn.Embedding(n_courses, emb_dim)
        self.topic_embedding = nn.Embedding(n_topics, emb_dim)

        # MLP input = both embeddings side by side plus the 6 dense features.
        mlp_input_size = 2 * emb_dim + 6
        self.fc1 = nn.Linear(mlp_input_size, hidden_size)
        self.dropout1 = nn.Dropout(p=0.2)
        self.fc2 = nn.Linear(hidden_size, 32)
        self.dropout2 = nn.Dropout(p=0.2)
        self.fc3 = nn.Linear(32, 1)

    def forward(self, course, topic, features):
        """Return a ``(batch, 1)`` tensor of sigmoid outputs.

        ``course`` and ``topic`` are index tensors for the embedding tables;
        ``features`` is concatenated with the embeddings along dim 1.
        """
        embedded = [self.course_embedding(course), self.topic_embedding(topic)]
        hidden = torch.cat(embedded + [features], dim=1)

        hidden = self.dropout1(torch.relu(self.fc1(hidden)))
        hidden = self.dropout2(torch.relu(self.fc2(hidden)))

        return torch.sigmoid(self.fc3(hidden))

In [19]:
# Seed PyTorch's global RNG before anything random happens, so that weight
# initialisation, dropout and the shuffled training order start from the same
# state each time this cell and the training cell are re-run.
SEED = 42
torch.manual_seed(SEED)

# Embedding tables are sized from the number of distinct course / topic labels;
# 32 is the embedding width and 64 the width of the first hidden layer.
model = StudyRecommenderModel(len(courses), len(course_topics), 32, 64)
crit = nn.MSELoss()
opt = optim.Adam(model.parameters(), lr=0.001)

train_dataset = StudyDataset(train_df)
val_dataset = StudyDataset(val_df)
test_dataset = StudyDataset(test_df)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=32)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32)

epochs = 25

In [20]:
# Train for `epochs` passes over the training loader, reporting the mean
# per-batch training and validation loss after each pass.
for epoch in range(epochs):
    model.train()  # training mode: dropout active
    train_loss = 0

    for inputs, target in train_loader:
        course = inputs['course']
        topic = inputs['topic']
        features = inputs['features']
        output = model(course, topic, features)
        # The model output is (batch, 1). Drop only that trailing axis: a bare
        # squeeze() would also remove the batch axis of a single-sample batch,
        # leaving a 0-dim prediction that no longer matches `target`.
        loss = crit(output.squeeze(1), target)
        opt.zero_grad()
        loss.backward()
        opt.step()
        train_loss += loss.item()

    # Mean of the per-batch losses (every batch carries equal weight).
    avg_train_loss = train_loss / len(train_loader)

    model.eval()  # evaluation mode: dropout disabled
    val_loss = 0

    # No gradients are needed while scoring the validation split.
    with torch.no_grad():
        for inputs, target in val_loader:
            course = inputs['course']
            topic = inputs['topic']
            features = inputs['features']

            output = model(course, topic, features)
            loss = crit(output.squeeze(1), target)
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_loader)

    print(f"Epoch {epoch+1}/{epochs}, Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}")

Epoch 1/25, Train Loss: 0.0212, Val Loss: 0.0163
Epoch 2/25, Train Loss: 0.0149, Val Loss: 0.0128
Epoch 3/25, Train Loss: 0.0133, Val Loss: 0.0124
Epoch 4/25, Train Loss: 0.0127, Val Loss: 0.0121
Epoch 5/25, Train Loss: 0.0122, Val Loss: 0.0117
Epoch 6/25, Train Loss: 0.0117, Val Loss: 0.0113
Epoch 7/25, Train Loss: 0.0115, Val Loss: 0.0108
Epoch 8/25, Train Loss: 0.0111, Val Loss: 0.0109
Epoch 9/25, Train Loss: 0.0111, Val Loss: 0.0106
Epoch 10/25, Train Loss: 0.0109, Val Loss: 0.0106
Epoch 11/25, Train Loss: 0.0106, Val Loss: 0.0108
Epoch 12/25, Train Loss: 0.0108, Val Loss: 0.0106
Epoch 13/25, Train Loss: 0.0106, Val Loss: 0.0107
Epoch 14/25, Train Loss: 0.0105, Val Loss: 0.0106
Epoch 15/25, Train Loss: 0.0104, Val Loss: 0.0106
Epoch 16/25, Train Loss: 0.0104, Val Loss: 0.0105
Epoch 17/25, Train Loss: 0.0105, Val Loss: 0.0107
Epoch 18/25, Train Loss: 0.0103, Val Loss: 0.0105
Epoch 19/25, Train Loss: 0.0101, Val Loss: 0.0104
Epoch 20/25, Train Loss: 0.0103, Val Loss: 0.0106
Epoch 21/

In [21]:
# Persist only the learned parameters (the state dict), not the module object.
trained_weights = model.state_dict()
torch.save(trained_weights, 'model.pth')

In [22]:
# Score the trained model on the test loader and report the mean per-batch loss.
model.eval()  # evaluation mode: dropout disabled
test_loss = 0

with torch.no_grad():
    for inputs, target in test_loader:
        course = inputs['course']
        topic = inputs['topic']
        features = inputs['features']

        # Forward pass
        output = model(course, topic, features)
        # The model output is (batch, 1). Drop only that trailing axis: a bare
        # squeeze() would also remove the batch axis of a single-sample batch,
        # leaving a 0-dim prediction that no longer matches `target`.
        loss = crit(output.squeeze(1), target)
        test_loss += loss.item()

# Mean of the per-batch losses (every batch carries equal weight).
avg_test_loss = test_loss / len(test_loader)
print(f"Test Loss: {avg_test_loss:.4f}")

Test Loss: 0.0110
