In [None]:
import torch  
import torch.nn as nn
import torch.nn.functional as F

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


import pandas as pd 

In [68]:
file_path = '/Users/ortalhanuna/Downloads/jj3tw8kj6h-3/sensor_raw.csv'
df = pd.read_csv(file_path)



In [69]:
features = ['AccX','AccY','AccZ','GyroX','GyroY','GyroZ']
x = df[features].values
y = df['Class'].astype('category').cat.codes.values
print(x,y)

[[ 0.22680664 -0.13232422 -1.01049805 -0.49618321  2.51145038  0.70229008]
 [ 0.33129883 -0.14575195 -0.99829102 -1.51908397  2.83206107 -1.24427481]
 [ 0.43139648 -0.2097168  -1.00415039  0.67938931  3.20610687 -0.22900763]
 ...
 [ 0.41918945 -0.19287109 -1.00268555 -1.0610687   4.53435115  1.18320611]
 [ 0.30883789 -0.09008789 -1.02319336 -1.01526718  5.25954198  0.65648855]
 [ 0.09814453 -0.01586914 -1.0012207  -0.8778626   4.48854962  0.04580153]] [2 2 2 ... 3 3 3]


In [None]:
## normalize
x = StandardScaler().fit_transform(x)


In [71]:
x_train, x_test, y_train, y_test = train_test_split(
    torch.tensor(x, dtype=torch.float32),
    torch.tensor(y,dtype=torch.int),
    test_size=0.2,random_state=42)

In [83]:
from torch.utils.data import DataLoader, TensorDataset

train_ds = TensorDataset(x_train,y_train)
train_loader = DataLoader(train_ds,64,shuffle=True)



In [72]:
class TeacherModel(nn.Module):
    def __init__(self):
        super().__init__()
        
        self.fc1 = nn.Linear(6, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 4) ## extract the p to each class (4 class)
        self.relu = nn.ReLU()

    def forward(self,x):
        x = self.fc1(x) 
        x = self.relu(x)
        x = self.fc2(x) 
        x = self.relu(x)
        x = self.fc3(x) 
        return x

model_teacher = TeacherModel()
optimizer_teacher = torch.optim.Adam(model_teacher.parameters(),0.001)


In [84]:
for epoch in range(5000):
    model_teacher.train()
    for x_train,y_train in train_loader:
        optimizer_teacher.zero_grad()
        logits = model_teacher(x_train) ## extract the p to each class (4 class)
        loss = F.cross_entropy(logits, y_train.long())
        loss.backward()
        optimizer_teacher.step()
        if (epoch+1) % 10 == 0:
            print(f"Teacher Epoch {epoch+1}, Loss: {loss.item():.4f}")

Teacher Epoch 10, Loss: 0.0035
Teacher Epoch 10, Loss: 0.0142
Teacher Epoch 10, Loss: 0.0030
Teacher Epoch 10, Loss: 0.0129
Teacher Epoch 10, Loss: 0.0022
Teacher Epoch 10, Loss: 0.0025
Teacher Epoch 10, Loss: 0.0035
Teacher Epoch 10, Loss: 0.0155
Teacher Epoch 10, Loss: 0.0071
Teacher Epoch 10, Loss: 0.0038
Teacher Epoch 10, Loss: 0.0037
Teacher Epoch 10, Loss: 0.0047
Teacher Epoch 10, Loss: 0.0037
Teacher Epoch 10, Loss: 0.0072
Teacher Epoch 20, Loss: 0.0009
Teacher Epoch 20, Loss: 0.0034
Teacher Epoch 20, Loss: 0.0015
Teacher Epoch 20, Loss: 0.0161
Teacher Epoch 20, Loss: 0.0023
Teacher Epoch 20, Loss: 0.0032
Teacher Epoch 20, Loss: 0.0041
Teacher Epoch 20, Loss: 0.0040
Teacher Epoch 20, Loss: 0.0042
Teacher Epoch 20, Loss: 0.0032
Teacher Epoch 20, Loss: 0.0111
Teacher Epoch 20, Loss: 0.0047
Teacher Epoch 20, Loss: 0.0033
Teacher Epoch 20, Loss: 0.0065
Teacher Epoch 30, Loss: 0.0070
Teacher Epoch 30, Loss: 0.0039
Teacher Epoch 30, Loss: 0.0011
Teacher Epoch 30, Loss: 0.0087
Teacher 

In [None]:
class StudentModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(6, 16)
        self.fc2 = nn.Linear(16, 4)
        self.relu = nn.ReLU()

    def forward(self,x):
        x = self.fc1(x) 
        x = self.relu(x)
        x = self.fc2(x) 
        
        return x

model_student = StudentModel()
optimizer_student = torch.optim.Adam(model_student.parameters(),0.0001)

In [80]:
def distill_loss(s_logits, t_logits, y, T = 2.0, alpha = 0.7):
    
    soft_loss = F.kl_div(
        F.log_softmax(s_logits/T, dim=1),
        F.softmax(t_logits/T,dim=1),
        reduction='mean'
    )*(T*T)

    hard_loss = F.cross_entropy(s_logits,y)

    loss = alpha*soft_loss + (1-alpha)*hard_loss 
    return loss


In [None]:
for epoch in range(50000):
    model_student.train()
    for x_train,y_train in train_loader:
        optimizer_student.zero_grad()
        with torch.no_grad():
            t_logits = model_teacher(x_train)
        s_logits = model_student(x_train)
        loss = distill_loss(s_logits,t_logits,y_train.long())
        loss.backward()
        optimizer_student.step()
        if (epoch+1) % 10 == 0:
            print(f"Student Epoch {epoch+1}, Loss: {loss.item():.4f}")

correct = 0
total = 0
with torch.no_grad():
    pred = model_student(x_test).argmax(1)
    correct += (pred==y_test).sum().item()
    total += y_train.size(0)
    acc = (correct/total)*100
    print(f"Student Accuracy: {acc:.2f}%")




Student Epoch 10, Loss: 0.4487
Student Epoch 10, Loss: 0.7137
Student Epoch 10, Loss: 0.5429
Student Epoch 10, Loss: 0.6302
Student Epoch 10, Loss: 0.4956
Student Epoch 10, Loss: 0.6316
Student Epoch 10, Loss: 0.5166
Student Epoch 10, Loss: 0.5198
Student Epoch 10, Loss: 0.5375
Student Epoch 10, Loss: 0.5707
Student Epoch 10, Loss: 0.6321
Student Epoch 10, Loss: 0.5036
Student Epoch 10, Loss: 0.4428
Student Epoch 10, Loss: 0.5134
Student Epoch 20, Loss: 0.7106
Student Epoch 20, Loss: 0.4349
Student Epoch 20, Loss: 0.5078
Student Epoch 20, Loss: 0.5861
Student Epoch 20, Loss: 0.5883
Student Epoch 20, Loss: 0.3882
Student Epoch 20, Loss: 0.5540
Student Epoch 20, Loss: 0.7151
Student Epoch 20, Loss: 0.5498
Student Epoch 20, Loss: 0.5816
Student Epoch 20, Loss: 0.5629
Student Epoch 20, Loss: 0.4450
Student Epoch 20, Loss: 0.5856
Student Epoch 20, Loss: 0.4864
Student Epoch 30, Loss: 0.4467
Student Epoch 30, Loss: 0.5484
Student Epoch 30, Loss: 0.7287
Student Epoch 30, Loss: 0.8173
Student 

KeyboardInterrupt: 