In [2]:
import numpy as np
import scipy as sp

import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import path

import torch
from torch.utils import data
from torch import nn
import torch.nn.functional as F
from torch.optim import SGD, Adam

In [3]:
class SynteticSet(data.Dataset):
    def __init__(self, df, features, target="target"):
        target_dict = {"Class_1": 0, "Class_2": 1, "Class_3": 2, "Class_4": 3}
        
        for i in target_dict:
            df[target][df[target] == i] = target_dict[i]
           
        prefix = "feature_{}"
        
        for i,feature in enumerate(features):
            features[i] = prefix.format(str(feature))
        
            
        self.X = torch.tensor(df[features].to_numpy()).float()
        self.Y = torch.tensor(df[target].to_numpy(dtype=np.int64))
        
        
        
    def __len__(self):
        return self.Y.shape[0]
    
    def __getitem__(self, i):
        return self.X[i].abs().long(), self.Y[i]
    

In [4]:
data_path = path.Path(".\\data")
df = pd.read_csv(data_path/"train.csv")
    
dataset = SynteticSet(df, [i for i in range(50)])
dataset_len = len(dataset)

trainset, evalset = data.random_split(dataset, 
                                      [int(dataset_len*0.7), int(dataset_len*0.3)])

trainloader = data.DataLoader(trainset, batch_size=128, shuffle=True)
evalloader = data.DataLoader(evalset, batch_size=128)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[target][df[target] == i] = target_dict[i]


In [5]:
dfc = df.copy()
dfc.pop("id")
dfc.pop("target")
embeddings_dim = list()
for i in dfc:
    embeddings_dim.append(max(dfc[i].unique()))

In [22]:
class NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.embeddings = nn.ModuleList([
            nn.Embedding(dim+1, 4) for dim in embeddings_dim
        ])
        
    def forward(self, x):
        r = torch.empty(x.shape[1], x.shape[0], 4)
        x = x.transpose(0, 1)
        for i,data in enumerate(x):
            r[i] = self.embeddings[i](data)
            
        r = r.transpose(1, 0)
        return r.mean(dim=1) 

In [23]:
net = NN()
lossF = nn.CrossEntropyLoss()

opt = Adam(net.parameters())

In [25]:
def train_step():
    loss_list = list()
    for x,y in trainloader:

        opt.zero_grad()
        result = net(x)
        loss = lossF(result, y)
        loss.backward()
        opt.step()

        loss_list.append(loss.item())

    print(np.array(loss_list).mean())

In [34]:
for _ in range(50):
    train_step()

1.0741822915713573
1.0741029693396043
1.0740058851416412
1.073968080432585
1.0739202129993404
1.0738145049790795
1.0738030203517657
1.0737338227371411
1.0736662816303957
1.0736604036535162
1.0735535096424806
1.0734896565050267
1.0734465579881964
1.0734397713401418
1.0733179260433483
1.07327995287217
1.0732253124116762
1.0731958741461773
1.0731229466101806
1.0730808081966867
1.0730310597846888
1.0729580921490407
1.0729235294096213
1.0728542244630497
1.0728366233313105
1.0727569770333536
1.072703926829141
1.0726545767984617
1.0726231173143963
1.0725727801567022
1.0725384982238086
1.072456162416085
1.072420281618561
1.0723574442662966
1.0723347340249056
1.0722703008590713
1.0721899148968936
1.0721395734895083
1.072119063716484
1.0720571808013009
1.072022500269156
1.0720049444674573
1.0719347573285565
1.0719072159509118
1.0718450827398074
1.071774481310487
1.0717381290151606
1.0717407991289003
1.071656330847871
1.0716130444728915


In [35]:
x, y = evalset[:]
result = net(x)
accuracy = (result.argmax(dim=1) == y).float().mean()
loss = lossF(result, y)

print(f"Loss: {loss}  Accuracy: {accuracy}")

Loss: 1.0963621139526367  Accuracy: 0.5786333084106445
