In [23]:
import numpy as np
import scipy as sp

import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import path

import torch
from torch.utils import data
from torch import nn
import torch.nn.functional as F
from torch.optim import SGD, Adam

In [24]:
class SynteticSet(data.Dataset):
    def __init__(self, df, features, target="target"):
        target_dict = {"Class_1": 0, "Class_2": 1, "Class_3": 2, "Class_4": 3}
        
        for i in target_dict:
            df[target][df[target] == i] = target_dict[i]
           
        prefix = "feature_{}"
        
        for i,feature in enumerate(features):
            features[i] = prefix.format(str(feature))
        
            
        self.X = torch.tensor(df[features].to_numpy()).float()
        self.Y = torch.tensor(df[target].to_numpy(dtype=np.int64))
        
        
        
    def __len__(self):
        return self.Y.shape[0]
    
    def __getitem__(self, i):
        return self.X[i].view(1, -1), self.Y[i]
    

In [25]:
data_path = path.Path(".\\data")
df = pd.read_csv(data_path/"train.csv")
    
dataset = SynteticSet(df, [i for i in range(50)])
dataset_len = len(dataset)

trainset, evalset = data.random_split(dataset, 
                                      [int(dataset_len*0.7), int(dataset_len*0.3)])

trainloader = data.DataLoader(trainset, batch_size=128, shuffle=True)
evalloader = data.DataLoader(evalset, batch_size=128)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[target][df[target] == i] = target_dict[i]


In [26]:
dfc = df.copy()
dfc.pop("id")
dfc.pop("target")
embeddings_dim = list()
for i in dfc:
    embeddings_dim.append(max(dfc[i].unique())-1)

In [161]:
class NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv1d(1, 8, 3, 2),
            nn.BatchNorm1d(8),
            nn.ReLU(),
            nn.Conv1d(8, 16, 3, 2, 1),
            nn.BatchNorm1d(16),
            nn.ReLU(),
            nn.Conv1d(16, 32, 3, 2, 3),
            nn.BatchNorm1d(32),
            nn.ReLU(),
            nn.Conv1d(32, 1, 3, 2, 1),
            nn.ReLU(),
            nn.Softmax(dim=2)
        )
    def forward(self, x):
        return self.net(x).view(-1, 4)

In [162]:
net = NN()
lossF = nn.CrossEntropyLoss()

opt = Adam(net.parameters())

In [163]:
def train_step():
    loss_list = list()
    for x,y in trainloader:

        opt.zero_grad()
        result = net(x)
        loss = lossF(result, y)

        loss.backward()
        opt.step()

        loss_list.append(loss.item())

    print(np.array(loss_list).mean())

In [169]:
for _ in range(500):
    train_step()

1.1370466605400908
1.1370228273994962
1.1366828972068521
1.1373690421228217
1.1370936758557446
1.137194070563238
1.1371968311191258
1.1366349914174194
1.1367639864384582
1.1366688414073296
1.1369621973804844
1.1367728840934292
1.137311678699863
1.136787610908313
1.136863178483311
1.1364681428703574
1.137270390442564
1.1371056716446466
1.137009061668647
1.1369082404664927
1.1369050045989768
1.1369423317081095
1.1366060569037904
1.136745638123814
1.1367291131446742
1.1370329548497522
1.1364971002669395
1.1370704814764439
1.1363262378319525
1.1366703314144826
1.1368713348395645
1.1359590950134248
1.1365487638731544
1.136481119684153
1.1364428872818049
1.1366028816216172
1.1367524496162178
1.1365276178450645
1.1371888373408066
1.1362607287314517
1.1365771656498396
1.1368167077819433
1.1366142298229434
1.1365508738993726
1.1366297183132694
1.1365157337467675
1.1364562720659663
1.1361369997317439
1.1365797713644108
1.1364434794054608
1.1367515349518882
1.136521174223375
1.1367244245365289
1.

1.134115802958417
1.134317805605789
1.13429543174381
1.13402943942403
1.1346304606258106
1.1337634438352568
1.13430905036996
1.1338965064646775
1.1338711142322044
1.1340098193620196
1.1342213182170386
1.1340076215088477
1.1346135885885473
1.1340509426441228
1.1339063304433874
1.1340675478225652
1.1340704036151252
1.1338449967190813
1.1341562691728637
1.1338780601892
1.1343227064805668
1.1338593150187672
1.1343616311467326
1.1339246723290117
1.1341603822202526
1.1339166604622626
1.1337644275843117
1.1334203826008158
1.133312322103999
1.1336473745227513
1.1338654246583064
1.1339541237794504
1.1343088716649923
1.1339820200608024
1.1339607655021346
1.1340014086780863
1.134150982338942
1.1342688057932602
1.1341951276963982
1.1338539306598783
1.1340453384999165
1.1334994062645125
1.133949847064463
1.1340434374713375
1.1338674396657857
1.1341791828546053
1.1341786181904934
1.1341267431893776
1.133970732662752
1.1338542548350485
1.13424518884209
1.134407899497414
1.1344666964829078
1.134209118

In [170]:
x, y = evalset[:]
result = net(x.view(-1, 1, 50))
accuracy = (result.argmax(dim=1) == y).float().mean()
loss = lossF(result, y)

print(f"Loss: {loss}  Accuracy: {accuracy}")

Loss: 1.1739957332611084  Accuracy: 0.5682333111763
