In [86]:
import numpy as np
import scipy as sp

import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

import path

import torch
from torch.utils import data
from torch import nn
import torch.nn.functional as F
from torch.optim import SGD

In [94]:
class SynteticSet(data.Dataset):
    def __init__(self, df, features, target="target"):
        target_dict = {"Class_1": 0, "Class_2": 1, "Class_3": 2, "Class_4": 3}
        
        for i in target_dict:
            df[target][df[target] == i] = target_dict[i]
           
        prefix = "feature_{}"
        
        for i,feature in enumerate(features):
            features[i] = prefix.format(str(feature))
        
            
        self.X = torch.tensor(df[features].to_numpy()).float()
        self.Y = torch.tensor(df[target].to_numpy(dtype=np.int64))
        
        
        
    def __len__(self):
        return self.Y.shape[0]
    
    def __getitem__(self, i):
        return self.X[i], self.Y[i]
    

In [126]:
data_path = path.Path(".\\data")
df = pd.read_csv(data_path/"train.csv")
    
dataset = SynteticSet(df, [i for i in range(3)])
dataset_len = len(dataset)

trainset, evalset = data.random_split(dataset, 
                                      [int(dataset_len*0.7), int(dataset_len*0.3)])

trainloader = data.DataLoader(trainset, batch_size=128, shuffle=True)
evalloader = data.DataLoader(evalset, batch_size=128)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [127]:
class NN(nn.Module):
    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
                nn.Linear(3,4),
                nn.Softmax(dim=1)
            )
    def forward(self, x):
        return self.net(x)

In [128]:
net = NN()
lossF = nn.CrossEntropyLoss()

opt = SGD(net.parameters() ,lr=0.03)

In [129]:
def train_step():
    loss_list = list()
    for x,y in trainloader:

        opt.zero_grad()
        result = net(x)
        loss = lossF(result, y)

        loss.backward()
        opt.step()

        loss_list.append(loss.item())

    print(np.array(loss_list).mean())

In [130]:
for _ in range(100):
    train_step()

1.3459895003648301
1.2228841583297283
1.1894371132528105
1.1815714343592278
1.1778991294299446
1.1757248936449152
1.1742904109972268
1.1733017299031427
1.1725233627629672
1.1719526268228535
1.1714781481343606
1.171114304819752
1.1707771892303522
1.1705005421716945
1.1702955579844863
1.1701039596711043
1.169923316408119
1.1697650565961597
1.16960595752465
1.1695128892849742
1.1693981820530168
1.1692661291066646
1.1691868194060526
1.169121674471407
1.1690293254102606
1.168966626990212
1.1689057844866346
1.1688294787729463
1.1687814912586805
1.1687233110232589
1.1686686216368318
1.1686250843121322
1.1685823337251569
1.1685561002717375
1.1685102310651398
1.1684744397290663
1.1684608601129034
1.1684188539846723
1.1683905684751827
1.1683577403725829
1.1683283170354868
1.168322537455306
1.168271646203245
1.1682499300627212
1.1682193115816693
1.1682077426143276
1.168181063488153
1.1681773808582174
1.1681624350207815
1.1681320789309262
1.1680943479921526
1.1681140018337606
1.1680754991946316
1.

In [143]:
x, y = evalset[:]
result = net(x[:, 0:4])
accuracy = (result.argmax(dim=1) == y).float().mean()
loss = lossF(result, y)

print(f"Loss: {loss}  Accuracy: {accuracy}")

Loss: 1.17243492603302  Accuracy: 0.571566641330719
