In [1]:
import pandas as pd
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
import warnings
warnings.simplefilter('ignore')

In [2]:
# Load the label table and derive each training image's path from its id.
ans = pd.read_csv('train_answers.csv')
ans['path'] = ans['id'].apply(lambda x: f'train_images/img_{x}.png')

# Decode every image once and keep (tensor, label) pairs in memory.
# Pixels are left in the raw 0-255 range (no normalisation is applied), so any
# inference-time preprocessing has to do exactly the same.
to_tensor = transforms.PILToTensor()
train = []
# Columns are read by name so a reordered or extended CSV cannot silently
# swap the label and the path.
for y, path in zip(ans['target_feature'], ans['path']):
    # The context manager releases the file handle as soon as the pixels are read.
    with Image.open(path) as picture:
        img = to_tensor(picture.resize((128, 128)))
    # float32 is what the training loop feeds the model (it calls .float()),
    # and it needs half the memory of the float64 produced by `.to(float)`.
    img = img.to(torch.float32)
    train.append((img, int(y)))

In [8]:
# The first 20000 samples are used for fitting; everything after them is held out.
to_train, to_test = train[:20000], train[20000:]
# Show the label table as this cell's output.
ans

Unnamed: 0,id,target_feature,path
0,0,0,train_images/img_0.png
1,1,2,train_images/img_1.png
2,2,2,train_images/img_2.png
3,3,0,train_images/img_3.png
4,4,1,train_images/img_4.png
...,...,...,...
26995,26995,0,train_images/img_26995.png
26996,26996,2,train_images/img_26996.png
26997,26997,1,train_images/img_26997.png
26998,26998,2,train_images/img_26998.png


In [23]:
# Training batches are reshuffled on every pass over the loader.
train_load = DataLoader(to_train, batch_size=25, shuffle=True)
# Evaluation keeps a fixed batch order: shuffling does not change the aggregate
# metrics, it only makes the per-batch loss history differ from run to run.
test_load = DataLoader(to_test, batch_size=25, shuffle=False)

In [24]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def train_model(model, optimizer, loss_function, loader):
    """Run one pass over ``loader``, updating ``model`` after every batch.

    Args:
        model: network to optimise; it is switched to training mode first.
        optimizer: optimizer whose ``zero_grad``/``step`` are called per batch.
        loss_function: callable used as ``loss_function(predictions, targets)``.
        loader: iterable yielding ``(inputs, targets)`` batches.

    Returns:
        List with the loss value of every batch, in iteration order.
    """
    model.train()

    losses = []
    for step, (inputs, targets) in enumerate(loader, start=1):
        # Inputs are cast to float32 and targets to int64 before the forward pass.
        batch_loss = loss_function(model(inputs.float()), targets.long())

        value = batch_loss.item()
        losses.append(value)
        print(step, value)

        # Clear stale gradients, backpropagate, then apply the update.
        optimizer.zero_grad()
        batch_loss.backward()
        optimizer.step()
    return losses

def test_model(model, loss_function, loader):
    model.eval()
    all_pred = []
    all_true = []
    history = []
    total_loss = 0
    for X, y in loader:
        X = X.float()
        y = y.long()
        a = model(X)
        ans = [np.array(x.detach().numpy()).argmax() for x in a]
        loss = loss_function(a, y)
        l1 = loss.item()
        total_loss += l1
        print(l1)
        all_pred.extend(ans)
        all_true.extend(y.tolist())
        history.append(loss.item())
        loss.backward()
    print(f"Accuracy: {accuracy_score(all_true, all_pred)}")
    print(f"Presicion: {precision_score(all_true, all_pred, average='weighted')}")
    print(f"Recall: {recall_score(all_true, all_pred, average='weighted')}")
    print(f"F1 score: {f1_score(all_true, all_pred, average='weighted')}")
    print(f'Потеря на тестовой выборке {total_loss / len(loader)}')
    return history

In [25]:
def _conv_stage(in_channels, out_channels, kernel_size):
    """Return the layers of one stage: convolution, dropout, ReLU, 2x2 max-pool."""
    return [
        nn.Conv2d(in_channels, out_channels, kernel_size),
        nn.Dropout(p=0.2),
        nn.ReLU(),
        nn.MaxPool2d(2, 2),
    ]


def _dense_stage(in_features, out_features):
    """Return the layers of one hidden stage: linear, dropout, ReLU."""
    return [
        nn.Linear(in_features, out_features),
        nn.Dropout(p=0.2),
        nn.ReLU(),
    ]


# Single-channel CNN classifier with three outputs.
# For a 1x128x128 input the spatial size shrinks
# 124 -> 62 -> 58 -> 29 -> 26 -> 13 -> 10 -> 5 -> 1, so the last convolution
# yields 256x1x1, which matches the 256 inputs of the first linear layer.
model = nn.Sequential(
    *_conv_stage(1, 16, 5),
    *_conv_stage(16, 32, 5),
    *_conv_stage(32, 64, 4),
    *_conv_stage(64, 128, 4),

    nn.Conv2d(128, 256, 5),
    nn.ReLU(),

    # start_dim=-3 merges the last three dimensions, so the same layer handles
    # batched (N, C, H, W) and unbatched (C, H, W) inputs.
    nn.Flatten(-3),
    *_dense_stage(256, 128),
    *_dense_stage(128, 64),
    *_dense_stage(64, 32),
    *_dense_stage(32, 16),
    nn.Linear(16, 3),
)

In [28]:
# Plain SGD over all model parameters with a fixed learning rate of 1e-4.
optim = torch.optim.SGD(params=model.parameters(), lr=1e-4)
# Cross-entropy criterion used for both training and evaluation.
loss = nn.CrossEntropyLoss()

In [29]:
# One pass over the training loader; `hist` receives the per-batch losses.
hist = train_model(model, optim, loss, train_load)
# Plot each batch loss against its 1-based batch number.
plt.plot(np.arange(1, len(hist) + 1), hist)

1 1.0590306520462036
2 1.1225618124008179
3 1.0955907106399536
4 1.1434069871902466
5 1.0962663888931274
6 1.1003081798553467
7 1.071761965751648
8 1.1224225759506226
9 1.097135066986084
10 1.1211413145065308
11 1.0952668190002441
12 1.0943090915679932
13 1.1035356521606445
14 1.0876048803329468
15 1.1189473867416382
16 1.1059948205947876
17 1.0614268779754639
18 1.0470420122146606
19 1.0611003637313843
20 1.1065839529037476
21 1.0795766115188599
22 1.1176631450653076
23 1.0866965055465698
24 1.1154320240020752
25 1.1383767127990723
26 1.115302562713623
27 1.140577793121338
28 1.1329153776168823
29 1.0933541059494019
30 1.0820008516311646
31 1.0967891216278076
32 1.0999958515167236
33 1.101112961769104
34 1.123542308807373


KeyboardInterrupt: 

In [22]:
# Evaluate on the held-out loader; `hist` receives the per-batch losses.
hist = test_model(model, loss, test_load)
# Plot each batch loss against its 1-based batch number.
plt.plot(np.arange(1, len(hist) + 1), hist)

1.101883053779602
1.0672178268432617
1.0697286128997803
1.069008708000183
1.0821033716201782
1.0509510040283203
1.1611430644989014
1.0625858306884766
1.0302414894104004
1.0512696504592896
1.066550612449646
1.1178398132324219
1.0556714534759521
1.0689477920532227
1.0437097549438477
1.0855662822723389
1.0474025011062622
1.0485646724700928
1.0443274974822998
1.1003555059432983
1.0464160442352295
1.0504381656646729
1.066794991493225
1.0166828632354736
1.0913342237472534
1.0695472955703735
1.0434768199920654
1.07101309299469
1.060221552848816
1.0741846561431885
1.0582501888275146
1.043460488319397
1.064903736114502
1.0733835697174072
1.042049765586853
1.0494766235351562
1.06272554397583
1.0787222385406494
1.0543017387390137
1.0871864557266235
1.0537608861923218
1.0586328506469727
1.0794488191604614
1.0669575929641724
1.0619999170303345
1.044455885887146
1.0601948499679565
1.051945447921753
1.0516129732131958
1.054405927658081
1.074438214302063
1.0896761417388916
1.0735538005828857
1.0966632

KeyboardInterrupt: 

In [11]:
from os import listdir

def _image_id(filename):
    """Return the integer id from a name like ``img_<id>.<ext>``, or None if it has none."""
    pieces = filename.split('_')
    if len(pieces) < 2:
        return None
    try:
        return int(pieces[1].split('.')[0])
    except ValueError:
        return None


# Pair every test image with its numeric id. Directory entries that do not
# carry an id after the first underscore (hidden files, checkpoint folders, ...)
# are skipped instead of crashing the id parsing.
records = [
    (f"test_images/{name}", _image_id(name))
    for name in listdir('test_images')
    if _image_id(name) is not None
]
# Built as one chain rather than through in-place edits; the explicit column
# list also keeps the frame well-formed when the directory is empty.
fin = (
    pd.DataFrame(records, columns=['path', 'Id'])
    .sort_values('Id')
    .reset_index(drop=True)
)
fin

Unnamed: 0,path,Id
0,test_images/img_0.png,0
1,test_images/img_1.png,1
2,test_images/img_2.png,2
3,test_images/img_3.png,3
4,test_images/img_4.png,4
...,...,...
6915,test_images/img_6915.png,6915
6916,test_images/img_6916.png,6916
6917,test_images/img_6917.png,6917
6918,test_images/img_6918.png,6918


In [12]:
# Decode the test images with the same preprocessing the training images get:
# resize to 128x128 and convert to float, with pixels left in the raw 0-255
# range. Normalising only here would feed the model inputs on a different
# scale from the one it was trained on.
to_tensor = transforms.PILToTensor()
test_final = []
# Only the path is needed; reading the column by name avoids depending on the
# column order and no longer shadows the builtin `id`.
for path in fin['path']:
    # The context manager releases the file handle as soon as the pixels are read.
    with Image.open(path) as picture:
        img = to_tensor(picture.resize((128, 128)))
    # float32 matches what the model is fed (inference casts with .float()).
    test_final.append(img.to(torch.float32))

In [13]:
# Predict one class index per test image.
# eval() is set explicitly so dropout is disabled no matter which cell last
# touched the model's mode.
model.eval()
answers = []
with torch.no_grad():  # inference only: no autograd graph is needed
    for x in test_final:
        # Each image is passed unbatched; the position of the largest output
        # is taken as the predicted class.
        answers.append(int(model(x.float()).argmax()))

In [None]:
# Attach the predictions and remove the helper column. Done without in-place
# mutation and tolerant of an already-removed 'path' column, so re-running the
# cell does not raise KeyError.
fin = fin.assign(target_feature=answers).drop(columns=['path'], errors='ignore')


In [15]:
# Rename the 'Id' column to lowercase 'id'.
fin = fin.rename(columns={'Id': 'id'})

In [16]:
# Write the prediction table to disk without the index column.
fin.to_csv(path_or_buf='answer.csv', index=False)