***Милютина Лилия Александровна***

In [None]:
import os
import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import matplotlib.pyplot as plt


import torchvision
import torch.utils.data as data
import torchvision.models as models
import torchvision.transforms as transforms

import PIL
from PIL import Image
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Check that a CUDA device is visible; later cells call .cuda() unconditionally.
torch.cuda.is_available()

**Считываем данные с диска**

In [None]:
# Mount Google Drive so that files under /content/drive become readable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Dataset folder on the mounted drive and the training annotation file in it.
data_path = "/content/drive/MyDrive/ml3/"
train_ann_path = f"{data_path}train.csv"

# Load the annotation table and preview its first rows.
train_df = pd.read_csv(train_ann_path)
print(train_df.head())

In [None]:
# Quietly extract the training archive into the current working directory.
!unzip -q '/content/drive/MyDrive/ml3/train.zip' -d './'

In [None]:
# Quietly extract the test archive into the current working directory.
!unzip -q '/content/drive/MyDrive/ml3/test.zip' -d './'

In [None]:
# Report how many entries each extracted image folder contains.
for folder in ('./train/', './test/'):
    print(len(os.listdir(folder)))

In [None]:
# Test table: one row per file in ./test/ with a placeholder class of 0,
# giving the (file name, label) pair that OurDataset unpacks from each row.
image_names = os.listdir('./test/')

test = pd.DataFrame(image_names, columns=["filename"])
test["class_number"] = 0
test.reset_index(inplace=True, drop=True)

In [None]:
# Preview the first rows of the test table.
print(test.head())

**Создаем кастомный OurDataset**

In [None]:
class OurDataset(data.Dataset):
    """Image dataset backed by a two-column table of file names and labels.

    Arguments:
        root (str): directory that contains the image files.
        flist (pandas.DataFrame): table whose rows hold exactly two values,
            the image file name and its class label, in that order.
        transform (callable, optional): applied to every loaded PIL image;
            when ``None`` the RGB PIL image is returned unchanged.
    """

    def __init__(self, root, flist, transform=None):
        super().__init__()
        self.root = root
        self.imlist = flist
        self.transform = transform

    def __getitem__(self, index):
        """Return ``(image, target, file_name)`` for the row at position ``index``.

        Raises:
            FileNotFoundError: if the image file does not exist under ``root``.
        """
        # Positional lookup: samplers hand out positions 0..len-1, which match
        # index labels only when the frame carries a fresh RangeIndex.
        impath, target = self.imlist.iloc[index]

        full_imname = os.path.join(self.root, impath)
        if not os.path.exists(full_imname):
            # Fail with a clear message instead of printing and carrying on.
            raise FileNotFoundError('No file ' + full_imname)

        # The context manager closes the file handle; the converted RGB image
        # is what gets used afterwards.
        with Image.open(full_imname) as raw:
            img = raw.convert('RGB')

        # transform is optional in the constructor, so guard the call.
        if self.transform is not None:
            img = self.transform(img)

        return img, target, impath

    def __len__(self):
        """Return the number of rows in the file table."""
        return len(self.imlist)

**Transforms**

In [None]:
# Augmentation pipeline: RandomApply (default probability) either runs the
# whole chain of geometric distortions below or skips it, then the image is
# converted to a tensor.
_augmentations = [
    transforms.RandomRotation(degrees=20),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.RandomAffine(degrees=0, shear=20),
    transforms.RandomAffine(degrees=0, scale=(0.8, 0.8)),
]
transform_for_train_and_val = transforms.Compose([
    transforms.RandomApply(_augmentations),
    transforms.ToTensor(),
])

# Deterministic pipeline: tensor conversion only.
transform_for_test = transforms.Compose([transforms.ToTensor()])

In [None]:
# Hold out 20% of the annotated rows for validation, with a fixed seed.
train, val = train_test_split(train_df, test_size=0.2, random_state=24)

# Renumber both parts from 0 so row labels coincide with row positions.
train = train.reset_index(drop=True)
val = val.reset_index(drop=True)

batch_size = 32

**Делаем sampling**

In [None]:
from imblearn.over_sampling import RandomOverSampler

# sampling_strategy is passed by keyword: recent imbalanced-learn releases
# do not accept it as a positional argument.
# NOTE(review): 'minority' resamples only the single rarest class - confirm
# that this, rather than balancing every under-represented class, is intended.
ros = RandomOverSampler(sampling_strategy='minority')
X_res, y_res = ros.fit_resample(
    train.iloc[:, 0].to_numpy().reshape(-1, 1),  # file names as a 2-D column
    train.iloc[:, 1],                            # class labels
)

# Rebuild a two-column frame (file name, label) from the resampled data.
train_ = pd.DataFrame(X_res, columns=["filename"])
train_["class_number"] = y_res

In [None]:
# Training samples get random augmentation. Validation uses the deterministic
# tensor-only pipeline so that its metrics are repeatable and are measured on
# the same kind of input the model sees at test time.
trainset = OurDataset(root='./train', flist=train_, transform=transform_for_train_and_val)
valset = OurDataset(root='./train', flist=val, transform=transform_for_test)

In [None]:
# Batch loaders over the two datasets; only the training loader shuffles.
trainloader = DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=8,
    pin_memory=True,
)
valloader = DataLoader(
    valset,
    batch_size=batch_size,
    num_workers=8,
    pin_memory=True,
)

**Посмотрим на изображения**

In [None]:
def myshow(img):
    """Display an image tensor in a 10x10 inch matplotlib figure.

    Args:
        img (torch.Tensor): image tensor, expected as (channels, height,
            width); its axes are reordered to (height, width, channels)
            before being handed to ``imshow``.
    """
    # .cpu() lets this work for tensors that live on the GPU as well.
    npimg = img.detach().cpu().numpy()
    plt.figure(figsize=(10, 10))
    plt.imshow(npimg.transpose(1, 2, 0))

# Pull one batch from the training loader and show it as an image grid.
# The built-in next() is used because loader iterators are not guaranteed to
# expose a .next() method.
trainiter = iter(trainloader)
images, labels, impaths = next(trainiter)
myshow(torchvision.utils.make_grid(images))

In [None]:
# Inspect the sampled batch: tensor shape, labels and source file names.
for batch_part in (images.shape, labels, impaths):
    print(batch_part)

**Создаем модель**

In [None]:
nclasses = 67


class Net(nn.Module):
    """Three conv / batch-norm / max-pool stages followed by two linear layers.

    Args:
        num_classes (int): width of the output layer; defaults to ``nclasses``.

    The first linear layer expects 256 * 6 * 6 features per sample, i.e. an
    input whose spatial size shrinks to 6x6 after three 2x poolings
    (for example 48x48 images).
    """

    def __init__(self, num_classes=nclasses):
        super().__init__()
        # Paddings keep the spatial size unchanged through each convolution.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, padding=(3, 3))
        self.conv2 = nn.Conv2d(64, 128, kernel_size=5, padding=(2, 2))
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=(1, 1))
        self.batchnorm1 = nn.BatchNorm2d(64)
        self.batchnorm2 = nn.BatchNorm2d(128)
        self.batchnorm3 = nn.BatchNorm2d(256)
        # Kept so the attribute remains available; forward() does not use it.
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(256 * 6 * 6, 256)
        self.fc2 = nn.Linear(256, num_classes)

    def forward(self, x):
        """Return log-probabilities of shape (batch, num_classes)."""
        x = F.max_pool2d(F.relu(self.batchnorm1(self.conv1(x))), 2)
        x = F.max_pool2d(F.relu(self.batchnorm2(self.conv2(x))), 2)
        x = F.max_pool2d(F.relu(self.batchnorm3(self.conv3(x))), 2)
        # Flatten per sample. A fixed view(-1, 256*6*6) can silently regroup
        # samples when the spatial size is wrong; flattening from dim 1 keeps
        # the batch dimension and lets fc1 report the mismatch instead.
        x = torch.flatten(x, 1)
        x = F.dropout(x, training=self.training)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Explicit class dimension; relying on the implicit dim is deprecated.
        return F.log_softmax(x, dim=1)

In [None]:
# Hyper-parameters of the run.
lr = 1e-3
num_epochs = 20

# Model on the GPU together with its loss and SGD-with-momentum optimiser.
# NOTE(review): Net.forward already returns log_softmax output, and
# CrossEntropyLoss applies log-softmax itself - redundant; confirm whether
# NLLLoss or raw logits were intended.
net = Net().cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=lr, momentum=0.9)

**Создаем функцию для обучения**

In [None]:
def run_epoch(epoch, is_train):
    """Run one pass over the training or validation loader and print metrics.

    Relies on the module-level ``net``, ``criterion``, ``optimizer``,
    ``trainloader``, ``valloader`` and ``num_epochs``.

    Args:
        epoch (int): zero-based epoch number, used only in the progress message.
        is_train (bool): True to train on ``trainloader`` (weights are
            updated); False to evaluate on ``valloader``.

    Raises:
        ValueError: if the selected loader yields no batches.
    """
    if is_train:
        net.train()
        loader = trainloader
        print("Training epoch: ", epoch + 1, "/", num_epochs)
    else:
        net.eval()
        loader = valloader
        print('Validation')

    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    # Gradients are needed only while training; enabling them conditionally
    # also covers callers that do not wrap evaluation in torch.no_grad().
    with torch.set_grad_enabled(is_train):
        for images, labels, _ in loader:
            images, labels = images.cuda(), labels.cuda()

            outputs = net(images)
            loss = criterion(outputs, labels)

            if is_train:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            running_loss += loss.item()
            total += images.size(0)
            num_batches += 1

            _, predicted = torch.max(outputs, 1)
            # .item() keeps the counter a plain Python number rather than
            # a tensor on the GPU.
            correct += (predicted == labels).sum().item()

    if num_batches == 0:
        # Without this guard the averages below would divide by zero.
        raise ValueError('loader produced no batches')

    print('Loss: {:.3f}, accuracy: {:.3f}'.format(running_loss / num_batches, correct / total * 100.0))

**Обучение**

In [None]:
%%time
# Alternate one training pass and one validation pass for every epoch.
for epoch in range(num_epochs):
  run_epoch(epoch, is_train=True) 

  # Validation only measures the model, so gradient tracking is switched off.
  with torch.no_grad():
      run_epoch(epoch, is_train=False)

  print('----------------------')

print('Finished training! Enjoy your results!')

**Предсказания на тестовом датасете**

In [None]:
# Test images use the tensor-only transform and are read without shuffling,
# so predictions come out in the row order of the test table.
testset = OurDataset(root='./test', flist=test, transform=transform_for_test)
testloader = DataLoader(
    testset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=8,
    pin_memory=True,
)

In [None]:
# Predict a class for every test image and store it in the test table.
# eval() switches dropout off and makes batch-norm use its running statistics;
# no_grad() avoids building autograd graphs during inference.
net.eval()
res = []
with torch.no_grad():
    # The loop deliberately avoids a variable called `data`: at notebook top
    # level it would overwrite the torch.utils.data alias used by OurDataset.
    for images, _labels, _paths in testloader:
        outputs = net(images.cuda())
        res.extend(torch.argmax(outputs, dim=1).cpu().tolist())
test["class_number"] = res

In [None]:
# Write file names with their predicted classes to Drive, without the index column.
test.to_csv('/content/drive/MyDrive/output.csv', index=False)