<a href="https://colab.research.google.com/github/AlexeyArcher/Proj_nn/blob/main/proj.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
from pathlib import Path
from zipfile import ZipFile as zipfile
import sys
import time
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils
import torchvision
import torchvision.models as models
import torchvision.transforms as transforms
from sklearn.model_selection import train_test_split
from torch.autograd import Variable
import shutil

In [None]:
#from google.colab import drive
#drive.mount('/content/drive')

In [None]:
# Working directory on the mounted Drive; the dataset is expected in its "dataset" subfolder.
image_path = Path("/content/drive/MyDrive/Colab Notebooks")
input_folder = image_path / "dataset"

# Unpack the archive only when the dataset folder is not there yet.
if not os.path.exists(input_folder):
    with zipfile(image_path / "dataset.zip", mode='r') as zipf:
        print("Extracting files...")
        zipf.extractall(path=image_path)

In [None]:
# Use the first CUDA GPU when one is present; otherwise fall back to the CPU so the
# notebook still runs on a runtime without an accelerator.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
batch_size = 64
# Let cuDNN benchmark and cache convolution algorithms (harmless when running on CPU).
torch.backends.cudnn.benchmark = True

In [None]:
# Read the label index stored next to the dataset archive.
data = pd.read_csv(image_path / "train.csv")

# "image" holds the file names, "Class" the matching labels.
data_pic, data_y = data["image"], data["Class"]

In [None]:
# Number of distinct labels; used as the width of the classifier head.
total_classes = data_y.unique().shape[0]

In [None]:
# 80/20 shuffled split. The seed is fixed so that a kernel restart reproduces exactly
# the same train/test membership.
pic_train, pic_test, y_train, y_test = train_test_split(
    data_pic,
    data_y,
    train_size=0.8,
    shuffle=True,
    random_state=42,
)

In [None]:

# Channel statistics of ImageNet. The ResNet-101 used in this notebook is initialised
# from ImageNet-pretrained weights, which expect inputs normalised with these values
# rather than a generic 0.5 / 0.5.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: fixed-size resize followed by random crop / flip / colour
# augmentation, then tensor conversion and normalisation.
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(
        brightness=0.4,
        contrast=0.4,
        saturation=0.4,
        hue=0.2),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline: deterministic resize / crop only, same normalisation as training.
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    normalize,
])

In [None]:
# Make sure both split roots exist; creating them again is a no-op.
for split_name in ("train", "test"):
    (image_path / "dataset" / split_name).mkdir(parents=True, exist_ok=True)

In [None]:


def _move_split(image_names, labels, split):
    """Move images into ``input_folder/<split>/<label>/``.

    ``image_names`` and ``labels`` are iterated in lockstep, so the label comes straight
    from the split instead of re-scanning the whole DataFrame for every image.
    Files whose source is no longer in ``input_folder`` (for example because an earlier
    run already moved them) are skipped, which makes the cell safe to re-run and lets an
    interrupted run resume.
    """
    moved = skipped = 0
    for image_name, label in zip(image_names, labels):
        source = input_folder / image_name
        if not source.exists():
            skipped += 1
            continue
        class_dir = input_folder / split / str(label)
        class_dir.mkdir(exist_ok=True, parents=True)
        shutil.move(source, class_dir / image_name)
        moved += 1
    print(f"{split}: moved {moved} file(s), skipped {skipped} not found in {input_folder}")


_move_split(pic_train, y_train, "train")
_move_split(pic_test, y_test, "test")

In [None]:
# Training data: augmented and reshuffled every epoch.
trainset = torchvision.datasets.ImageFolder(os.path.join(input_folder, "train"), transform=train_transform)
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, num_workers=2, shuffle=True, pin_memory = True)

# Evaluation data: no shuffling, so every evaluation pass visits the images in the same order.
testset = torchvision.datasets.ImageFolder(os.path.join(input_folder, "test"), transform=test_transform)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, num_workers=2, shuffle=False, pin_memory = True)

In [None]:
from torchvision.models import ResNet101_Weights

# Classification loss shared by training and checkpointing.
criterion = nn.CrossEntropyLoss()

# ResNet-101 with the default pretrained weights; the final fully connected layer is
# replaced so that it emits one logit per class found in the label column.
model_ft = models.resnet101(weights=ResNet101_Weights.DEFAULT)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(in_features=num_ftrs, out_features=total_classes)
model_ft = model_ft.to(device)

# SGD with momentum over every parameter of the network.
optimizer = optim.SGD(params=model_ft.parameters(), lr=0.1, momentum=0.9)

# mode='max': the scheduler is meant to be stepped with a metric that should increase.
lrscheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer=optimizer, mode='max', patience=1)

Downloading: "https://download.pytorch.org/models/resnet101-cd907fc2.pth" to /root/.cache/torch/hub/checkpoints/resnet101-cd907fc2.pth


  0%|          | 0.00/171M [00:00<?, ?B/s]

In [None]:
def train_model(model, criterion, optimizer, scheduler, n_epochs = 5, checkpoint_every = 1000):
    """Train ``model`` on the global ``train_loader`` and evaluate it after every epoch.

    Args:
        model: network to optimise. Batches are moved to the global ``device`` before the
            forward pass, so the model has to live on that device as well.
        criterion: loss callable, invoked as ``criterion(outputs, labels)``.
        optimizer: optimiser providing ``zero_grad()``, ``step()`` and ``state_dict()``.
        scheduler: stepped once per epoch with the accuracy returned by ``eval_model``.
        n_epochs: number of passes over ``train_loader``.
        checkpoint_every: a checkpoint is written on every batch whose index is a
            multiple of this value (batch 0 of each epoch included). A falsy value
            disables checkpointing.

    Returns:
        Tuple ``(model, losses, test_accuracies)`` where ``losses`` holds the mean
        training loss per epoch and ``test_accuracies`` the matching ``eval_model`` results.
    """
    losses = []
    test_accuracies = []
    model.train()
    for epoch in range(n_epochs):
        since = time.time()
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(train_loader):
            inputs = inputs.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            if checkpoint_every and i % checkpoint_every == 0:
                # State is captured before this batch's optimiser step is applied.
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'loss': loss,
                }, image_path / "checkpoint_model.pt")
                # A scripted copy is also written to the current working directory.
                model_temp = torch.jit.script(model)
                model_temp.save("model_temp.pt")
                print("Saved...")
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        epoch_duration = time.time() - since
        epoch_loss = running_loss / len(train_loader)
        print("Epoch %s, duration: %d s, loss: %.4f" % (epoch+1, epoch_duration, epoch_loss))

        losses.append(epoch_loss)

        # Evaluate in eval mode, then switch back before the next epoch.
        model.eval()
        test_acc = eval_model(model)
        test_accuracies.append(test_acc)

        model.train()
        scheduler.step(test_acc)
    print('Finished Training')
    return model, losses, test_accuracies


In [18]:
def eval_model(model, loader = None):
    """Return the top-1 accuracy, in percent, of ``model`` over a labelled loader.

    Args:
        model: network to evaluate. It is put into eval mode for the duration of the
            call and restored to its previous mode afterwards.
        loader: iterable of ``(images, labels)`` batches. Defaults to the global
            ``test_loader``.

    Returns:
        ``100 * correct / total`` as a float.

    Raises:
        ZeroDivisionError: if the loader yields no samples.
    """
    if loader is None:
        loader = test_loader

    # Batches go to the device holding the model's weights; a model without
    # parameters falls back to the notebook-wide ``device``.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else device

    correct = 0
    total = 0
    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for images, labels in loader:
                images = images.to(target)
                labels = labels.to(target)

                # Evaluate the model that was passed in, not a global one.
                outputs = model(images)
                _, predicted = torch.max(outputs, 1)

                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        model.train(was_training)

    test_acc = 100.0 * correct / total
    print('test acc: %d %%' % (
        test_acc))
    return test_acc

In [None]:

# Run the fine-tuning loop for 20 epochs and keep the per-epoch history.
model_ft, training_losses, test_accs = train_model(
    model=model_ft,
    criterion=criterion,
    optimizer=optimizer,
    scheduler=lrscheduler,
    n_epochs=20,
)

Saved...


In [None]:
# Show which accelerator is in use; querying CUDA without a GPU raises, so report "cpu" instead.
torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"

In [17]:
# train_model() leaves the network in training mode. Switch to eval mode before
# exporting so that batch-norm layers use their running statistics in the saved artifact.
model_ft.eval()
model_saved = torch.jit.script(model_ft)
model_saved.save("model_resnet101.pt")

In [None]:
model_check = torch.jit.load