Self written AI

Import

In [1]:
import numpy as np
import PIL

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader

import os

In [2]:
torch.cuda.is_available()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.3, 0.3, 0.3), (0.3,0.3,0.3))
])

In [5]:
class DataProcess():
    def __init__(self, label_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(label_file, header=None)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = PIL.Image.open(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [8]:
train_data=DataProcess(label_file="./dataset/default/labels/TRAIN.csv",img_dir="./dataset/default/TRAIN/",transform=transform)
test_data=DataProcess(label_file="./dataset/default/labels/TEST.csv",img_dir="./dataset/default/TEST/",transform=transform)

In [6]:
image,label=train_data[0]
image.size()

torch.Size([3, 320, 479])

In [9]:
train_loader= torch.utils.data.DataLoader(train_data, batch_size=32, shuffle=True, num_workers=0)
test_loader= torch.utils.data.DataLoader(test_data, batch_size=32, shuffle=False, num_workers=0)

In [14]:
class NeuralNet(nn.Module):
  def __init__(self):
    super().__init__()

    self.conv1= nn.Conv2d(3, 30, 20) # (50, 300, 459) ((320-20)/5)+1 = 28
    self.pool= nn.MaxPool2d(4, 4) # (50, 150, 229)
    self.conv2 = nn.Conv2d(30, 60, 60) #(30, 120, 199) => (30, 60, 99.5)
    self.fc1 = nn.Linear(3360, 1600)
    self.fc2 = nn.Linear(1600, 800)
    self.fc3 = nn.Linear(800, 8)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x

In [7]:
class NeuralNetV2(nn.Module):
  def __init__(self):
    super().__init__()

    self.conv1= nn.Conv2d(3, 15, 20) # (50, 300, 459) ((320-20)/5)+1 = 28
    self.pool= nn.MaxPool2d(2, 2) # (50, 150, 229)
    self.pool2= nn.MaxPool2d(5, 5) # (50, 150, 229)
    self.conv2 = nn.Conv2d(15, 30, 30) #(30, 120, 199) => (30, 60, 99.5)
    self.conv3 = nn.Conv2d(30, 60, 30) #(60, 30, 69.5) => (60, 15, 35)
    self.fc1 = nn.Linear(5040, 1600)
    self.fc2 = nn.Linear(1600, 800)
    self.fc3 = nn.Linear(800, 8)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = self.pool2(F.relu(self.conv3(x)))
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x

In [10]:
class NeuralNetV3(nn.Module):
  def __init__(self):
    super().__init__()

    self.conv1= nn.Conv2d(3, 15, 20) # (50, 300, 459) ((320-20)/5)+1 = 28
    self.pool= nn.MaxPool2d(2, 2) # (50, 150, 229)
    self.pool2= nn.MaxPool2d(5, 5) # (50, 150, 229)
    self.conv2 = nn.Conv2d(15, 30, 30) #(30, 120, 199) => (30, 60, 99.5)
    self.conv3 = nn.Conv2d(30, 60, 30) #(60, 30, 69.5) => (60, 15, 35)
    self.fc1 = nn.Linear(5040, 2000)
    self.fc2 = nn.Linear(2000, 8)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = self.pool2(F.relu(self.conv3(x)))
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = self.fc2(x)
    return x

In [11]:
net = NeuralNetV3()
net.to(device=device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.003, momentum=0.9)

In [12]:
def test(best):
    test=NeuralNetV3()
    test.to(device)
    test.load_state_dict(torch.load("./models/self_train/default_last_3_0.pt", weights_only=True))
    test.eval()
    total=0
    correct=0
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            images_test, labels_test = data
            images_test, labels_test = images_test.to(device), labels_test.to(device)
            outputs=test(images_test)
            _, predicts = torch.max(outputs, 1)

            total += labels_test.size(0)

            correct += (predicts==labels_test).sum().item()
        
        accuracy = 100*correct/total
        
        if best < accuracy:
            torch.save(test.state_dict(), "./models/self_train/default_best_3_0.pt")
        
        print(f'Accuracy: {accuracy}%')

    return accuracy

In [13]:

def train():
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        output = net(inputs)
        # print(output[0].size())
        loss = loss_function(output, labels)
        loss.backward()
        optimizer.step()

    running_loss += loss.item()

    print(f'Loss: {running_loss / len(train_loader): .4f}')

    torch.save(net.state_dict(), "./models/self_train/default_last_3_0.pt")
   
   
        

In [20]:
train()

torch.Size([14280])
torch.Size([14280])
torch.Size([14280])


KeyboardInterrupt: 

In [14]:
best=0
EPOCH=60
for epoch in range(EPOCH):
    print(f'Training epoch {epoch}...')

    train()

    last=test(best)
    if best < last:
        best=last

Training epoch 0...
Loss:  0.0159
Accuracy: 38.42165898617512%
Training epoch 1...
Loss:  0.0086
Accuracy: 46.71658986175115%
Training epoch 2...
Loss:  0.0292
Accuracy: 60.19585253456221%
Training epoch 3...
Loss:  0.0015
Accuracy: 72.1774193548387%
Training epoch 4...
Loss:  0.0086
Accuracy: 68.37557603686636%
Training epoch 5...
Loss:  0.0037
Accuracy: 77.76497695852535%
Training epoch 6...
Loss:  0.0000
Accuracy: 77.47695852534562%
Training epoch 7...
Loss:  0.0003
Accuracy: 82.25806451612904%
Training epoch 8...
Loss:  0.0000
Accuracy: 81.27880184331798%
Training epoch 9...
Loss:  0.0000
Accuracy: 86.00230414746544%
Training epoch 10...
Loss:  0.0000
Accuracy: 84.44700460829493%
Training epoch 11...
Loss:  0.0000
Accuracy: 86.52073732718894%
Training epoch 12...
Loss:  0.0000
Accuracy: 86.29032258064517%
Training epoch 13...
Loss:  0.0000
Accuracy: 86.63594470046083%
Training epoch 14...
Loss:  0.0000
Accuracy: 84.21658986175115%
Training epoch 15...
Loss:  0.0000
Accuracy: 86.002

In [15]:
validate_defult=DataProcess(label_file="./dataset/default/labels/VALIDATE.csv",img_dir="./dataset/default/VALIDATE/",transform=transform)
validate_defult_loader= torch.utils.data.DataLoader(validate_defult, batch_size=10, shuffle=False, num_workers=0)
validate_no_bg=DataProcess(label_file="./dataset/no_bg/labels/VALIDATE.csv",img_dir="./dataset/no_bg/VALIDATE/",transform=transform)
validate_no_bg_loader= torch.utils.data.DataLoader(validate_no_bg, batch_size=10, shuffle=False, num_workers=0)
validate_random_bg=DataProcess(label_file="./dataset/random_bg/labels/VALIDATE.csv",img_dir="./dataset/random_bg/VALIDATE/",transform=transform)
validate_random_bg_loader= torch.utils.data.DataLoader(validate_random_bg, batch_size=10, shuffle=False, num_workers=0)

validate_loaders=[validate_defult_loader,validate_no_bg_loader,validate_random_bg_loader]

In [17]:
def validate(loader: torch.utils.data.DataLoader):
    validate_model = NeuralNetV3()
    validate_model.to(device)
    validate_model.load_state_dict(torch.load("./models/self_train/default_best_3_0.pt", weights_only=True))
    validate_model.eval()
    total=0
    correct=0
    with torch.no_grad():
        for data in loader:
            image, label =data
            image, label = image.to(device), label.to(device)
            outputs=validate_model(image)
            _, predicts = torch.max(outputs, 1)

            total += label.size(0)

            correct += (predicts==label).sum().item()
        
        accuracy = 100*correct/total
        print(f'Accuracy: {accuracy: .4f}')

In [18]:
for loader in validate_loaders:
    validate(loader)

Accuracy:  90.6343
Accuracy:  33.1354
Accuracy:  44.0383
