Self written AI

Import

In [1]:
import numpy as np
import PIL

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms

import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader

import os

In [2]:
torch.cuda.is_available()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.3, 0.3, 0.3), (0.3,0.3,0.3))
])

In [4]:
class DataProcess():
    def __init__(self, label_file, img_dir, transform=None, target_transform=None):
        self.img_labels = pd.read_csv(label_file, header=None)
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
        image = PIL.Image.open(img_path)
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [13]:
train_data=DataProcess(label_file="./dataset/random_bg/labels/TRAIN.csv",img_dir="./dataset/random_bg/TRAIN/",transform=transform)
test_data=DataProcess(label_file="./dataset/random_bg/labels/TEST.csv",img_dir="./dataset/random_bg/TEST/",transform=transform)

In [6]:
image,label=train_data[0]
image.size()

torch.Size([3, 320, 479])

In [20]:
train_loader= torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True, num_workers=0)
test_loader= torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=False, num_workers=0)

In [14]:
class NeuralNet(nn.Module):
  def __init__(self):
    super().__init__()

    self.conv1= nn.Conv2d(3, 30, 20) # (50, 300, 459) ((320-20)/5)+1 = 28
    self.pool= nn.MaxPool2d(4, 4) # (50, 150, 229)
    self.conv2 = nn.Conv2d(30, 60, 60) #(30, 120, 199) => (30, 60, 99.5)
    self.fc1 = nn.Linear(3360, 1600)
    self.fc2 = nn.Linear(1600, 800)
    self.fc3 = nn.Linear(800, 8)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x

In [15]:
class NeuralNetV2(nn.Module):
  def __init__(self):
    super().__init__()

    self.conv1= nn.Conv2d(3, 15, 20) # (50, 300, 459) ((320-20)/5)+1 = 28
    self.pool= nn.MaxPool2d(2, 2) # (50, 150, 229)
    self.pool2= nn.MaxPool2d(5, 5) # (50, 150, 229)
    self.conv2 = nn.Conv2d(15, 30, 30) #(30, 120, 199) => (30, 60, 99.5)
    self.conv3 = nn.Conv2d(30, 60, 30) #(60, 30, 69.5) => (60, 15, 35)
    self.fc1 = nn.Linear(5040, 1600)
    self.fc2 = nn.Linear(1600, 800)
    self.fc3 = nn.Linear(800, 8)

  def forward(self, x):
    x = self.pool(F.relu(self.conv1(x)))
    x = self.pool(F.relu(self.conv2(x)))
    x = self.pool2(F.relu(self.conv3(x)))
    x = torch.flatten(x, 1)
    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    return x

In [16]:
net = NeuralNetV2()
net.to(device=device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [21]:
def test(best):
    test=NeuralNetV2()
    test.to(device)
    test.load_state_dict(torch.load("./models/self_train/random_bg_last_2_0_new.pt", weights_only=True))
    test.eval()
    total=0
    correct=0
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            images_test, labels_test = data
            images_test, labels_test = images_test.to(device), labels_test.to(device)
            outputs=test(images_test)
            _, predicts = torch.max(outputs, 1)

            total += labels_test.size(0)

            correct += (predicts==labels_test).sum().item()
        
        accuracy = 100*correct/total
        
        if best < accuracy:
            torch.save(test.state_dict(), "./models/self_train/random_bg_best_2_0_new.pt")
        
        print(f'Accuracy: {accuracy}%')

    return accuracy

In [22]:

def train():
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        output = net(inputs)
        # print(output[0].size())
        loss = loss_function(output, labels)
        loss.backward()
        optimizer.step()

    running_loss += loss.item()

    print(f'Loss: {running_loss / len(train_loader): .4f}')

    torch.save(net.state_dict(), "./models/self_train/random_bg_last_2_0_new.pt")
   
   
        

In [17]:
train()

torch.Size([5040])
torch.Size([5040])
torch.Size([5040])
torch.Size([5040])
torch.Size([5040])
torch.Size([5040])
torch.Size([5040])


KeyboardInterrupt: 

In [23]:
best=0
EPOCH=60
for epoch in range(EPOCH):
    print(f'Training epoch {epoch}...')

    train()

    last=test(best)
    if best < last:
        best=last

Training epoch 0...
Loss:  0.0315
Accuracy: 25.14792899408284%
Training epoch 1...
Loss:  0.0532
Accuracy: 26.035502958579883%
Training epoch 2...
Loss:  0.0673
Accuracy: 23.609467455621303%
Training epoch 3...
Loss:  0.0482
Accuracy: 25.14792899408284%
Training epoch 4...
Loss:  0.0376
Accuracy: 24.437869822485208%
Training epoch 5...
Loss:  0.0403
Accuracy: 41.005917159763314%
Training epoch 6...
Loss:  0.0128
Accuracy: 44.260355029585796%
Training epoch 7...
Loss:  0.0374
Accuracy: 44.97041420118343%
Training epoch 8...
Loss:  0.0154
Accuracy: 46.92307692307692%
Training epoch 9...
Loss:  0.0094
Accuracy: 55.680473372781066%
Training epoch 10...
Loss:  0.0328
Accuracy: 53.72781065088758%
Training epoch 11...
Loss:  0.0044
Accuracy: 54.260355029585796%
Training epoch 12...
Loss:  0.0048
Accuracy: 66.44970414201184%
Training epoch 13...
Loss:  0.0003
Accuracy: 68.99408284023669%
Training epoch 14...
Loss:  0.0763
Accuracy: 73.31360946745562%
Training epoch 15...
Loss:  0.0024
Accuracy

In [9]:
validate_defult=DataProcess(label_file="./dataset/default/labels/VALIDATE.csv",img_dir="./dataset/default/VALIDATE/",transform=transform)
validate_defult_loader= torch.utils.data.DataLoader(validate_defult, batch_size=10, shuffle=False, num_workers=0)
validate_no_bg=DataProcess(label_file="./dataset/no_bg/labels/VALIDATE.csv",img_dir="./dataset/no_bg/VALIDATE/",transform=transform)
validate_no_bg_loader= torch.utils.data.DataLoader(validate_no_bg, batch_size=10, shuffle=False, num_workers=0)
validate_random_bg=DataProcess(label_file="./dataset/random_bg/labels/VALIDATE.csv",img_dir="./dataset/random_bg/VALIDATE/",transform=transform)
validate_random_bg_loader= torch.utils.data.DataLoader(validate_random_bg, batch_size=10, shuffle=False, num_workers=0)

validate_loaders=[validate_defult_loader,validate_no_bg_loader,validate_random_bg_loader]

In [10]:
def validate(loader: torch.utils.data.DataLoader):
    validate_model = NeuralNet()
    validate_model.to(device)
    validate_model.load_state_dict(torch.load("./models/self_train/random_bg_best_2_0.pt", weights_only=True))
    validate_model.eval()
    total=0
    correct=0
    with torch.no_grad():
        for data in loader:
            image, label =data
            image, label = image.to(device), label.to(device)
            outputs=validate_model(image)
            _, predicts = torch.max(outputs, 1)

            total += label.size(0)

            correct += (predicts==label).sum().item()
        
        accuracy = 100*correct/total
        print(f'Accuracy: {accuracy: .4f}')

In [12]:
for loader in validate_loaders:
    validate(loader)

Accuracy:  58.8026
Accuracy:  51.4252
Accuracy:  93.9485
