In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
%matplotlib inline

import torch
import torch.nn as nn 
import torchvision
from torch.utils.data import DataLoader, random_split, Dataset
from torchvision import transforms
from torchvision.datasets import ImageFolder
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
import torch.optim.lr_scheduler as lr_scheduler
from torchvision.models import vgg16
from catboost import CatBoostClassifier
from torchvision import datasets
from torch.utils.data import DataLoader, Subset, ConcatDataset


import cv2
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.metrics import classification_report, confusion_matrix, ConfusionMatrixDisplay, accuracy_score
import PIL
from PIL import Image

from IPython.display import clear_output
from tqdm import tqdm
import sys
import os
import shutil

#### File I'll use to complete full training of model:

#### First, let's upload the images:

#### ImageFolder sorts by default as I understood

In [3]:
base_transform = transforms.Compose([transforms.Resize((64, 64)),
                                    transforms.ToTensor(), 
                                    transforms.GaussianBlur(kernel_size = (11, 11)),
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

base_transform_64 = transforms.Compose([transforms.Resize((64, 64)),
                                    transforms.ToTensor(), 
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

transform_from_torch = transforms.Compose([transforms.Resize((64, 64)), 
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) 

transform_from_torch_224 = transforms.Compose([transforms.Resize((224, 224)), 
                                    transforms.ToTensor(),
                                    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) 

base_augmentation = transforms.Compose([
            transforms.Lambda(lambda img: img.crop((0, 0, 256, 256))),
            transforms.Resize((64, 64)),
            transforms.ToTensor(), 
            transforms.RandomHorizontalFlip(p=1),
            transforms.ColorJitter(brightness=.5, hue=.3),
            # transforms.RandomCrop(size=(64, 64)),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

base_augmentation_256 = transforms.Compose([transforms.Resize((64, 64)),
                                    transforms.ToTensor(), 
                                    transforms.RandomHorizontalFlip(p=0.95),
                                    transforms.GaussianBlur(kernel_size = (11, 11)),
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

base_base_augmentation = transforms.Compose([transforms.Resize((64, 64)),
                                    transforms.ToTensor(), 
                                    transforms.RandomHorizontalFlip(p=0.75),
                                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

left_upper_corner_transfrom = transforms.Compose([
            transforms.Lambda(lambda img: img.crop((0, 0, 175, 175))),
                    transforms.Resize((128, 128)),
                                    transforms.ToTensor(), 
                                    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])])
])

base_augmentation_no_norm = transforms.Compose([
            transforms.Lambda(lambda img: img.crop((0, 0, 175, 175))),
            transforms.Resize((128, 128)),
            transforms.ToTensor(), 
            transforms.RandomHorizontalFlip(p=1),
            transforms.ColorJitter(brightness=.5, hue=.3),
            # transforms.RandomCrop(size=(64, 64)),
            # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
            ])

In [4]:
def get_dataloaders(transform = base_transform_64, 
                    augmentation = base_augmentation,
                    path_first = '/kaggle/input/indoor-outdoor/data/class1',
                      path_second = '/kaggle/input/indoor-outdoor/data/class2', 
                      root_dir = "/kaggle/input/indoor-outdoor/data" , 
                      train_size = 0.7, val_size = 0.15, batch_size = 4, difference = 7500):

    first_class_paths = os.listdir(path_first)
    second_class_paths = os.listdir(path_second)

    size_from_first = int(len(first_class_paths)*0.5)
    size_from_second = int(len(second_class_paths)*0.5)

    indoor_outdoor_dataset = ImageFolder(root=root_dir, transform=transform)
    indoor_outdoor_dataset_augmented = ImageFolder(root=root_dir, transform=augmentation)

    indoor_dataset = Subset(indoor_outdoor_dataset, range(0, size_from_first))
    outdoor_dataset = Subset(indoor_outdoor_dataset, range(len(first_class_paths), len(first_class_paths) + size_from_second - 1))
    outdoor_dataset_augmented = Subset(indoor_outdoor_dataset_augmented,
                                       range(len(first_class_paths), len(first_class_paths) + difference))

    full_data = ConcatDataset([indoor_dataset, outdoor_dataset, outdoor_dataset_augmented])

    train_size = int(train_size * len(full_data))  # 70% for train
    val_size = int(val_size * len(full_data))   # 15% for validation
    test_size = len(full_data) - train_size - val_size  # Rest for the test

    train_dataset, val_dataset, test_dataset = random_split(full_data, [train_size, val_size, test_size], 
                                                            generator=torch.Generator().manual_seed(42))

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


    return train_loader, val_loader, test_loader



In [5]:
# train_loader, val_loader, test_loader = get_dataloaders(transform=transform_from_torch_224,
#                                                          augmentation=base_augmentation_224,
#                                                          batch_size=32)

# For 0.5 0.5 Network

# train_loader, val_loader, test_loader = get_dataloaders(transform=base_transform_64,
#                                                          augmentation=base_augmentation,
#                                                          batch_size=250, difference=12000) # Best Right Now

train_loader, val_loader, test_loader = get_dataloaders(transform=base_transform,
                                                         augmentation=base_augmentation_256,
                                                         batch_size=128, difference=12000)

In [None]:
def compare_length_datasets(dataloader):

    '''Function that calcs number of labels presented in dataset'''
    
    labels_total = np.array([])

    for batch in tqdm(dataloader):

        images, labels = batch

        labels = labels.cpu().numpy()
        labels_total = np.append(labels_total, labels)
    
    return labels_total

labels_array = compare_length_datasets(train_loader)

In [None]:
labels_array_test, labels_array_val = compare_length_datasets(test_loader), compare_length_datasets(val_loader)

In [None]:
def get_classes_info(array):
    '''Prints some info about data'''  

    print(f"Number of images in dataset: {len(array)}")
    print(f"Number of classes presented in data: {len(np.unique(array))}")

    print()
    
    mask_class_1 = np.where(array ==  0)
    mask_class_2 = np.where(array == 1)

    first_class = array[mask_class_1]
    second_class = array[mask_class_2]

    print(f"Number of images by class 0: {len(first_class)}")
    print(f"Number of images by class 1: {len(second_class)}")

    print()

    if len(first_class) == len(second_class):
        print("No disbalance in data")
        print()
    
    else:
        if len(first_class) > len(second_class):
            print("Disbalance: number in first is greater")
        else:
            print("Disbalance: number in second is greater")
        print(f"Difference: {abs(len(first_class) - len(second_class))}")


get_classes_info(labels_array)

In [None]:
get_classes_info(labels_array_test)

In [None]:
get_classes_info(labels_array_val)

In [None]:
def print_info(train_loader, val_loader, test_loader):

    print(f"Length of train_loader: {len(train_loader)}")
    print(f"Length of val_loader: {len(val_loader)}")
    print(f"length of test_loader: {len(test_loader)}")

print_info(train_loader, val_loader, test_loader)

#### Ok, finally we acquired that. Let's show how the images look like (64 x 64, 0.5, 0.5):

In [None]:
def matplotlib_imshow(img, one_channel=False):

    if one_channel:
        img = img.mean(dim=0)
    img = img * 0.5 + 0.5     # unnormalize
    npimg = img.numpy()
    if one_channel:
        plt.imshow(npimg, cmap="Greys")
    else:
        plt.imshow(np.transpose(npimg, (1, 2, 0)))


dataiter = iter(train_loader)
images, labels = next(dataiter)

img_grid = torchvision.utils.make_grid(images[0])

matplotlib_imshow(img_grid, one_channel=False)
print(len(labels))

In [None]:
print_info(train_loader, val_loader, test_loader)

#### Check batches:

In [None]:
def check_after_dataload(dataloader, break_iter = True):
    
    for images, labels in dataloader:
        print(images.size(), labels)
        if break_iter:
            break
        
check_after_dataload(train_loader)

In [None]:
writer = SummaryWriter('runs/running_indoor_outdoor_model_double_convolution')

#### Class DoubleConv (64 x 64, 0.5, 0.5):

In [6]:
class CNNIndoorOutdoorDoubleConv(nn.Module):
    
    def __init__(self):
        super(CNNIndoorOutdoorDoubleConv, self).__init__()
        self.convolution_layers = nn.Sequential(
            
            nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, stride = 1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride = 1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride = 1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride = 1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride = 1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride = 1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),

            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride = 1, padding=1),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride = 1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )

        self.fully_connected_layers = nn.Sequential(
            nn.Linear(128 * 4 * 4, 2048),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(2048, 1024),  
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 1),
            nn.Sigmoid(),
        )

    def forward(self, input_image):
        x = self.convolution_layers(input_image)
        x = x.view(x.size(0), -1)  
        x = self.fully_connected_layers(x)
        
        return x

In [7]:
modelCNNIndoorOutdoorDoubleConv = CNNIndoorOutdoorDoubleConv()  
optimizer = optim.Adam(modelCNNIndoorOutdoorDoubleConv.parameters(), lr=0.001) 
criterion = nn.BCELoss()

In [8]:
optimizer_adadelta = optim.Adadelta(modelCNNIndoorOutdoorDoubleConv.parameters(), lr=0.0005) 

In [9]:
def save_checkpoint(state, filename='checkpoint.pth'):
    """Save model state"""
    torch.save(state, filename)

def load_checkpoint(modelCNN, optimizer, filename='checkpoint.pth'):
    
    checkpoint = torch.load(filename)
    modelCNN.load_state_dict(checkpoint['model_state_dict']) 
    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])  
    epoch = checkpoint['epoch'] 
    loss = checkpoint['loss'] 
    accuracy = checkpoint['accuracy']  
    print(f"Checkpoint loaded. Epoch: {epoch}, Loss: {loss}, Accuracy: {accuracy}")
    return epoch, loss, accuracy

def train_model(model, train_loader, val_loader, criterion, optimizer, file, device = 'cpu', num_epochs=10, 
                first_flag = True,
                optimizer_new_flag = False, optimizer_new = optimizer_adadelta):
    if first_flag:
        best_val_acc = 0.0
        epoch_last = 0
    else:
        epoch_last, loss, accuracy = load_checkpoint(model, optimizer, file)
        if optimizer_new_flag:
            optimizer = optimizer_new

        best_val_acc = accuracy
    
    model = model.to(device)

    
    for epoch in tqdm(range(num_epochs)):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        
        
        for inputs, labels in (train_loader):
            inputs, labels = inputs.to(device), labels.to(device).float()

            optimizer.zero_grad()

            outputs = model(inputs)

            loss = criterion(outputs, labels.view(labels.size(0), -1))
            
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()
            predicted = (outputs.view(-1) > 0.5).long() 
            correct += (predicted == labels).sum().item()
            total += labels.size(0)
        
        
        train_loss = running_loss / len(train_loader)
        train_acc = correct / total
        
        val_loss, val_acc = evaluate_model(model, val_loader, criterion, device)
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}, '
              f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_acc:.4f}')
        
        writer.add_scalar('Loss/train', train_loss, epoch + epoch_last)
        writer.add_scalar('Accuracy/train', train_acc, epoch + epoch_last)
        writer.add_scalar('Loss/val', val_loss, epoch + epoch_last)
        writer.add_scalar('Accuracy/val', val_acc, epoch + epoch_last)
        

        if val_acc > best_val_acc:
            print(f'Validation accuracy improved from {best_val_acc:.4f} to {val_acc:.4f}. Saving model...')
            best_val_acc = val_acc
            
            checkpoint = {
                'epoch': epoch + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': val_loss,
                'accuracy': val_acc
            }
            save_checkpoint(checkpoint, file)
            
    
def evaluate_model(model, test_loader, criterion, device):
    model.eval()
    correct = 0
    total = 0
    running_loss = 0
    with torch.no_grad():
        for inputs, labels in (test_loader):
            inputs, labels = inputs.to(device), labels.to(device).float()
            outputs = model(inputs)
            loss = criterion(outputs, labels.view(labels.size(0), -1))
            running_loss += loss
            predicted = (outputs.view(-1) > 0.5).long() 
            # print(predicted)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

    avg_loss = running_loss / len(test_loader)  
    accuracy = correct / total 
    
    print(f'Accuracy on test set: {accuracy:.4f}')
    print(f'Average Loss on test set: {avg_loss:.4f}')

    return avg_loss, accuracy

def predict_model(model, test_loader, device):
    model.eval()

    predicted_array = []
    labels_array = []
    # correct = 0
    # total = 0
    # running_loss = 0
    with torch.no_grad():
        for i, (inputs, labels) in enumerate(tqdm(test_loader)):
            inputs, labels = inputs.to(device), labels.to(device).float().numpy()
            outputs = model(inputs)
            # loss = criterion(outputs, labels.view(labels.size(0), -1))
            # running_loss += loss
            predicted = (outputs.view(-1) > 0.5).long().numpy()
            # print(predicted)
            predicted_array.append(predicted)
            labels_array.append(labels)
            # print(predicted)
            # correct += (predicted == labels).sum().item()
            # total += labels.size(0)
            # if i > threshold_test:
            #     break

    # avg_loss = running_loss / len(test_loader)  
    # accuracy = correct / total 
    
    # print(f'Accuracy on test set: {accuracy:.4f}')
    # print(f'Average Loss on test set: {avg_loss:.4f}')

    accuracy = accuracy_score(y_pred=np.concatenate(predicted_array), y_true=np.concatenate(labels_array))
    classification_report_task = classification_report(y_pred=np.concatenate(predicted_array), y_true=np.concatenate(labels_array))
    confusion_matrix_task = confusion_matrix(y_pred=np.concatenate(predicted_array), y_true=np.concatenate(labels_array))

    return classification_report_task, confusion_matrix_task, accuracy

In [None]:
load_checkpoint(modelCNNIndoorOutdoorDoubleConv, optimizer, filename='double_conv_model.pth')

#### Start training now (more data):

In [None]:
train_model(modelCNNIndoorOutdoorDoubleConv, train_loader, val_loader, criterion, optimizer,
             file = 'double_conv_model.pth', num_epochs=3, first_flag=False)

In [None]:
load_checkpoint(modelCNNIndoorOutdoorDoubleConv, optimizer, filename='double_conv_model.pth')

In [None]:
classification_report_task, confusion_matrix_task, accuracy = predict_model(modelCNNIndoorOutdoorDoubleConv, test_loader, 'cpu')

In [None]:
print(accuracy)

In [None]:
print(classification_report_task)

In [None]:
disp = ConfusionMatrixDisplay(confusion_matrix=confusion_matrix_task)
disp.plot()
plt.show()

#### Let's look at the train_loss distrubution (probably, it's actual to filter some samples or augment them):

In [None]:
def imshow(img, title):
    """image show function"""
    img = img / 2 + 0.5  # unnormalize
    np_img = img.numpy()
    plt.imshow(np.transpose(np_img, (1, 2, 0)))
    plt.title(title)
    plt.show()

def inference_and_visualize_errors(model, test_loader, device ='cpu'):
    incorrect_images = []
    incorrect_labels = []
    incorrect_preds = []
    losses = []

    with torch.no_grad():
        for i, (inputs, labels) in enumerate(tqdm(test_loader)):
            inputs, labels = inputs.to(device), labels.to(device).float()

            outputs = model(inputs)
            
            predicted = (outputs > 0.5).float().view(-1)
            loss = criterion(outputs, labels.view(labels.size(0), -1))


            incorrect_indices = (predicted != labels).nonzero(as_tuple=True)[0]
            losses.append(loss.item())

            for idx in incorrect_indices:
                incorrect_images.append(inputs[idx].cpu())
                incorrect_labels.append(labels[idx].cpu().item())
                incorrect_preds.append(predicted[idx].cpu().item())
            

    for i in range(min(10, len(incorrect_images))):
        imshow(incorrect_images[i], f'Actual: {int(incorrect_labels[i])}, Predicted: {int(incorrect_preds[i])}')

        writer.add_image(f'Error_{i}', incorrect_images[i])
        writer.add_text(f'Error_{i}', f'Actual: {int(incorrect_labels[i])}, Predicted: {int(incorrect_preds[i])}')
    return losses

losses = inference_and_visualize_errors(modelCNNIndoorOutdoorDoubleConv, train_loader)


#### This model is for 64x64 size, transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)) and without augmentation

#### Next is 64x64, transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) and augmented

#### And plot the distribution of losses via batches:

In [None]:
plt.hist(losses)
plt.title('Histogram of Sample Losses')
plt.xlabel('Loss')
plt.ylabel('Frequency')
plt.show()

#### Augmentated:

In [10]:
class CNNIndoorOutdoorDoubleConvAugmentatedWithResidual(nn.Module):
    def __init__(self):
        super(CNNIndoorOutdoorDoubleConvAugmentatedWithResidual, self).__init__()
        
        self.init_conv = nn.Conv2d(in_channels = 3, out_channels = 64, kernel_size = 3, stride = 2)
        
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.conv3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
#             nn.MaxPool2d(kernel_size=2, stride=2)
        )

        self.conv4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.Conv2d(in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU()
        )

        self.shortcut1 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=64, kernel_size=1, stride=1),
            nn.BatchNorm2d(64)
        )

        self.shortcut2 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=1, stride=1),
            nn.BatchNorm2d(32)
        )

        self.shortcut3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=1, stride=1),
            nn.BatchNorm2d(64)
        )
        
        self.shortcut4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=128, kernel_size=1, stride=1),
            nn.BatchNorm2d(128)
        )

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))

        self.fully_connected_layers = nn.Sequential(
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(128, 1),
            nn.Sigmoid(),
        )

    def forward(self, input_image):
        x = self.init_conv(input_image)
#         print(self.shortcut1(input_image))
#         print(x1.size(), self.shortcut1(x1).size())
        x1 = x + self.shortcut1(x)
#         print(x1.size())
#         print(self.conv2(x1).size())
#         print(self.shortcut2(x1).size())
        x2 = self.conv2(x1) + self.shortcut2(x1)  # Skip connection
        x3 = self.conv3(x2) + self.shortcut3(x2)  # Skip connection
        x4 = self.conv4(x3) + self.shortcut4(x3)  # Skip connection

        x = self.avgpool(x4)
        x = x.view(x.size(0), -1)
        x = self.fully_connected_layers(x)

        return x




In [25]:
modelCNNIndoorOutdoorDoubleConvAugmentated = CNNIndoorOutdoorDoubleConvAugmentatedWithResidual()
modelCNNIndoorOutdoorDoubleConvAugmentated = modelCNNIndoorOutdoorDoubleConvAugmentated.to('cuda')
optimizer = optim.Adam(modelCNNIndoorOutdoorDoubleConvAugmentated.parameters(), lr=0.0001) 
criterion = nn.BCELoss()

In [26]:
writer = SummaryWriter('runs/running_indoor_outdoor_model_double_convolution_augmentated_blur_with_residual')

In [27]:
load_checkpoint(modelCNNIndoorOutdoorDoubleConvAugmentated, optimizer, filename='/kaggle/input/2nd-model/pytorch/9021/1/double_conv_model_augmentated_blur_residual-2.pth')

Checkpoint loaded. Epoch: 5, Loss: 0.23718620836734772, Accuracy: 0.9052747859707263


  checkpoint = torch.load(filename)


(5, tensor(0.2372, device='cuda:0'), 0.9052747859707263)

In [28]:
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.001
    maximize: False
    weight_decay: 0
)

In [None]:
train_model(modelCNNIndoorOutdoorDoubleConvAugmentated, train_loader, val_loader, criterion, optimizer,
             file = 'double_conv_model_augmentated_blur_residual.pth', num_epochs=40, device = 'cuda', 
             first_flag=True, optimizer_new_flag=True, optimizer_new = optim.Adam(modelCNNIndoorOutdoorDoubleConvAugmentated.parameters(), lr = 0.0001))

  2%|▎         | 1/40 [14:12<9:14:00, 852.31s/it]

Accuracy on test set: 0.9038
Average Loss on test set: 0.2414
Epoch [1/40], Loss: 0.2296, Accuracy: 0.9093, Val Loss: 0.2414, Val Accuracy: 0.9038
Validation accuracy improved from 0.0000 to 0.9038. Saving model...


#### Evaluate model (This was trained not with additional augmented 2nd class samples):

In [None]:
cp, cm, acc = predict_model(modelCNNIndoorOutdoorDoubleConvAugmentated, test_loader, 'cpu')

In [None]:
print(acc)

In [None]:
print(cp)

In [None]:
disp = ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()
plt.show()

#### Left upper corner train (Try):

In [None]:
class CNNIndoorOutdoorDoubleConvAugmentatedCrop(CNNIndoorOutdoorDoubleConvAugmentated):
    
    def __init__(self):
        super(CNNIndoorOutdoorDoubleConvAugmentatedCrop, self).__init__()

        self.fully_connected_layers = nn.Sequential(
            nn.Linear(8192, 2048),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(2048, 1024),  
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(1024, 1),
            nn.Sigmoid(),
        )
        
       

In [None]:
cropped_model_new = CNNIndoorOutdoorDoubleConvAugmentatedCrop()

optimizer = optim.Adam(cropped_model_new.parameters(), lr=0.0005) 
criterion = nn.BCELoss()
writer = SummaryWriter('runs/running_indoor_outdoor_model_double_convolution_augmentated_cropped_new')
optimizer_new = optim.Adadelta(cropped_model_new.parameters(), lr=0.01) 

In [None]:
dataiter = iter(train_loader)
images, labels = next(dataiter)
print(cropped_model_new.convolution_layers(images).shape)

In [None]:
import torch
from torchvision import transforms
from PIL import Image

image = Image.open('/Users/chervonikov_alexey/Desktop/projects/Technopark_Autumn_2024/Нейронки/third_lesson_cnn/HomeWorkFolder/data/class1/00000001_11016.jpg')
# image.show()

cropped_image_tensor = left_upper_corner_transfrom(image)

image_tensor_numpy = cropped_image_tensor.permute(1, 2, 0).numpy()

plt.imshow(image_tensor_numpy)
plt.axis('off') 
plt.show()


In [None]:
train_model(cropped_model_new, train_loader, val_loader, criterion, optimizer,
             file = 'double_conv_model_augmentated_croped_new.pth', num_epochs=9, first_flag=True, optimizer_new_flag=False,
             optimizer_new=optimizer_new)

#### Overfitting started from 9 epoch... But on train it grows