In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily enumerate every file below the Kaggle input directory and print its full path.
kaggle_inputs = (
    os.path.join(folder, entry)
    for folder, _, entries in os.walk('/kaggle/input')
    for entry in entries
)
for input_path in kaggle_inputs:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Shell out to the NVIDIA CLI to show which GPUs are visible to this session.
!nvidia-smi

In [1]:
from torchvision import transforms
import os
import io
import glob
import torch
import numpy as np
import pandas as pd
from PIL import Image
import torch.nn as nn
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
import torch.nn.functional as F
import torchvision.models as models
from sklearn.metrics import f1_score
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import CosineAnnealingLR
from torchvision.transforms import Compose, Lambda, ToTensor, Normalize, Resize, RandomCrop, TenCrop, RandomHorizontalFlip

# TensorFlow is only used here to read TFRecords; enable memory growth on every
# GPU it can see (presumably so it does not reserve the memory PyTorch needs).
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    # The memory-growth setting has to be identical across all GPUs.
    for gpu_handle in gpus:
        tf.config.experimental.set_memory_growth(gpu_handle, True)
except RuntimeError as err:
    # Raised when the GPUs were already initialised before this cell ran.
    print(err)

2024-04-24 01:34:14.438916: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-24 01:34:14.577099: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [22]:
# Some settings:
# --------------
# Root folder holding the train/val/test TFRecord shards. Set the TFREC_DATA_DIR
# environment variable to run the notebook on another machine; the default keeps
# the original location, so the three glob patterns below are unchanged.
data_dir = os.environ.get(
    'TFREC_DATA_DIR',
    '/home/msai/mkee004/AI6102/extracted_contents/tfrecords-jpeg-224x224',
)
train_files = f'{data_dir}/train/*.tfrec'
valid_files = f'{data_dir}/val/*.tfrec'
test_files  = f'{data_dir}/test/*.tfrec'
device      = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
wdn_epochs    = 20   # number of epochs run by the InceptionV3 training loop
lrn_epochs    = 10   # epoch budget for a second experiment (not read by the InceptionV3 loop)
batch_size  = 20     # training mini-batch size
num_prints  = 10     # approximate number of progress lines printed per epoch
train_size  = 12753  # hard-coded training-set size; only used to derive print_freq
print_freq  = train_size // (batch_size * num_prints) + 1  # print every print_freq batches
check_freq  = 1      # run validation every check_freq epochs
n_classes = 104      # number of target classes
base_lr = 1e-3       # learning rate for non-classifier parameters
classifier_lr = 3e-3 # learning rate for the final classification layer
weight_decay = 1e-4  # L2 penalty passed to the optimizer

In [3]:
# Convert to df
def tfrecords_to_dataframe(fp, test = False):
    """Read every TFRecord shard matching ``fp`` into a pandas DataFrame.

    Args:
        fp: Glob pattern selecting the ``.tfrec`` shards to read.
        test: When True the records are parsed without a ``class`` feature and
            the returned frame has no ``lab`` column.

    Returns:
        DataFrame with columns ``id`` (str), ``img`` (raw encoded image bytes)
        and, unless ``test`` is True, ``lab`` (integer class label).

    Raises:
        FileNotFoundError: If no file matches ``fp``.
    """
    # glob returns files in arbitrary order; sort so that the row order (and any
    # seeded sampling done on the result) is reproducible across machines.
    shard_paths = sorted(glob.glob(fp))
    if not shard_paths:
        # Fail early with a clear message instead of returning an empty frame
        # that only breaks later inside the DataLoader.
        raise FileNotFoundError(f'No TFRecord files match pattern: {fp!r}')

    feature_spec = {
        'id': tf.io.FixedLenFeature([], tf.string),
        'image': tf.io.FixedLenFeature([], tf.string),
    }
    if not test:
        feature_spec['class'] = tf.io.FixedLenFeature([], tf.int64)

    def parse(pb):
        """Decode one serialized example according to ``feature_spec``."""
        return tf.io.parse_single_example(pb, feature_spec)

    df = {'id': [], 'img': []}
    if not test:
        df['lab'] = []
    for sample in tf.data.TFRecordDataset(shard_paths).map(parse):
        df['id'].append(sample['id'].numpy().decode('utf-8'))
        df['img'].append(sample['image'].numpy())
        if not test:
            df['lab'].append(sample['class'].numpy())
    return pd.DataFrame(df)

In [4]:
def display_images(dataset, n, cols):
    """Show the first ``n`` items of ``dataset`` in a grid with ``cols`` columns.

    Each item is expected to be an ``(image, label)`` pair whose image is a
    channel-first tensor; the label is used as the subplot title.
    """
    # Ceiling division: enough rows to hold n images.
    rows = -(-n // cols)
    plt.figure(figsize=(cols * 2, rows * 2))
    for slot in range(1, n + 1):
        plt.subplot(rows, cols, slot)
        picture, label = dataset[slot - 1]
        # Move channels last, which is the layout imshow expects.
        channels_last = picture.permute(1, 2, 0)
        plt.imshow(channels_last.numpy())
        plt.title(str(label))
        plt.axis('off')
    plt.show()

In [5]:
class Trainset(Dataset):
    """Training dataset built from the TFRecord shards matched by ``train_files``.

    Every item is decoded from its stored image bytes and augmented with a random
    horizontal flip, a random rescale and a random 300x300 crop, then normalized
    with the mean/std commonly used for ImageNet-pretrained torchvision models.

    Args:
        frac: Fraction of the training rows to keep (rows are shuffled by
            ``DataFrame.sample``).
    """

    def __init__(self, frac=1):
        super().__init__()
        self.df = tfrecords_to_dataframe(train_files).sample(frac=frac).reset_index(drop=True)
        self.transforms = transforms.Compose([
            transforms.Lambda(lambda b: transforms.ToTensor()(Image.open(io.BytesIO(b)))),
            transforms.RandomHorizontalFlip(),
            # The target size must be drawn on every call. Building
            # Resize(np.random.randint(...)) here would pick one size at
            # construction time and reuse it for every sample and epoch.
            transforms.Lambda(self._random_resize),
            transforms.RandomCrop(300),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])

    @staticmethod
    def _random_resize(img):
        """Resize ``img`` so its shorter side is a random length in [300, 640]."""
        # torch.randint's upper bound is exclusive, matching np.random.randint(300, 641).
        size = int(torch.randint(300, 641, (1,)).item())
        return Resize(size)(img)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(augmented image tensor, label)`` for row ``idx``."""
        row = self.df.iloc[idx]
        return self.transforms(row['img']), row['lab']
    

In [6]:
class Evalset(Dataset):
    """Validation/test dataset that yields multi-scale ten-crop views of each image.

    Args:
        frac: Fraction of rows to keep; sampling uses a fixed ``random_state`` of 0.
        test: Read ``test_files`` (no labels) instead of ``valid_files``.

    Each item is ``(views, target)`` where ``views`` stacks, for the two resize
    scales 372 and 568, the ten 300x300 crops produced by ``TenCrop``, and
    ``target`` is the label (validation) or the sample id (test).
    """

    def __init__(self, frac=1, test=False):
        super().__init__()
        source = test_files if test else valid_files
        frame = tfrecords_to_dataframe(source, test)
        self.df = frame.sample(frac=frac, random_state=0).reset_index(drop=True)
        self.transforms = [self._make_pipeline(scale) for scale in (372, 568)]
        self.test = test

    @staticmethod
    def _make_pipeline(scale):
        """Build the decode -> resize -> ten-crop -> tensor -> normalize pipeline."""
        to_tensor = ToTensor()
        normalize = Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        return Compose([
            Lambda(lambda raw: Image.open(io.BytesIO(raw))),
            Resize(scale),
            TenCrop(300),
            Lambda(lambda crops: torch.stack([to_tensor(crop) for crop in crops])),
            Lambda(lambda crops: torch.stack([normalize(crop) for crop in crops])),
        ])

    def __len__(self):
        return len(self.df)

    def __getitem__(self, i):
        """Return the stacked crops of row ``i`` plus its label or, in test mode, its id."""
        row = self.df.iloc[i]
        views = [pipeline(row['img']) for pipeline in self.transforms]
        target_column = 'id' if self.test else 'lab'
        return torch.stack(views), row[target_column]

In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models

class InceptionV3(nn.Module):
    """Pretrained torchvision Inception v3 with a new ``n_classes``-way ``fc`` head.

    Args:
        n_classes: Output size of the replacement ``fc`` layer.
        learnable_modules: Names of top-level sub-modules of the backbone whose
            parameters stay trainable; every other parameter is frozen.

    ``forward`` returns log-probabilities (log-softmax over dim 1).
    """

    def __init__(self, n_classes, learnable_modules=('AuxLogits', 'fc')):
        super().__init__()
        backbone = models.inception_v3(pretrained=True)
        backbone.fc = nn.Linear(backbone.fc.in_features, n_classes)
        self.inception_v3 = backbone

        # Train only the parameters that live under one of the listed modules.
        for param_name, weight in self.inception_v3.named_parameters():
            top_level_module = param_name.split('.')[0]
            weight.requires_grad = top_level_module in learnable_modules

    def forward(self, x):
        outputs = self.inception_v3(x)
        if not isinstance(outputs, torch.Tensor):
            # Non-tensor outputs must expose the main head as `.logits`;
            # anything else on the output object is ignored.
            if not hasattr(outputs, 'logits'):
                raise TypeError("Expected output to have 'logits' attribute")
            outputs = outputs.logits
        return torch.log_softmax(outputs, dim=1)

# Build the model with the class count from the settings cell.
model = InceptionV3(n_classes=n_classes)

# Per-group learning rates:
#   * the new `fc` head trains with `classifier_lr`;
#   * any other trainable parameter (with the default learnable modules this is
#     the AuxLogits conv stack) trains with `base_lr`.
param_groups = [
    {'params': model.inception_v3.fc.parameters(), 'lr': classifier_lr},
    {'params': [p for name, p in model.named_parameters() if 'fc' not in name and p.requires_grad], 'lr': base_lr}
]

# Weight decay is set as the optimizer-wide default so that it also covers the
# `fc` head. It used to be attached only to the second group, whose parameters
# never receive gradients (InceptionV3.forward drops the auxiliary logits), so
# Adam skipped them and no weight decay was applied at all.
optimizer = optim.Adam(params=param_groups, weight_decay=weight_decay)


In [18]:

# train_loader = DataLoader(train_set, batch_size = batch_size, shuffle = True, num_workers = 2)
# valid_loader = DataLoader(Evalset(frac = 0.20), batch_size = 1, num_workers = 2)
# test_loader  = DataLoader(Evalset(test = True), batch_size = 1, num_workers = 2)

# Build the datasets first, then wrap them in loaders.
train_set = Trainset(frac=1)
valid_set = Evalset(frac = 0.20)

# Training batches are shuffled; validation is served one sample at a time.
train_loader = DataLoader(train_set, batch_size = batch_size, shuffle = True, num_workers = 0)
valid_loader = DataLoader(valid_set, batch_size = 1, num_workers = 0)
#test_loader  = DataLoader(Evalset(test = True,frac=0.1), batch_size = 1, num_workers = 0)


# Display some training images and labels:
# ----------------------------------------
#display_images(train_set, n = 30, cols = 5)

2024-04-24 01:57:39.413349: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2024-04-24 01:57:42.953289: W tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [None]:
# Number of validation batches (equal to the number of samples, as batch_size is 1).
# The loader built earlier is reused: Evalset samples with a fixed random_state,
# so rebuilding it here would only re-read the TFRecords to get the same subset.
len(valid_loader)

In [None]:
# Number of training batches per epoch.
len(train_loader)

In [None]:

import torch
from sklearn.metrics import f1_score
import time

# The model emits log-probabilities, so negative log-likelihood is the matching loss.
loss_fn = F.nll_loss
losses = []        # mean training loss of each epoch
valid_f1s = []     # weighted validation F1 of each validated epoch
valid_losses = []  # mean validation loss of each validated epoch
max_f1 = 0         # not used by this cell
# Lowest training loss seen so far. Starts at +inf so the first validated epoch
# always produces a checkpoint (a start value of 1 suppressed every checkpoint
# until the loss dropped below 1).
last_loss_state = float('inf')
model.to(device)
name = 'IV3WD1e_4'
log_filename = f'{name}_Log.txt'

# The loop calls scheduler.step() once per epoch, but no scheduler was ever
# created for this optimizer (NameError at the end of epoch 1). Use the same
# cosine schedule as the EfficientNet experiment, spanning the whole run.
scheduler = CosineAnnealingLR(optimizer, T_max=wdn_epochs)


def write_log(*lines):
    """Append the given strings, unchanged, to this run's log file."""
    with open(log_filename, 'a') as log_file:
        log_file.writelines(lines)


write_log(f'\n{log_filename}\n', '--------------- \n')

epoch_start_time = time.time()

for epoch in range(wdn_epochs):
    print(f"\nEpoch {epoch + 1}/{wdn_epochs}")
    print('-' * 20)
    write_log(f'\n{name} Epoch {epoch + 1}/{wdn_epochs} \n', '--------------- \n')

    # Training phase
    model.train()  # Set model to training mode
    running_loss = 0.0
    for i, (x, y) in enumerate(train_loader):
        x, y = x.to(device), y.to(device)  # Move inputs to the appropriate device

        optimizer.zero_grad()  # Zero the parameter gradients
        outputs = model(x)  # Forward pass (already on `device`)
        loss = loss_fn(outputs, y)  # Calculate loss
        loss.backward()  # Backward pass
        optimizer.step()  # Optimize

        batch_loss = loss.item()
        # Weight by batch size so the epoch average is per sample.
        running_loss += batch_loss * x.size(0)

        if i % print_freq == 0:
            print(f"Batch {i}, Loss: {batch_loss:.4f}")
            write_log(f'Batch {i}, Loss: {batch_loss:.4f} \n')

    epoch_loss = running_loss / len(train_loader.dataset)  # Average loss for the epoch
    losses.append(epoch_loss)
    print(f"Training Loss: {epoch_loss:.4f}")
    write_log(f'Training Loss: {epoch_loss:.4f} \n')

    # Validation phase
    if epoch % check_freq == 0:
        model.eval()  # Set model to evaluation mode
        running_val_loss = 0.0

        valid_true_labs = []
        valid_pred_labs = []

        with torch.no_grad():  # Disables gradient calculation
            for x, y in valid_loader:
                # Evalset yields [batch, scale, crop, C, H, W]; validation only
                # scores the first crop of the first scale to keep it cheap.
                x_corrected = x[:, 0, 0, :, :, :].to(device)
                y = y.to(device)

                outputs = model(x_corrected)
                loss = loss_fn(outputs, y)
                running_val_loss += loss.item() * x.size(0)

                preds = torch.argmax(outputs, dim=1)
                valid_true_labs.extend(y.tolist())
                valid_pred_labs.extend(preds.tolist())

        epoch_val_loss = running_val_loss / len(valid_loader.dataset)
        valid_losses.append(epoch_val_loss)
        valid_f1 = f1_score(valid_true_labs, valid_pred_labs, average='weighted')
        valid_f1s.append(valid_f1)
        print(f"Validation F1: {valid_f1 * 100:.2f}%, Validation Loss: {epoch_val_loss:.4f}")
        write_log(f'Validation F1: {valid_f1 * 100:.2f}% \n',
                  f'Validation Loss: {epoch_val_loss:.4f} \n')

        # Checkpoint whenever the training loss improves. The file is named after
        # this run (`name`); the earlier 'ENLR3e_4Tune' prefix came from a
        # different experiment and would have clobbered its checkpoints.
        if epoch_loss < last_loss_state:
            last_loss_state = epoch_loss
            best_epoch = epoch
            print(best_epoch)
            torch.save(model.state_dict(), f'./{name}_epoch{epoch // check_freq}.pth')

    # Adjust learning rate
    scheduler.step()

epoch_end_time = time.time()
epoch_duration = epoch_end_time - epoch_start_time
total_duration = epoch_duration / 60  # Convert to minutes
print(f'Total duration: {total_duration} minutes \n')
write_log(f'Total duration: {total_duration} minutes \n')



Epoch 1/20
--------------------
Batch 0, Loss: 4.7466
Batch 64, Loss: 3.5487


In [None]:
class EfficientNetB0withDropOut(nn.Module):
    """Pretrained EfficientNet-B0 with a new head and extra dropout before the classifier.

    Args:
        n_classes: Output size of the replacement ``classifier[1]`` layer.
        learnable_modules: Names (as reported by ``named_modules()`` on the
            backbone) of the sub-modules to leave trainable; everything else
            is frozen.
        dropout_p: Probability of the additional dropout layer.

    Raises:
        ValueError: If a name in ``learnable_modules`` is not a module of the backbone.

    ``forward`` returns log-probabilities (log-softmax over dim 1).
    """

    def __init__(self, n_classes, learnable_modules=('classifier.1',), dropout_p=0.15):
        super().__init__()
        self.efficientnet_b0 = models.efficientnet_b0(pretrained=True)
        self.efficientnet_b0.classifier[1] = nn.Linear(self.efficientnet_b0.classifier[1].in_features, n_classes)
        # Freeze everything, then re-enable gradients for the requested modules.
        self.efficientnet_b0.requires_grad_(False)

        modules = dict(self.efficientnet_b0.named_modules())
        for name in learnable_modules:
            if name in modules:
                modules[name].requires_grad_(True)
            else:
                raise ValueError(f"Module name '{name}' not found in the model's named modules.")

        # Adding dropout layer with specified dropout probability
        self.dropout = nn.Dropout(p=dropout_p)

    def forward(self, x):
        # Run the backbone stages explicitly so the extra dropout acts on the
        # pooled feature vector right before the classifier. Applying it to `x`
        # directly, as before, randomly zeroed input pixels instead.
        backbone = self.efficientnet_b0
        features = backbone.features(x)
        features = backbone.avgpool(features)
        features = torch.flatten(features, 1)
        features = self.dropout(features)
        return F.log_softmax(backbone.classifier(features), dim=1)

# Fine-tune the last EfficientNet-B0 stages plus the classifier, wrapped in DataParallel.
modeldropout = nn.DataParallel(EfficientNetB0withDropOut(n_classes = 104, learnable_modules = ('features.5.2', 
                                                                             'features.6', 
                                                                             'features.7', 
                                                                             'features.8', 
                                                                             'classifier')))
modeldropout.to(device)

# NOTE(review): this cell originally read `learningRate`, `weightDecay` and
# `n_epochs`, none of which is defined anywhere in the notebook (NameError on a
# fresh kernel). They are bound to the settings-cell values `base_lr`,
# `weight_decay` and `lrn_epochs` here -- confirm these are the intended ones.
optimizerdropout = torch.optim.Adam(params = [{'params': modeldropout.module.efficientnet_b0.features[5][2].parameters()}, 
                                       {'params': modeldropout.module.efficientnet_b0.features[6].parameters()}, 
                                       {'params': modeldropout.module.efficientnet_b0.features[7].parameters()},
                                       {'params': modeldropout.module.efficientnet_b0.features[8].parameters()},
                                       {'params': modeldropout.module.efficientnet_b0.classifier.parameters(), 'lr': 1e-3}], 
                             lr = base_lr, 
                             weight_decay = weight_decay)

schedulerdropout = CosineAnnealingLR(optimizerdropout, T_max = lrn_epochs)

losses = []     # mean training loss of each epoch
valid_f1s = []  # weighted validation F1 of each validated epoch

for epoch in range(lrn_epochs):
    print(f"\nEpoch {epoch + 1}/{lrn_epochs}")
    print('-' * 10)

    # Training phase
    modeldropout.train()  # Set model to training mode
    running_loss = 0.0
    for i, (x, y) in enumerate(train_loader):
        x, y = x.to(device), y.to(device)  # Move inputs to the appropriate device

        optimizerdropout.zero_grad()  # Zero the parameter gradients
        outputs = modeldropout(x)  # Forward pass
        loss = loss_fn(outputs, y)  # Calculate loss
        loss.backward()  # Backward pass
        optimizerdropout.step()  # Optimize

        # Weight by batch size so the epoch average is per sample.
        running_loss += loss.item() * x.size(0)  # Update running loss

        if i % print_freq == 0:
            print(f"Batch {i}, Loss: {loss.item():.4f}")

    epoch_loss = running_loss / len(train_loader.dataset)  # Calculate average loss for the epoch
    losses.append(epoch_loss)
    print(f"Training Loss: {epoch_loss:.4f}")

    # Validation phase
    if epoch % check_freq == 0:
        modeldropout.eval()  # Set model to evaluation mode
        valid_true_labs = []
        valid_pred_labs = []
        
        with torch.no_grad():  # Disables gradient calculation
            for x, y in valid_loader:
                # Evalset yields [batch, scale, crop, C, H, W]; validation only
                # scores the first crop of the first scale.
                x_corrected = x[:, 0, 0, :, :, :]
                x_corrected = x_corrected.to(device)
                y = y.to(device)

                outputs = modeldropout(x_corrected)
                preds = torch.argmax(outputs, dim=1)

                valid_true_labs.extend(y.tolist())
                valid_pred_labs.extend(preds.tolist())

        valid_f1 = f1_score(valid_true_labs, valid_pred_labs, average='weighted')
        valid_f1s.append(valid_f1)
        print(f"Validation F1: {valid_f1 * 100:.2f}%")

        # Save model checkpoint. The state dict comes from the DataParallel
        # wrapper, so its keys carry a "module." prefix.
        torch.save(modeldropout.state_dict(), f'./epochdropout{epoch // check_freq}.pth')

    # Adjust learning rate
    schedulerdropout.step()


In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Plot the history recorded by the most recent training loop. `losses` and
# `valid_f1s` are read as-is; they used to be overwritten here with synthetic
# example values, so the figure never showed the real run.
epoch_losses = np.asarray(losses, dtype=float)
f1_history = np.asarray(valid_f1s, dtype=float)
if epoch_losses.size == 0 or f1_history.size == 0:
    raise RuntimeError('No training history to plot - run a training cell first.')

# Index (in validation steps) of the best weighted F1.
optimal_epoch = int(np.argmax(f1_history))

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))
fig.subplots_adjust(wspace=0.3)

# Plot for Training Loss (one point per epoch)
ax1.plot(np.arange(len(epoch_losses)), epoch_losses, color='tab:blue', linewidth=2, label='Training Loss')
ax1.set_xlabel('Epoch', fontsize=12)
ax1.set_ylabel('Loss', fontsize=12)
ax1.set_title('Training Loss Over Epochs', fontsize=14)
ax1.grid(True, linestyle='--', alpha=0.5)
ax1.legend()

# Plot for Validation F1 Score (one point every check_freq epochs)
ax2.plot(np.arange(len(f1_history)) * check_freq, f1_history, color='tab:green', linewidth=2, label='Validation F1')
ax2.vlines(optimal_epoch * check_freq, 0, f1_history[optimal_epoch], colors='red', linestyles='dashed', label=f'Optimal Epoch ({optimal_epoch * check_freq})')
ax2.set_xlabel('Epoch', fontsize=12)
ax2.set_ylabel('Weighted F1 Score', fontsize=12)
ax2.set_title('Validation F1 Score Over Epochs', fontsize=14)
ax2.set_ylim(0, 1)
ax2.grid(True, linestyle='--', alpha=0.5)
ax2.legend()

# Saving and showing the improved plot
fig.savefig('improved_plot.png')  # Adjust path as needed
plt.show()


In [None]:
# Build the test loader here: it is only defined in a commented-out line
# earlier in the notebook, so this cell used to fail with a NameError.
# The whole test set is loaded because every id needs a prediction.
test_loader = DataLoader(Evalset(test = True), batch_size = 1, num_workers = 0)

ids = []
preds = []
model.eval()
with torch.no_grad():
    for x, y in test_loader:
        # In test mode Evalset returns the sample id in place of a label.
        ids.append(y[0])
        # Flatten all scales and crops of the single sample into one batch and
        # average their log-probabilities (test-time augmentation).
        mean_logp = model(x.view(-1, 3, 300, 300).to(device)).mean(dim = 0)
        preds.append(torch.argmax(mean_logp).item())
submission = pd.DataFrame({'id': ids, 'label': preds})


In [None]:
# Write the predictions to submission.csv without the DataFrame index column.
submission.to_csv('submission.csv', index = False)

In [None]:
# Preview the first rows of the submission table.
submission.head()