In [None]:
!pip install transformers datasets
!pip install -q kaggle

import os

from google.colab import drive

# Mount Google Drive so that saved checkpoints outlive the Colab session.
drive.mount('/content/drive')

# Directory the Kaggle archive is unpacked into (local runtime disk).
data_load='/content/data/'
# Checkpoint directory. It has to live under the mount point used above
# ('/content/drive'); the previous value pointed below '/content/data/', which
# is not on Drive and does not exist.
data_save='/content/drive/MyDrive/RS/Weather/'
# torch.save does not create missing parent directories.
os.makedirs(data_save, exist_ok=True)

import torch, torch.nn as nn, torch.nn.functional as F
# Use the GPU when the runtime exposes one, otherwise fall back to the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import os
import shutil
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from torchvision.models import regnet_y_32gf, RegNet_Y_32GF_Weights, efficientnet_v2_l, EfficientNet_V2_L_Weights, vit_b_16, ViT_B_16_Weights, convnext_base, ConvNeXt_Base_Weights
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d jehanbhathena/weather-dataset

In [None]:
!unzip weather-dataset -d data

In [None]:
import os
import shutil
# Flatten the per-class folder layout: every image is moved into a single
# directory and the name of the folder it came from is recorded as its label.
data_path = os.path.join(data_load, 'dataset')
data_new = os.path.join(data_load, 'weather_all')
# Create the target directory from Python. The former `!mkdir data_load+'/weather_all'`
# was passed to the shell verbatim (no variable interpolation), so the directory
# the loop moves files into was never created.
os.makedirs(data_new, exist_ok=True)

labels = []
# Sorted listings make the row order (and therefore the seeded split) reproducible.
for class_name in sorted(os.listdir(data_path)):
    class_dir = os.path.join(data_path, class_name)
    if not os.path.isdir(class_dir):
        continue  # ignore stray files next to the class folders
    for file_name in sorted(os.listdir(class_dir)):
        labels.append([file_name, class_name])
        # NOTE(review): assumes file names are unique across class folders; a
        # duplicate name would replace the earlier file -- TODO confirm.
        shutil.move(os.path.join(class_dir, file_name), os.path.join(data_new, file_name))

data = pd.DataFrame(labels, columns=['file', 'label'])
data

In [None]:
# Encode class names as integer ids (sorted order of the names) and keep the
# reverse mapping id -> name in `label_matcher`.
# The guard makes the cell safe to re-run: once the column holds integer ids the
# mapping is left untouched instead of being overwritten with id -> id.
if not pd.api.types.is_integer_dtype(data['label']):
    class_names = np.unique(data['label'])
    label_matcher = dict(enumerate(class_names))
    name_to_id = {name: idx for idx, name in label_matcher.items()}
    # One whole-column assignment replaces the chained `data['label'][mask] = i`,
    # which triggers SettingWithCopyWarning and does nothing under Copy-on-Write.
    data['label'] = data['label'].map(name_to_id)
data

In [None]:
# Two-stage split with a fixed seed: 10% of all rows are held out for testing,
# then 20% of what remains becomes the validation set.
SPLIT_SEED = 42
data_trainval, data_test = train_test_split(data, test_size=0.1, random_state=SPLIT_SEED)
data_train, data_val = train_test_split(data_trainval, test_size=0.2, random_state=SPLIT_SEED)

In [None]:
# Row counts of the train, test and validation splits (in that order).
tuple(len(part) for part in (data_train, data_test, data_val))

In [None]:
class DS(Dataset):
    """Image dataset driven by a table of (file name, label) rows.

    ``labels`` is indexed positionally with ``.iloc``: column 0 holds the image
    file name relative to ``root_dir`` and column 1 holds the label that is
    returned together with the image.
    """

    def __init__(self, labels, root_dir, subset=False, transform=None):
        # ``subset`` is part of the signature but is not used by the class.
        self.transform = transform
        self.root_dir = root_dir
        self.labels = labels

    def __len__(self):
        """Return the number of rows in the label table."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Open the idx-th image as RGB and return ``[image, label]``.

        The image is passed through ``transform`` first when one was given.
        """
        table = self.labels
        image_path = os.path.join(self.root_dir, table.iloc[idx, 0])
        picture = Image.open(image_path).convert('RGB')
        target = table.iloc[idx, 1]
        if not self.transform:
            return [picture, target]
        return [self.transform(picture), target]

In [None]:
def _model_device(model):
    """Return the device of the model's first parameter (CPU if it has none)."""
    first_param = next(model.parameters(), None)
    return first_param.device if first_param is not None else torch.device("cpu")


def train_model(dataloaders,
                model,
                criterion,
                optimizer,
                num_epochs=1,
                file_name='model.pth',
                patience=20):
    """Train ``model`` with early stopping and checkpoint the best weights.

    Args:
        dataloaders: mapping with the keys ``'train'`` and ``'valid'``; each value
            yields ``(inputs, targets)`` tensor batches and supports ``len()``.
        model: ``torch.nn.Module`` to optimise; it is modified in place.
        criterion: loss callable invoked as ``criterion(logits, targets)``.
        optimizer: optimizer already bound to the parameters to update.
        num_epochs: maximum number of epochs to run.
        file_name: path the state dict with the lowest validation loss is saved to.
        patience: number of consecutive epochs without a new best validation
            loss after which training stops early.

    Returns:
        The same ``model`` object, holding the weights of the last epoch that
        ran. The best weights are the ones stored in ``file_name``.
    """
    # Batches are moved to wherever the model lives instead of relying on a
    # notebook-level global.
    run_device = _model_device(model)
    train_losses = np.zeros(num_epochs)
    valid_losses = np.zeros(num_epochs)
    stale_epochs = 0
    best_loss = np.inf

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        running = 0.0
        for x, y in dataloaders['train']:
            # Clear gradients first so nothing left over from earlier calls leaks in.
            optimizer.zero_grad()
            logits = model(x.to(run_device))
            loss = criterion(logits, y.to(run_device))
            loss.backward()
            optimizer.step()
            running += loss.item()
        train_losses[epoch] = running / len(dataloaders['train'])

        # ---- validation pass ----
        # eval() switches off training-only layer behaviour and no_grad() avoids
        # building autograd graphs that would never be used.
        model.eval()
        running = 0.0
        with torch.no_grad():
            for x, y in dataloaders['valid']:
                logits = model(x.to(run_device))
                running += criterion(logits, y.to(run_device)).item()
        valid_loss = running / len(dataloaders['valid'])
        valid_losses[epoch] = valid_loss

        # ---- early stopping / checkpointing ----
        if valid_loss < best_loss:
            best_loss = valid_loss
            stale_epochs = 0
            torch.save(model.state_dict(), file_name)  # keep the best weights on disk
        else:
            stale_epochs += 1
        if stale_epochs >= patience:
            print("No quality improvement in %d last steps. Training stopped earlier" % patience)
            break

        print("Epoch %d/%d: Train loss = %.4f - Validation loss = %.4f"
              % (epoch + 1, num_epochs, train_losses[epoch], valid_losses[epoch]))

    return model

In [None]:
# RegNet-Y-32GF with its default pretrained weights and the preprocessing
# transform that ships with those weights.
weights = RegNet_Y_32GF_Weights.DEFAULT
preprocess = weights.transforms()
regnet = regnet_y_32gf(weights=weights)

train_ds = DS(data_train, data_new, transform=preprocess)
valid_ds = DS(data_val, data_new, transform=preprocess)
test_ds = DS(data_test, data_new, transform=preprocess)
file = data_save+'regnet.pth'  # checkpoint path for this model

nw = 2    # DataLoader worker processes
bs = 100  # batch size
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=nw)
# Evaluation loaders keep a fixed order: shuffling adds nothing there and makes
# the per-epoch validation loss depend on how the last partial batch is drawn.
valid_dl = DataLoader(valid_ds, batch_size=bs, shuffle=False, num_workers=nw)
test_dl = DataLoader(test_ds, batch_size=bs, shuffle=False, num_workers=nw)

# Freeze every existing parameter; the classifier head is replaced in a later cell.
regnet.requires_grad_(False)

In [None]:
# Loaders in the {'train', 'valid'} layout that train_model indexes into.
dloaders = dict(train=train_dl, valid=valid_dl)

In [None]:
# Swap in a fresh classification layer with one output per class in
# label_matcher (instead of a hard-coded 11), then move the model to the device.
regnet.fc = nn.Linear(regnet.fc.in_features, len(label_matcher))
regnet = regnet.to(device)

In [None]:
# Cross-entropy objective with Adam (lr 1e-4), for at most 200 epochs.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=regnet.parameters(), lr=1e-4)
regnet_tuned = train_model(
    dataloaders=dloaders,
    model=regnet,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=200,
    file_name=file,
)

In [None]:
# Restore the checkpoint stored at `file`. map_location lets a checkpoint that
# was saved from a GPU load on a CPU-only runtime as well.
regnet_tuned.load_state_dict(torch.load(file, map_location=device))

In [None]:
# Score the model on the held-out test split.
# eval() disables training-only layer behaviour (e.g. dropout, batch-norm
# statistic updates) and no_grad() skips autograd bookkeeping during inference.
regnet_tuned.eval()
y_pred = []
y_true = []
with torch.no_grad():
    for x, y in test_dl:
        logits = regnet_tuned(x.to(device)).cpu()
        y_pred.append(logits.argmax(dim=1))  # index of the highest logit per row
        y_true.append(y)
y_pred = torch.cat(y_pred, dim=0).tolist()
y_true = torch.cat(y_true, dim=0).tolist()
print(classification_report(y_true, y_pred))

In [None]:
# EfficientNetV2-L with its default pretrained weights and the preprocessing
# transform that ships with those weights.
weights = EfficientNet_V2_L_Weights.DEFAULT
preprocess = weights.transforms()
enet = efficientnet_v2_l(weights=weights)

train_ds = DS(data_train, data_new, transform=preprocess)
valid_ds = DS(data_val, data_new, transform=preprocess)
test_ds = DS(data_test, data_new, transform=preprocess)
file = data_save+'enet.pth'  # checkpoint path for this model

nw = 2    # DataLoader worker processes
bs = 100  # batch size
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=nw)
# Evaluation loaders keep a fixed order: shuffling adds nothing there and makes
# the per-epoch validation loss depend on how the last partial batch is drawn.
valid_dl = DataLoader(valid_ds, batch_size=bs, shuffle=False, num_workers=nw)
test_dl = DataLoader(test_ds, batch_size=bs, shuffle=False, num_workers=nw)

# Freeze every existing parameter; the classifier head is replaced in a later cell.
enet.requires_grad_(False)

In [None]:
# Loaders in the {'train', 'valid'} layout that train_model indexes into.
dloaders = dict(train=train_dl, valid=valid_dl)

In [None]:
# Swap in a fresh classification layer with one output per class in
# label_matcher (instead of a hard-coded 11), then move the model to the device.
enet.classifier[1] = nn.Linear(enet.classifier[1].in_features, len(label_matcher))
enet = enet.to(device)

In [None]:
# Cross-entropy objective with Adam (lr 1e-4).
# NOTE(review): the epoch budget is written as 200 - 15; presumably 15 epochs
# were already spent in an earlier run -- TODO confirm.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=enet.parameters(), lr=1e-4)
enet_tuned = train_model(
    dataloaders=dloaders,
    model=enet,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=200 - 15,
    file_name=file,
)

In [None]:
# Restore the checkpoint stored at `file`. map_location lets a checkpoint that
# was saved from a GPU load on a CPU-only runtime as well.
enet_tuned.load_state_dict(torch.load(file, map_location=device))

In [None]:
# Score the model on the held-out test split.
# eval() disables training-only layer behaviour (e.g. dropout, batch-norm
# statistic updates) and no_grad() skips autograd bookkeeping during inference.
enet_tuned.eval()
y_pred = []
y_true = []
with torch.no_grad():
    for x, y in test_dl:
        logits = enet_tuned(x.to(device)).cpu()
        y_pred.append(logits.argmax(dim=1))  # index of the highest logit per row
        y_true.append(y)
y_pred = torch.cat(y_pred, dim=0).tolist()
y_true = torch.cat(y_true, dim=0).tolist()
print(classification_report(y_true, y_pred))

In [None]:
# ViT-B/16 with its default pretrained weights and the preprocessing transform
# that ships with those weights.
weights = ViT_B_16_Weights.DEFAULT
preprocess = weights.transforms()
vit = vit_b_16(weights=weights)

train_ds = DS(data_train, data_new, transform=preprocess)
valid_ds = DS(data_val, data_new, transform=preprocess)
test_ds = DS(data_test, data_new, transform=preprocess)
file = data_save+'vit.pth'  # checkpoint path for this model

nw = 2    # DataLoader worker processes
bs = 100  # batch size
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=nw)
# Evaluation loaders keep a fixed order: shuffling adds nothing there and makes
# the per-epoch validation loss depend on how the last partial batch is drawn.
valid_dl = DataLoader(valid_ds, batch_size=bs, shuffle=False, num_workers=nw)
test_dl = DataLoader(test_ds, batch_size=bs, shuffle=False, num_workers=nw)

# Freeze every existing parameter; the classifier head is replaced in a later cell.
vit.requires_grad_(False)

In [None]:
# Loaders in the {'train', 'valid'} layout that train_model indexes into.
dloaders = dict(train=train_dl, valid=valid_dl)

In [None]:
# Swap in a fresh classification layer with one output per class in
# label_matcher (instead of a hard-coded 11), then move the model to the device.
vit.heads.head = nn.Linear(vit.heads.head.in_features, len(label_matcher))
vit = vit.to(device)

In [None]:
# Cross-entropy objective with Adam (lr 1e-4).
# NOTE(review): the epoch budget is written as 200 - 38; presumably 38 epochs
# were already spent in an earlier run -- TODO confirm.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=vit.parameters(), lr=1e-4)
vit_tuned = train_model(
    dataloaders=dloaders,
    model=vit,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=200 - 38,
    file_name=file,
)

In [None]:
# Restore the checkpoint stored at `file`. map_location lets a checkpoint that
# was saved from a GPU load on a CPU-only runtime as well.
vit_tuned.load_state_dict(torch.load(file, map_location=device))

In [None]:
# Score the model on the held-out test split.
# eval() disables training-only layer behaviour (e.g. dropout) and no_grad()
# skips autograd bookkeeping during inference.
vit_tuned.eval()
y_pred = []
y_true = []
with torch.no_grad():
    for x, y in test_dl:
        logits = vit_tuned(x.to(device)).cpu()
        y_pred.append(logits.argmax(dim=1))  # index of the highest logit per row
        y_true.append(y)
y_pred = torch.cat(y_pred, dim=0).tolist()
y_true = torch.cat(y_true, dim=0).tolist()
print(classification_report(y_true, y_pred))

In [None]:
# ConvNeXt-Base with its default pretrained weights and the preprocessing
# transform that ships with those weights.
weights = ConvNeXt_Base_Weights.DEFAULT
preprocess = weights.transforms()
convnext = convnext_base(weights=weights)

train_ds = DS(data_train, data_new, transform=preprocess)
valid_ds = DS(data_val, data_new, transform=preprocess)
test_ds = DS(data_test, data_new, transform=preprocess)
file = data_save+'convnext.pth'  # checkpoint path for this model

nw = 2    # DataLoader worker processes
bs = 100  # batch size
train_dl = DataLoader(train_ds, batch_size=bs, shuffle=True, num_workers=nw)
# Evaluation loaders keep a fixed order: shuffling adds nothing there and makes
# the per-epoch validation loss depend on how the last partial batch is drawn.
valid_dl = DataLoader(valid_ds, batch_size=bs, shuffle=False, num_workers=nw)
test_dl = DataLoader(test_ds, batch_size=bs, shuffle=False, num_workers=nw)

# Freeze every existing parameter; the classifier head is replaced in a later cell.
convnext.requires_grad_(False)

In [None]:
# Loaders in the {'train', 'valid'} layout that train_model indexes into.
dloaders = dict(train=train_dl, valid=valid_dl)

In [None]:
# Swap in a fresh classification layer with one output per class in
# label_matcher (instead of a hard-coded 11), then move the model to the device.
convnext.classifier[2] = nn.Linear(convnext.classifier[2].in_features, len(label_matcher))
convnext = convnext.to(device)

In [None]:
# Cross-entropy objective with Adam (lr 1e-4), for at most 200 epochs.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=convnext.parameters(), lr=1e-4)
convnext_tuned = train_model(
    dataloaders=dloaders,
    model=convnext,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=200,
    file_name=file,
)

In [None]:
# Restore the checkpoint stored at `file`. map_location lets a checkpoint that
# was saved from a GPU load on a CPU-only runtime as well.
convnext_tuned.load_state_dict(torch.load(file, map_location=device))

In [None]:
# Score the model on the held-out test split.
# eval() disables training-only layer behaviour (e.g. stochastic depth) and
# no_grad() skips autograd bookkeeping during inference.
convnext_tuned.eval()
y_pred = []
y_true = []
with torch.no_grad():
    for x, y in test_dl:
        logits = convnext_tuned(x.to(device)).cpu()
        y_pred.append(logits.argmax(dim=1))  # index of the highest logit per row
        y_true.append(y)
y_pred = torch.cat(y_pred, dim=0).tolist()
y_true = torch.cat(y_true, dim=0).tolist()
print(classification_report(y_true, y_pred))