# Imports

In [None]:
import os
import copy
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import Subset
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from torch import optim
from torchvision.models import resnet152
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_curve
warnings.filterwarnings("ignore")

%matplotlib inline
sns.set_style("darkgrid")

# Data Visualization

In [None]:
# NOTE: everything below is one bare triple-quoted string, so none of it executes.
# It holds a disabled experiment that wrote an augmented copy of every image to
# /kaggle/working/augmented_data; the string is kept verbatim for reference.
'''import os
import torch
from torchvision import transforms, datasets
from PIL import Image

# Check if a GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Path to the main "data" directory
data_dir = "/kaggle/input/5class-weather-status-image-classification/data"

# Create a PyTorch ImageFolder dataset with the original data
original_dataset = datasets.ImageFolder(data_dir, transform=transforms.ToTensor())

# Define data augmentation transformations
augmentation_transforms = transforms.Compose([
    transforms.RandomRotation(degrees=15),
    transforms.RandomHorizontalFlip(),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomApply([transforms.GaussianBlur(kernel_size=5)], p=0.5),
    transforms.RandomApply([transforms.RandomAffine(degrees=0, translate=(0.1, 0.1))], p=0.5),
])

# Path to the directory where augmented data should be saved
augmented_data_dir = "/kaggle/working/augmented_data"  # Replace with your desired directory path
os.makedirs(augmented_data_dir, exist_ok=True)

# Define a function to apply augmentation to an image using GPU
def apply_augmentation(image, augmentation_transforms):
    image = image.to(device)  # Move the image to the GPU
    augmented_image = augmentation_transforms(image)
    augmented_image = augmented_image.cpu()  # Move the augmented image back to the CPU
    return augmented_image

# Iterate through the original dataset, apply augmentation, and save the augmented images
for i, (image, label) in enumerate(original_dataset):
    class_folder = original_dataset.classes[label]
    class_dir = os.path.join(augmented_data_dir, class_folder)
    os.makedirs(class_dir, exist_ok=True)

    # Apply augmentation to the image
    augmented_image = apply_augmentation(image, augmentation_transforms)

    # Convert the augmented image to a PIL image
    augmented_image_pil = transforms.ToPILImage()(augmented_image)

    # Save the augmented images to the specified directory
    augmented_image_pil.save(os.path.join(class_dir, f"augmented_{i}.jpg"), "JPEG")

# Now, your augmented images are saved in the "augmented_data" directory, preserving the original data.
'''

In [None]:
# NOTE: this cell is a bare triple-quoted string and does not execute.
# It holds a disabled snippet that zipped the "augmented_data" folder for download.
'''import shutil

# Directory to compress (your augmented dataset folder)
source_dir = "augmented_data"

# ZIP archive name
zip_file_name = "augmented_data.zip"

# Create a ZIP archive of the augmented dataset folder
shutil.make_archive(zip_file_name, 'zip', source_dir)

# Move the ZIP archive to a downloadable location
download_dir = "/kaggle/working"  # Replace with your desired download directory
shutil.move(zip_file_name + ".zip", download_dir)

# Provide a link for downloading the ZIP archive
print(f"Download the ZIP archive from this link: [Download {zip_file_name}](sandbox:/kaggle/working/{zip_file_name}.zip)")
'''

In [None]:
# NOTE: this cell is a bare triple-quoted string and does not execute.
# It holds a disabled snippet that printed the number of files in each class folder.
"""import os

data_dir = "/kaggle/input/5class-weather-status-image-classification/data"  # Replace with the path to your dataset

class_folders = os.listdir(data_dir)
total_count = 0

for folder in class_folders:
    folder_path = os.path.join(data_dir, folder)
    if os.path.isdir(folder_path):
        images = os.listdir(folder_path)
        count = len(images)
        print(f"Class {folder}: {count} images")
        total_count += count

print(f"Total dataset size: {total_count} images")
"""

In [None]:
# Report the NumPy version in use (numpy is already imported as np in the imports cell).
print(np.__version__)

In [None]:
# Show one sample image per weather class and record how many files each class holds.
data_dir = "/kaggle/input/5class-weather/data"

# Keep only sub-directories (each one is a class) and sort them: os.listdir returns
# entries in arbitrary order, which would make the panel layout differ between runs.
weather_classes = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)

# Three panels per row; the row count grows with the number of classes (2 rows for 5 classes).
n_cols = 3
n_rows = max(1, (len(weather_classes) + n_cols - 1) // n_cols)
fig, axes = plt.subplots(n_rows, n_cols, figsize=(7, 7), squeeze=False)
ax = axes.flatten()
cls_dist = {}

for i, weather in enumerate(weather_classes):
    label = weather
    imgs = sorted(os.listdir(os.path.join(data_dir, weather)))
    cls_dist[label] = len(imgs)

    ax[i].set_title(f"{label.capitalize()}")
    ax[i].axis("off")

    if not imgs:
        continue  # empty class folder: keep the titled but blank panel

    img = cv2.imread(os.path.join(data_dir, weather, imgs[0]))
    if img is None:
        continue  # cv2.imread returns None for files it cannot decode

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR, matplotlib expects RGB
    img = cv2.resize(img, (160, 160))
    ax[i].imshow(img)

# Hide the panels that have no class assigned to them.
for spare_ax in ax[len(weather_classes):]:
    spare_ax.axis("off")

plt.tight_layout()
plt.savefig("1.png");

In [None]:
# Bar chart of image counts per class; bars are ordered by ascending count,
# with ties broken alphabetically by class name.
ranked = sorted(cls_dist.items(), key=lambda item: (item[1], item[0]))
order = [name for name, _ in ranked]

class_names = list(cls_dist.keys())
class_counts = list(cls_dist.values())
sns.barplot(x=class_counts, y=class_names, color="blue", order=order);
plt.savefig("2.png");

# Dataset and DataLoader

In [None]:
# Per-channel normalisation shared by the training and validation pipelines.
normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Training pipeline: random flip and resize before ToTensor, then a random
# rotation of up to 15 degrees and a Gaussian blur on the tensor, then normalisation.
train_transforms = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.RandomRotation(degrees=15),
        transforms.GaussianBlur(kernel_size=5),
        normalize,
    ]
)

# Validation pipeline: deterministic resize, tensor conversion and normalisation only.
val_transforms = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        normalize,
    ]
)

In [None]:
# Two views of the same image folder: one with the training transforms,
# one with the validation transforms.
train_ds = ImageFolder(data_dir, transform=train_transforms)
val_ds = ImageFolder(data_dir, transform=val_transforms)

In [None]:
# After creating the train_ds and val_ds objects
num_images_after_transformation_train = len(train_ds)
num_images_after_transformation_val = len(val_ds)

print(f"Number of images in the transformed training dataset: {num_images_after_transformation_train}")
print(f"Number of images in the transformed validation dataset: {num_images_after_transformation_val}")

In [None]:
# Class names taken from the ImageFolder dataset; later cells index this list
# with integer labels (classes[label]) to recover the class name.
classes=train_ds.classes
classes

In [None]:
# Split the images 70/30 into training and validation subsets and build the loaders.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Re-running this cell must not split an already-split dataset. If a previous run
# wrapped the datasets in Subset, unwrap them so the indices below always refer to
# the full ImageFolder and the train/validation separation stays intact.
if isinstance(train_ds, Subset):
    train_ds = train_ds.dataset
if isinstance(val_ds, Subset):
    val_ds = val_ds.dataset

idx = list(range(len(train_ds)))

# Fixed seed so the same images land in each split on every run.
np.random.seed(42)
np.random.shuffle(idx)
split_at = int(0.7 * len(train_ds))
train_idx = idx[:split_at]
val_idx = idx[split_at:]

# The two subsets use disjoint indices: training images go through train_transforms,
# validation images through val_transforms.
train_ds = Subset(train_ds, train_idx)
val_ds = Subset(val_ds, val_idx)

batch_size = 64
pin_memory = torch.cuda.is_available()
train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2,
                      pin_memory=pin_memory)
# Shuffling does not change validation metrics; a fixed order keeps evaluation
# outputs (sample figures, prediction table) repeatable between runs.
val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=False, num_workers=2,
                    pin_memory=pin_memory)


#device = "cuda" if torch.cuda.is_available() else "cpu"
#idx = list(range(len(train_ds)))

#np.random.seed(42)
#np.random.shuffle(idx)
#train_idx = idx[:int(0.8 * len(train_ds))]
#val_idx = idx[int(0.8 * len(train_ds)):]

#train_ds = Subset(train_ds, train_idx)
#val_ds = Subset(val_ds, val_idx)

#batch_size = 32  # Change batch size to 32
#train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True, num_workers=2,
                     # pin_memory=True if torch.cuda.is_available() else False)
#val_dl = DataLoader(val_ds, batch_size=batch_size, shuffle=True, num_workers=2,
                   # pin_memory=True if torch.cuda.is_available() else False)

In [None]:
# Show which device ("cuda" or "cpu") was selected for training.
print(device)

In [None]:
# One panel per class, so the panel count follows the dataset's class list.
num_classes = len(classes)
# Create a dictionary to store images for each class
class_images = {class_name: None for class_name in classes}

# Collect one image for each class, stopping once every class has a sample.
for imgs, labels in train_dl:
    for img, label in zip(imgs, labels):
        class_name = classes[label.item()]
        if class_images[class_name] is None:
            class_images[class_name] = img
    if all(sample is not None for sample in class_images.values()):
        break

# The tensors coming out of the loader are normalised for the network. Undo that
# with the statistics stored on the `normalize` transform so imshow receives
# values in [0, 1] instead of clipping them and distorting the colours.
channel_mean = np.asarray(normalize.mean, dtype=np.float32)
channel_std = np.asarray(normalize.std, dtype=np.float32)

# Display the collected images
fig, axes = plt.subplots(1, num_classes, figsize=(7, 12), squeeze=False)  # Adjust figsize as needed
axes = axes.flatten()

for i, class_name in enumerate(class_images.keys()):
    ax = axes[i]
    ax.set_title(class_name)
    ax.axis("off")

    sample = class_images[class_name]
    if sample is None:
        continue  # no image of this class in the training split: leave the panel blank

    img = sample.permute(1, 2, 0).numpy()  # channels-first tensor -> channels-last array
    img = np.clip(img * channel_std + channel_mean, 0.0, 1.0)
    ax.imshow(img)

plt.savefig("one_image_per_class.png")

# Model

In [None]:
class WeatherResNet152(nn.Module):
    """Pretrained ResNet-152 with a new linear head producing ``num_classes`` outputs.

    Backbone parameters whose name contains ``layer3`` or ``layer4`` stay
    trainable; all other backbone parameters are frozen. The original ``fc``
    layer is then replaced by a freshly initialised ``nn.Linear``.
    """

    def __init__(self, num_classes):
        super().__init__()
        self.resnet = resnet152(pretrained=True)

        # Fine-tune only the two deepest residual stages; freeze everything else.
        trainable_stages = ("layer4", "layer3")
        for name, param in self.resnet.named_parameters():
            param.requires_grad = any(stage in name for stage in trainable_stages)

        # Swap the classification head for one with the requested number of outputs.
        self.resnet.fc = nn.Linear(self.resnet.fc.in_features, num_classes)

    def forward(self, x):
        """Return the backbone's output for the input batch ``x``."""
        return self.resnet(x)

# Define the model architecture and move it to the selected device
num_classes = 5  # Five weather classes; single-label multi-class (trained with cross-entropy), not multi-label
model = WeatherResNet152(num_classes).to(device)

In [None]:
#for param in model.resnet.layer4.parameters():
   # param.requires_grad = True

In [None]:
#for param in model.resnet.fc.parameters():
    #param.requires_grad = False

In [None]:
# Cross-entropy objective, Adam over the model's parameters, and a plateau scheduler
# that halves the learning rate (factor 0.5, patience 5). mode="max" because the
# training loop steps it with validation accuracy.
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=0.001)
lr_scheduler = ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=5,
    verbose=True,
)

# Training

In [None]:
def epoch_cal(dataloader, mode):
    """Run one pass over ``dataloader`` and return the per-sample loss and accuracy.

    Relies on the notebook-level ``model``, ``loss_fn``, ``optimizer`` and ``device``.

    Args:
        dataloader: Iterable yielding ``(imgs, labels)`` batches.
        mode: ``"train"`` puts the model in training mode and updates its
            weights; any other value runs the model in evaluation mode with
            gradient tracking disabled.

    Returns:
        Tuple ``(loss, accuracy)``: the loss averaged over all samples seen and
        the fraction of samples whose arg-max prediction equals the label.
        ``(0.0, 0.0)`` if the dataloader yields no samples.
    """
    is_train = mode == "train"
    if is_train:
        model.train()
    else:
        model.eval()

    total_loss = 0.0
    correct = 0
    seen = 0

    with torch.set_grad_enabled(is_train):
        for imgs, labels in dataloader:
            imgs, labels = imgs.to(device), labels.to(device)

            if is_train:
                optimizer.zero_grad()

            output = model(imgs)
            loss = loss_fn(output, labels)

            if is_train:
                loss.backward()
                optimizer.step()

            # loss_fn returns the batch mean (the notebook builds nn.CrossEntropyLoss
            # with its default reduction), so weight it by the batch size. Summing
            # batch means and dividing by the sample count would under-report the
            # loss by roughly a factor of the batch size.
            n_batch = labels.size(0)
            total_loss += loss.item() * n_batch
            correct += (output.argmax(dim=1) == labels).sum().item()
            seen += n_batch

    if seen == 0:
        return 0.0, 0.0
    return total_loss / seen, correct / seen

In [None]:
# Training schedule and early-stopping settings.
epochs, save_interval = 20, 5
min_delta, patience = 0.0005, 10
best_val_acc, best_epoch = 0.0, 0

# Per-epoch history used by the plotting cells.
loss_dict = {key: [] for key in ("train_loss", "train_acc", "val_loss", "val_acc")}

for epoch in range(epochs):

    # One optimisation pass over the training split.
    train_loss, train_acc = epoch_cal(train_dl, mode="train")
    loss_dict["train_loss"].append(train_loss)
    loss_dict["train_acc"].append(train_acc)

    # One evaluation pass over the validation split.
    val_loss, val_acc = epoch_cal(val_dl, mode="eval")
    loss_dict["val_loss"].append(val_loss)
    loss_dict["val_acc"].append(val_acc)

    # Keep the weights of the best epoch; an improvement must be at least min_delta.
    improved = val_acc >= best_val_acc + min_delta
    if improved:
        best_epoch, best_val_acc = epoch, val_acc
        torch.save(model.state_dict(), "model.pth")

    # Stop once more than `patience` epochs have passed since the best one.
    epochs_since_best = epoch - best_epoch
    if epochs_since_best > patience:
        print(f"Early Stopping training at: {epoch}")
        break

    # Update the learning rate scheduler based on validation accuracy
    lr_scheduler.step(val_acc)

    # Periodic checkpoint of model and optimizer state.
    if epoch % save_interval == 0:
        torch.save(model.state_dict(), f'model_checkpoint_{epoch}.pth')
        torch.save(optimizer.state_dict(), f'optimizer_checkpoint_{epoch}.pth')

    print(f"{epoch}/{epochs}: Training\nloss: {train_loss:.4f} accuracy: {train_acc * 100:.2f}%")
    print(f"{epoch}/{epochs}: Evaluating\nloss: {val_loss:.4f} accuracy: {val_acc * 100:.2f}%")
    print("*" * 30, "\n")

In [None]:
#import os
#Replace 'path/to/directory' with the actual path of the directory you want to clear
#directory_path = '/kaggle/working/'

#Iterate over files in the directory and delete them
#for filename in os.listdir(directory_path):
    #file_path = os.path.join(directory_path, filename)
    #if os.path.isfile(file_path):
        #os.remove(file_path)

In [None]:
# Training and validation loss per recorded epoch, drawn on the current axes.
x = range(len(loss_dict['train_loss']))

loss_ax = plt.gca()
sns.lineplot(x=x, y=loss_dict['train_loss'], label="Train Losses", ax=loss_ax);
sns.lineplot(x=x, y=loss_dict['val_loss'], label="Validation Losses", ax=loss_ax);
loss_ax.set_title("Training Validation Datasets Losses Plot");
loss_ax.legend();
loss_ax.figure.savefig("4.png")

In [None]:
# Training and validation accuracy per recorded epoch; reuses the epoch
# index `x` built in the loss-plot cell.
acc_ax = plt.gca()
sns.lineplot(x=x, y=loss_dict['train_acc'], label="Train Accuracy", ax=acc_ax);
sns.lineplot(x=x, y=loss_dict['val_acc'], label="Validation Accuracy", ax=acc_ax);
acc_ax.set_title("Training Validation Datasets Accuracy Plot");
acc_ax.legend();
acc_ax.figure.savefig("5.png")

# Deployment

In [None]:
# Rebuild the network on the CPU and load the weights stored in model.pth.
num_classes = 5  # Number of classes
device = torch.device("cpu")
model = WeatherResNet152(num_classes)

# map_location makes the load work on the CPU regardless of where the tensors were saved from.
state_dict = torch.load("/kaggle/working/model.pth", map_location=device)
model.load_state_dict(state_dict)

# Switch to evaluation mode for inference.
model.eval()

In [None]:
# Calculate the number of rows and columns for subplots
num_images = 4  # Number of images you want to visualize
num_rows = (num_images + 1) // 2
num_cols = min(2, num_images)

# Get a batch of validation data
imgs, labels = next(iter(val_dl))
output = model(imgs)

# Calculate predicted labels for the batch
pred_labels = output.argmax(1)

# Create subplots dynamically based on the number of images
fig, axes = plt.subplots(num_rows, num_cols, figsize=(15, 15))
plt.subplots_adjust(wspace=0.2, hspace=0.4)  # Adjust the spacing between subplots

axes = axes.flatten()

# Loop through the images and plot them with true and predicted labels
for i in range(num_images):
    ax = axes[i]
    ax.imshow(imgs[i].permute(1, 2, 0).numpy())
    ax.text(10, 30, "True: " + classes[labels[i].item()], style='italic', fontsize=10, color="green")
    ax.text(120, 30, "Predict: " + classes[pred_labels[i].item()], style='italic', fontsize=10, color="red")
    ax.axis("off")

# Hide any remaining empty subplots
for i in range(num_images, num_rows * num_cols):
    fig.delaxes(axes[i])

plt.tight_layout()
plt.savefig("image_predictions.png")

# Evaluation

In [None]:
# Create a DataFrame to store predictions
#predictions = []

# Iterate through validation dataset
#for i, (imgs, labels) in enumerate(val_dl):
    #batch_size = imgs.shape[0]
    #outputs = model(imgs)
   # predicted_labels = outputs.argmax(1)

    #for j in range(batch_size):
       # filename = f"image_{i * batch_size + j}.jpg"  # Assuming a filename format
        #true_label = classes[labels[j].item()]
       # predicted_label = classes[predicted_labels[j].item()]

      #  predictions.append((filename, true_label, predicted_label))

# Convert predictions list to a DataFrame
#predictions_df = pd.DataFrame(predictions, columns=["Filename", "True Label", "Predicted Label"])

# Save predictions to a CSV file
#predictions_df.to_csv("predictions.csv", index=False)

In [None]:
# Derive the class count from the dataset's class list and print it.
num_classes = len(classes)
print("Number of classes:", num_classes)

In [None]:
# Run the model over the whole validation loader and tabulate class probabilities
# next to the true label.
num_classes = len(classes)  # one probability column per class
num_images = len(val_dl.dataset)
y_true = np.empty((num_images,))
y_probs = np.empty((num_images, num_classes))

# Track the write position explicitly instead of assuming every batch holds
# exactly val_dl.batch_size samples.
offset = 0
with torch.no_grad():  # inference only: do not build an autograd graph per batch
    for imgs, labels in val_dl:
        probs = F.softmax(model(imgs), dim=1)

        stop = offset + labels.size(0)
        y_true[offset:stop] = labels.numpy()
        y_probs[offset:stop] = probs.numpy()
        offset = stop

# Drop any rows that were never written (e.g. a loader that skips an incomplete batch).
y_true, y_probs = y_true[:offset], y_probs[:offset]

# Create a DataFrame to store predictions. Column i holds the probability of
# classes[i], so the names come from the dataset rather than a hard-coded list.
columns = list(classes) + ['y_true']  # Include 'y_true' in columns
data = np.concatenate([y_probs, y_true.reshape(-1, 1)], axis=1)
predictions = pd.DataFrame(data=data, columns=columns)
predictions.head()

In [None]:
# Write the prediction table to predictions.csv without the row index.
predictions.to_csv("predictions.csv",index=False)

In [None]:
# Split the prediction table back into arrays: every column but the last holds
# class probabilities, the last column holds the true label.
y_prob = predictions.iloc[:, :-1].to_numpy()
y_pred = np.argmax(y_prob, axis=1).astype(np.int16)
y_true = predictions.iloc[:, -1].to_numpy().astype(np.int16)

In [None]:
# Per-class precision, recall and F1 on the validation predictions (rows are labelled by class index).
print(classification_report(y_true,y_pred))

In [None]:
# Confusion matrix of the validation predictions, rows = true class, columns = predicted class.
plt.figure(figsize=(8, 8));
val = ['cloudy', 'foggy', 'rainy', 'snowy', 'sunny']
cm = confusion_matrix(y_true, y_pred)
plot_data = pd.DataFrame(cm, index=val, columns=val)

heat_ax = sns.heatmap(plot_data, annot=True, fmt='g', cmap='Blues', cbar=False);
heat_ax.set_xlabel('Predicted Label');
heat_ax.set_ylabel('True Label');
heat_ax.figure.savefig("7.png");

In [None]:
# One-vs-rest precision-recall curve for each class, all on one figure.
plt.figure(figsize=(8, 8))
pr_ax = plt.gca()

for i in range(y_probs.shape[1]):
    # Treat class i as the positive class and score it with its predicted probability.
    y_binary = (y_true == i)
    precision, recall, _ = precision_recall_curve(y_binary, y_probs[:, i])
    pr_ax.plot(recall, precision, label=val[i])

pr_ax.legend()
pr_ax.set_xlabel('Recall')
pr_ax.set_ylabel('Precision')
pr_ax.set_title('Precision-Recall Curve')
pr_ax.figure.savefig("8.png")