In [61]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'weather-dataset:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F1715246%2F2854929%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240715%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240715T115735Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D9dad423653c9ac8a15f188a6bbf65e016d7df61cc95fd9ce428ad2da069280f5592745ea1c6e92ecd165c0265adc394bae436d114738cdedfd90fcce7f9fb3b35499044a11d59396a6f999576df4dcefaa7f6eab4562d66b995bea9e3302efd88a2642d9c92ac33173aeb45f229d8124feafa18a6e3ad93e8fe3e53feec504004b8ce17f50a2d93cf0eefa535f4f7693d0c0ce920b24de5de4d60616faf0b73a9258d181bef7731d21d8536c298c97a7b2871519bbed90970e3e47879c588b480323896141202f3c0188058d9518c0c480a6cd4b0ffc2fcdc41e0bb4c973bef23dc37bc2d2dffccf73981655fd7e0548b1adc14e4accc894bbe48f8b5baa71be'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading weather-dataset, 615102810 bytes compressed
Downloaded and uncompressed: weather-dataset
Data source import complete.


In [67]:
import os
import pandas as pd

In [68]:
data = list()

for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        label = dirname.split("/")[-1]
        image = os.path.join(dirname, filename)
        data.append((label, image))

In [69]:
df = pd.DataFrame(data, columns=["label", "image"])
df.head()

Unnamed: 0,label,image
0,rain,/kaggle/input/weather-dataset/dataset/rain/152...
1,rain,/kaggle/input/weather-dataset/dataset/rain/107...
2,rain,/kaggle/input/weather-dataset/dataset/rain/176...
3,rain,/kaggle/input/weather-dataset/dataset/rain/149...
4,rain,/kaggle/input/weather-dataset/dataset/rain/152...


In [70]:
df.shape

(6862, 2)

In [71]:
df['label'].unique()


array(['rain', 'rainbow', 'sandstorm', 'glaze', 'dew', 'fogsmog', 'snow',
       'frost', 'rime', 'lightning', 'hail'], dtype=object)

In [72]:
len(df['label'].unique())

11

In [73]:
df['image'][0]

'/kaggle/input/weather-dataset/dataset/rain/1523.jpg'

In [74]:
# Split dataset into train and test sets
from sklearn.model_selection import train_test_split

train_data, test_data = train_test_split(df, test_size=0.2, random_state=42)

# Split train_data into train and validation sets
train_data, val_data = train_test_split(train_data, test_size=0.25, random_state=42)  # 0.25 x 0.8 = 0.2

In [75]:
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset
import torchvision.transforms as transforms


In [76]:
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset
from sklearn.preprocessing import LabelEncoder


class CustomDataset(Dataset):
    def __init__(self, dataframe, transform=None):
        # Convert the Subset to a DataFrame
        self.data = dataframe.reset_index(drop=True)
        self.transform = transform
        self.label_encoder = LabelEncoder()
        self.data['label'] = self.label_encoder.fit_transform(self.data['label'])

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        img_path = self.data.iloc[idx]['image']
        image = Image.open(img_path).convert('RGB')
        label = torch.tensor(self.data.iloc[idx]['label'], dtype=torch.long)

        if self.transform:
            image = self.transform(image)

        return image, label


In [77]:
# Example transformations for image preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


In [78]:
# Create an instance of the WeatherDataset
train = CustomDataset(train_data, transform=transform)
vaild = CustomDataset(val_data, transform=transform)
test = CustomDataset(test_data, transform=transform)

In [79]:
# Example usage of DataLoader
batch_size = 32
shuffle = True
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=shuffle,num_workers=4)
vaild_loader = torch.utils.data.DataLoader(vaild, batch_size=batch_size, shuffle=False,num_workers=4)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False,num_workers=4)



In [80]:
# Iterate through the DataLoader
for batch_idx, (images, labels) in enumerate(train_loader):
    print(f'Batch {batch_idx}:')
    print(f'  Images shape: {images.shape}')  # Shape of the batch of images
    print(f'  Labels: {labels}')              # Labels corresponding to the images in the batch
    break  # For demonstration, break after first batch


Batch 0:
  Images shape: torch.Size([32, 3, 224, 224])
  Labels: tensor([10,  3, 10,  6,  8,  0,  3,  1,  9,  1,  8,  9,  2,  8,  0,  2,  1,  2,
        10, 10,  4,  9,  4,  3,  5,  0,  1,  0,  6,  8,  4,  9])


In [84]:
# Load pre-trained VGG16 model
import torchvision.models as models
import torch.nn as nn

vgg16 = models.vgg16(pretrained=True)

# Freeze parameters so we don't backprop through them
for param in vgg16.parameters():
    param.requires_grad = False

# Modify the last fully connected layer for your number of classes
num_classes = 11
vgg16.classifier[6] = nn.Linear(4096, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(vgg16.parameters(), lr=0.001)

In [87]:
def train_model(model, criterion, optimizer, num_epochs=3):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

        # Print average loss per epoch
        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {epoch_loss:.4f}')

        # Evaluate on validation set
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in vaild_loader:
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                val_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_loss /= len(vaild_loader.dataset)
        val_accuracy = correct / total
        print(f'Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2%}')

In [88]:
train_model(vgg16, criterion, optimizer)

Epoch [1/3], Loss: 0.7503
Validation Loss: 0.4804, Validation Accuracy: 82.81%
Epoch [2/3], Loss: 0.4444
Validation Loss: 0.4234, Validation Accuracy: 84.56%
Epoch [3/3], Loss: 0.3946
Validation Loss: 0.4138, Validation Accuracy: 85.87%


In [89]:
def evaluate_model(model, data_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Accuracy on the test set: {accuracy:.2%}')

In [90]:
evaluate_model(vgg16, test_loader)

Accuracy on the test set: 87.04%


In [91]:
torch.save(vgg16.state_dict(), 'vgg16_model.pth')