In [1]:
Dataset_url = "https://www.kaggle.com/datasets/jehanbhathena/weather-dataset"

In [2]:
# Upload Kaggle json file
!mkdir ~/.kaggle/
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
!kaggle datasets download -d jehanbhathena/weather-dataset

Dataset URL: https://www.kaggle.com/datasets/jehanbhathena/weather-dataset
License(s): CC0-1.0
Downloading weather-dataset.zip to /content
 99% 580M/587M [00:02<00:00, 225MB/s]
100% 587M/587M [00:03<00:00, 204MB/s]


In [4]:
!unzip -q weather-dataset.zip

In [5]:
# Import
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset, random_split
from torchvision import datasets, transforms, models
from PIL import Image
from tqdm.auto import tqdm
import time

In [6]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
device

'cuda'

In [7]:
# Define transformation for data augmentation or preprocessing
transform = transforms.Compose([
    transforms.Resize((255, 255)),
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(degrees=15),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])




In [8]:
class CustomImageDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.samples = []

        # Gather file paths of all images in the folders and their corresponding labels
        for label, folder in enumerate(os.listdir(root_dir)):
            folder_path = os.path.join(root_dir, folder)
            if os.path.isdir(folder_path):
                for file in os.listdir(folder_path):
                  if file.lower().endswith(('.jpg', '.jpeg', '.png', '.bmp', '.webp')):
                    self.samples.append((os.path.join(folder_path, file), label))

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, index):
        try:
            image_path, label = self.samples[index]
            image = Image.open(image_path).convert('RGB')

            if self.transform:
                image = self.transform(image)

            return image, label

        except Exception as e:
            # If there's an error opening or processing the image, print the error and return None
            #print(f"Error loading image: {image_path} - {e}")
            return self.__getitem__(index + 1)

In [9]:
# Instantiate custom dataset
root_dir = '/content/dataset'
custom_dataset = CustomImageDataset(root_dir, transform=transform)

In [10]:
len(custom_dataset)

6862

In [11]:
# Define the size of the training set
train_size = int(0.8 * len(custom_dataset))
test_size = len(custom_dataset) - train_size

# Split the dataset into training and testing sets
train_dataset, test_dataset = torch.utils.data.random_split(custom_dataset, [train_size, test_size])

len(train_dataset), len(test_dataset)

(5489, 1373)

In [24]:
# Define batch size for DataLoader
batch_size = 4

# Create DataLoader for training set
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last = True)

# Create DataLoader for testing set
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [25]:
def train(dataloader, model, loss_fn, optimizer,device):
    size = len(dataloader.dataset)
    model.to(device)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [26]:
def test(dataloader, model, loss_fn, device):
    size = len(dataloader.dataset)
    model.to(device)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [27]:
# Create a convolutional neural network
class DeeperCnnModel(nn.Module):

    # Deeper model architecture inspired by TinyVGG.

    def __init__(self, input_shape: int, hidden_units: int, output_shape: int):
        super().__init__()
        self.block_1 = nn.Sequential(
            nn.Conv2d(in_channels=input_shape,
                      out_channels=hidden_units,
                      kernel_size=3,  # how big is the square that's going over the image?
                      stride=1,  # default
                      padding=1),  # options = "valid" (no padding) or "same" (output has same shape as input) or int for specific number
            nn.ReLU(),
            nn.Conv2d(in_channels=hidden_units,
                      out_channels=hidden_units,
                      kernel_size=3,
                      stride=1,
                      padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2,
                         stride=2),
            nn.Dropout(0.2)
        )
        self.block_2 = nn.Sequential(
            nn.Conv2d(hidden_units, hidden_units * 2, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units * 2, hidden_units * 2, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.2)
        )
        self.block_3 = nn.Sequential(
            nn.Conv2d(hidden_units * 2, hidden_units * 4, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units * 4, hidden_units * 4, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.2)
        )
        self.block_4 = nn.Sequential(
            nn.Conv2d(hidden_units * 4, hidden_units * 8, 3, padding=1),
            nn.ReLU(),
            nn.Conv2d(hidden_units * 8, hidden_units * 8, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2),
            nn.Dropout(0.2)
        )
        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features=78400,  # Adjust according to the input image size and pooling layers
                      out_features=hidden_units * 16),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_units * 16),
            nn.Linear(in_features=hidden_units * 16,
                      out_features=hidden_units * 8),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_units * 8),
            nn.Linear(in_features=hidden_units * 8,
                      out_features=hidden_units * 4),
            nn.ReLU(),
            nn.BatchNorm1d(hidden_units * 4),
            nn.Linear(in_features=hidden_units * 4,
                      out_features=output_shape),
            nn.Softmax(dim=1)  # Add softmax layer
        )

    def forward(self, x: torch.Tensor):
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.block_3(x)
        x = self.block_4(x)
        x = self.classifier(x)
        return x

torch.manual_seed(23)
model = DeeperCnnModel(input_shape=3,
                       hidden_units=50,
                       output_shape=11)

# Summary of the model
print(model)

DeeperCnnModel(
  (block_1): Sequential(
    (0): Conv2d(3, 50, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(50, 50, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Dropout(p=0.2, inplace=False)
  )
  (block_2): Sequential(
    (0): Conv2d(50, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(100, 100, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Dropout(p=0.2, inplace=False)
  )
  (block_3): Sequential(
    (0): Conv2d(100, 200, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): Conv2d(200, 200, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Dropout(p=

In [32]:
# Setup loss and optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=model.parameters(),
                             lr=1e-4)

In [33]:
from tqdm.auto import tqdm
import time
start_time = time.time()


# Train and test model
epochs = 5
for epoch in tqdm(range(epochs)):
    print(f"Epoch: {epoch}\n---------")
    train(train_loader, model, loss_fn, optimizer,device)
    test(test_loader, model, loss_fn,device)

current_time = time.time()
total = current_time - start_time
print("Done!")
print(f'Training Took: {total/60} minutes!')

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 0
---------
loss: 1.818604  [    4/ 5489]
loss: 2.228086  [  404/ 5489]
loss: 2.165209  [  804/ 5489]
loss: 2.222356  [ 1204/ 5489]
loss: 2.532373  [ 1604/ 5489]
loss: 2.532059  [ 2004/ 5489]
loss: 2.452021  [ 2404/ 5489]
loss: 2.486551  [ 2804/ 5489]
loss: 2.272977  [ 3204/ 5489]
loss: 2.271206  [ 3604/ 5489]
loss: 2.276207  [ 4004/ 5489]
loss: 1.943467  [ 4404/ 5489]
loss: 1.803722  [ 4804/ 5489]
loss: 2.280109  [ 5204/ 5489]
Test Error: 
 Accuracy: 34.7%, Avg loss: 2.196459 

Epoch: 1
---------
loss: 2.260381  [    4/ 5489]
loss: 2.053766  [  404/ 5489]
loss: 2.363287  [  804/ 5489]
loss: 2.278893  [ 1204/ 5489]
loss: 2.322383  [ 1604/ 5489]
loss: 2.516241  [ 2004/ 5489]
loss: 2.503463  [ 2404/ 5489]
loss: 2.013679  [ 2804/ 5489]
loss: 2.094459  [ 3204/ 5489]
loss: 2.288945  [ 3604/ 5489]
loss: 2.029299  [ 4004/ 5489]
loss: 2.247892  [ 4404/ 5489]
loss: 2.395879  [ 4804/ 5489]
loss: 2.177980  [ 5204/ 5489]
Test Error: 
 Accuracy: 36.4%, Avg loss: 2.186208 

Epoch: 2
---------