In [14]:
import os
import json
import random
from PIL import Image

import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models

from tqdm import tqdm
import matplotlib.pyplot as plt 

%matplotlib inline

## set paths and config

In [15]:
# set paths 

cwd = os.getcwd()

project_root = os.path.abspath(os.path.join(cwd, ".."))

data_root = os.path.join(project_root, "data", "raw", "AerialWaste")

image_dirs = [os.path.join(data_root, f"images{i}") for i in range(6)]

train_json = f'{data_root}/training.json'

test_json = f'{data_root}/testing.json'


In [16]:
# set constants 

image_size = 244 
batch_size = 32
num_workers = 1
seed = 42

torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

In [17]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

## get usable image paths from json 

In [18]:
def get_image_path(file_name, image_dirs):
    for dir_path in image_dirs:
        full_path = os.path.join(dir_path, file_name)
        if os.path.exists(full_path):
            return full_path
    return None


In [19]:
with open(train_json, "r") as f:
    train_json_data = json.load(f)

records = []

for img in train_json_data["images"]:
    path = get_image_path(img["file_name"], image_dirs)

    if path is not None:
        records.append({
            "file_name": img["file_name"],
            "full_path": path,
            "waste": int(img["is_candidate_location"])
        })

df = pd.DataFrame(records)
df.head()


Unnamed: 0,file_name,full_path,waste
0,2.png,C:\Users\rimsh\Desktop\rimsha\github\urban-was...,1
1,3.png,C:\Users\rimsh\Desktop\rimsha\github\urban-was...,1
2,4.png,C:\Users\rimsh\Desktop\rimsha\github\urban-was...,1
3,5.png,C:\Users\rimsh\Desktop\rimsha\github\urban-was...,1
4,6.png,C:\Users\rimsh\Desktop\rimsha\github\urban-was...,1


In [20]:
print("Total usable training images:", len(df))
print(df["waste"].value_counts())


Total usable training images: 6327
waste
0    4205
1    2122
Name: count, dtype: int64


## train-val split

In [21]:
train_df, val_df = train_test_split(
    df,
    test_size=0.2,
    stratify=df["waste"],
    random_state=seed,
    shuffle=True
)

train_df = train_df.reset_index(drop=True)
val_df = train_df.reset_index(drop=True)

print("Train size:", len(train_df))
print("Validation size:", len(val_df))


Train size: 5061
Validation size: 5061


## image transformation + resizing

In [22]:
train_transforms = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.RandomRotation(15),      # rotate Â±15 degrees
    transforms.ColorJitter(0.1, 0.1, 0.1, 0.1), # slight brightness/contrast/saturation changes
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])

val_transforms = transforms.Compose([
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225]
    )
])


## prepare training images and labels

In [None]:
train_images = []
train_labels = []

for idx, row in train_df.iterrows():
    img = Image.open(row['full_path']).convert('RGB')
    img_tensor = train_transforms(img)  # apply your resizing + normalization
    train_images.append(img_tensor)
    train_labels.append(row['waste'])



In [None]:
train_images

In [None]:
# Convert labels to tensor
train_labels = torch.tensor(train_labels, dtype=torch.long)