In [None]:
import splitfolders
import ipywidgets as widgets
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image

from weather_classification import (
    RAW_DATA_DIR, PROCESSED_DATA_DIR,
)

# Default size (width, height in inches) for every figure in this notebook.
plt.rcParams.update({"figure.figsize": [8, 6]})

In [None]:
# Folder name of the dataset; the same name is used under both data roots.
dataset_dname = "WeatherDataset"

# Location of the raw dataset and of its processed counterpart.
dataset_dpath = RAW_DATA_DIR.joinpath(dataset_dname)
processed_dataset_dpath = PROCESSED_DATA_DIR.joinpath(dataset_dname)

In [None]:
# Collect every entry exactly two levels below the dataset root, so the
# first-level directories themselves are not part of the list.
lst_files = [fpath for fpath in dataset_dpath.glob("*/*")]
num_files = len(lst_files)
print(f"Num files: {num_files}")

In [None]:
# Distinct file suffixes found in the raw dataset. Suffixes are compared
# case-sensitively, so e.g. ".jpg" and ".JPG" show up as separate entries.
unique_exts = {file.suffix for file in lst_files}
print(f"Unique exts: {unique_exts}")

### Splitting data

In [None]:
# Manual switch: set to True to (re)create the train/val/test split on disk.
# Left off by default so re-running the notebook does not redo the split.
split_data = False

if split_data:
    # 80 % / 10 % / 10 % split with a fixed seed; move=False asks splitfolders
    # not to move the source files out of the raw dataset directory.
    split_options = {
        "input": dataset_dpath,
        "output": processed_dataset_dpath,
        "seed": 42,
        "ratio": (0.8, 0.1, 0.1),
        "group_prefix": None,
        "move": False,
    }
    splitfolders.ratio(**split_options)

In [None]:
# Root directory of each split inside the processed dataset.
train_dpath, val_dpath, test_dpath = (
    processed_dataset_dpath / split_name
    for split_name in ("train", "val", "test")
)

# Entries two levels below each split root (one level of sub-directories,
# then their contents).
lst_train, lst_val, lst_test = (
    list(split_dpath.glob("*/*"))
    for split_dpath in (train_dpath, val_dpath, test_dpath)
)

# Report how many entries ended up in each split.
print(
    f"Train Len: {len(lst_train)} \n"
    f"Val Len: {len(lst_val)} \n"
    f"Test Len: {len(lst_test)}"
)

### Distribution by class

In [None]:
# Count the training images of every class and show the class balance as a
# pie chart.
label_names = []
train_cls_len = []

# Use a dedicated name for the class directories so the per-file list held in
# `lst_train` is not silently replaced by a list of a different kind.
# Sorting gives a reproducible slice order; the `is_dir()` filter keeps stray
# files in the split root from being counted as classes.
train_cls_dirs = sorted(
    cls_dir for cls_dir in train_dpath.glob("*") if cls_dir.is_dir()
)
for train_cls_dir in train_cls_dirs:
    # Number of entries (images) inside this class directory.
    num_images = sum(1 for _ in train_cls_dir.glob("*"))

    label_names.append(train_cls_dir.name)
    train_cls_len.append(num_images)

plt.pie(
    train_cls_len,
    labels=label_names,
    autopct="%1.1f%%",
    colors=sns.color_palette("Set2"),
    # Pull every slice slightly outwards so neighbouring slices are separated.
    explode=[0.02] * len(label_names),
)

# Trailing semicolon suppresses the Text(...) repr in the cell output.
plt.title(
    label="Train Distribution",
    fontdict={"fontsize": 16},
    pad=10,
);

### Draw images

In [None]:
# Class directories of the test split. Sorted so that a given index always
# refers to the same class; non-directory entries are skipped. A dedicated
# name is used so the per-file list held in `lst_test` is not overwritten.
test_cls_dirs = sorted(
    cls_dir for cls_dir in test_dpath.glob("*") if cls_dir.is_dir()
)

# Index of the class directory to browse. Valid values are
# 0 .. len(test_cls_dirs) - 1 (the original note gives the range as 0 to 10).
ind_dir = 10
# Sorted so the slider position maps to the same image on every run.
lst_images = sorted(test_cls_dirs[ind_dir].glob("*"))


@widgets.interact
def show_image(img_ind=widgets.IntSlider(value=0, min=0, max=len(lst_images) - 1)):
    """Display one image of the selected test class.

    Parameters
    ----------
    img_ind : int
        Position of the image inside ``lst_images``; driven by the slider.

    The plot title shows the class directory name and the image size as
    reported by PIL.
    """
    img_fpath = lst_images[img_ind]

    # Open through a context manager so the file handle is released after
    # every slider move instead of accumulating open files.
    with Image.open(img_fpath) as img:
        plt.imshow(img)
        img_size = img.size

    plt.axis("off")
    plt.title(f"{img_fpath.parent.name} - {img_size}")