In [None]:
#The model is based on the 20k dataset and covers 11 classes.
#It classifies each 24x46 cell as either no anomaly or one of 10 different fault types.

#Step 1: Loading dataset
!unzip "/data/2020-02-14_InfraredSolarModules (1).zip" -d "/data/"


In [None]:
#Step 2: Parse the label JSON file and build the train/val image folders
import os
import json
import shutil
import random
from tqdm import tqdm

# Define paths
json_path = "/data/InfraredSolarModules/module_metadata.json"
image_base_path = "/data/InfraredSolarModules/images"

# Load JSON (maps an entry key to a record with "anomaly_class" and "image_filepath")
with open(json_path, "r", encoding="utf-8") as f:
    meta = json.load(f)

# Shuffle entries with a fixed seed so that Restart & Run All reproduces the
# exact same train/val split. A private Random instance is used so the global
# `random` state is left untouched.
SPLIT_SEED = 42
entries = list(meta.items())
random.Random(SPLIT_SEED).shuffle(entries)

# Split into train and validation (80/20 split)
split_ratio = 0.8
split_index = int(len(entries) * split_ratio)
train_entries = entries[:split_index]
val_entries = entries[split_index:]

# Output dataset folders
base_dir = "/content/solar_classification_data"
train_dir = os.path.join(base_dir, "train")
val_dir = os.path.join(base_dir, "val")

# Start from an empty output tree. Copies left over from an earlier run (for
# example with a different seed or split ratio) would otherwise stay on disk,
# and the same image could end up in both train/ and val/.
shutil.rmtree(base_dir, ignore_errors=True)

def prepare_dataset(entries, dest_dir):
    """Copy the image of every entry into ``dest_dir/<label>/``.

    Args:
        entries: iterable of ``(key, record)`` pairs; each record must provide
            the keys ``"anomaly_class"`` and ``"image_filepath"``.
        dest_dir: root folder of the split; one sub-folder per label is
            created beneath it.

    The label is the stripped ``anomaly_class`` with spaces replaced by
    hyphens. Only the file name of ``image_filepath`` is used; the image is
    looked up in the module-level ``image_base_path``. Missing images are
    reported on stdout and skipped (the label folder is still created).
    """
    for _key, record in tqdm(entries):
        class_name = record["anomaly_class"].strip().replace(" ", "-")
        file_name = os.path.basename(record["image_filepath"])

        # The class folder is created before the existence check, so it
        # exists even when the image itself turns out to be missing.
        class_dir = os.path.join(dest_dir, class_name)
        os.makedirs(class_dir, exist_ok=True)

        source_path = os.path.join(image_base_path, file_name)
        if not os.path.exists(source_path):
            print(f"❌ Image not found: {source_path}")
            continue

        shutil.copy(source_path, os.path.join(class_dir, file_name))

# Materialise both splits on disk: train first, then validation.
for split_entries, split_dir in ((train_entries, train_dir), (val_entries, val_dir)):
    prepare_dataset(split_entries, split_dir)

print("✅ Dataset structure ready at:", base_dir)


In [None]:
#Step 3: Model Training
!pip install ultralytics

from ultralytics import YOLO

model = YOLO("yolov8m-cls.pt")
results = model.train(
    data="/content/solar_classification_data",
    epochs=100,
    imgsz=224,
    batch=64,
    patience=40,
    flipud=0.1,            # vertical flip
    fliplr=0.5,            # horizontal flip
    hsv_h=0.015,           # hue
    hsv_s=0.7,             # saturation
    hsv_v=0.4,             # brightness
    mixup=0.2,             # mixup augmentation
    cutmix=0.2,            # cutmix augmentation
    auto_augment="randaugment",  # stronger auto augmentation policy
    erasing=0.4,           # random erasing
    dropout=0.2,           # drop units randomly
    mosaic=0.0,            # set mosaic to 0 for classification
    project="cell_fault_classifier",
    name="fault_11class_augmented"
)

