In [1]:
from ricehealthai.infrastructure.data_loader import build_image_dataframe
from ricehealthai.infrastructure.data_splitter import split_riceleaf_dataset

# ==============================================
# Load Rice Leaf Dataset
# ==============================================
print("\n" + "=" * 60)
print("Load Rice Leaf Dataset")  # banner previously misspelled "Rise"
print("=" * 60 + "\n")

# Build the image DataFrame; with show_summary=True the loader also logs
# its per-category image summary.
df = build_image_dataframe(show_summary=True)
print(f"DataFrame shape: {df.shape[0]} rows × {df.shape[1]} columns")

# ==============================================
# Split Rice Leaf Dataset into train/valid/test
# ==============================================
print("\n" + "=" * 60)
print("Split Rice Leaf Dataset into train/valid/test")
print("=" * 60 + "\n")

# Perform the 70% / 20% / 10% split. random_state is fixed, presumably so the
# split is reproducible across kernel restarts (confirm in data_splitter).
train_df, valid_df, test_df = split_riceleaf_dataset(
    df,
    train_size=0.7,
    valid_size=0.20,
    test_size=0.10,
    random_state=42
)

# Display summary information: shape line followed by the first three rows
# of each split, in train -> validation -> test order.
for _split_label, _split_frame in (
    ("Training", train_df),
    ("Validation", valid_df),
    ("Test", test_df),
):
    print(f"{_split_label} set: {_split_frame.shape[0]} rows × {_split_frame.shape[1]} columns")
    display(_split_frame.head(3))


The history saving thread hit an unexpected error (OperationalError('unable to open database file')).History will not be written to the database.

Load Rise Leaf Dataset

2025-10-20 10:39:14 | INFO     | data_loader:build_image_dataframe - Building image DataFrame from directory: /Users/surelmanda/Data-Science-Projects/RiceHealthAI/data/raw
2025-10-20 10:39:14 | INFO     | data_loader:build_image_dataframe - Detected categories: ['Bacterialblight', 'Blast', 'Brownspot', 'Tungro']
2025-10-20 10:39:15 | INFO     | data_loader:build_image_dataframe - DataFrame created with 5932 rows and 2 columns.
2025-10-20 10:39:15 | INFO     | data_loader:build_image_dataframe - Image DataFrame building completed successfully.
2025-10-20 10:39:15 | INFO     | data_loader:summarize_image_counts - Starting image distribution summary...
2025-10-20 10:39:15 | INFO     | data_loader:summarize_image_counts - Detected 4 categories with a total of 5932 images.
2025-10-20 10:39:15 | INFO     | data_loader:summa

Unnamed: 0,image_path,label
2347,/Users/surelmanda/Data-Science-Projects/RiceHe...,Blast
3638,/Users/surelmanda/Data-Science-Projects/RiceHe...,Brownspot
86,/Users/surelmanda/Data-Science-Projects/RiceHe...,Bacterialblight


Validation set: 1186 rows × 2 columns


Unnamed: 0,image_path,label
5011,/Users/surelmanda/Data-Science-Projects/RiceHe...,Tungro
4501,/Users/surelmanda/Data-Science-Projects/RiceHe...,Brownspot
5690,/Users/surelmanda/Data-Science-Projects/RiceHe...,Tungro


Test set: 594 rows × 2 columns


Unnamed: 0,image_path,label
2411,/Users/surelmanda/Data-Science-Projects/RiceHe...,Blast
3718,/Users/surelmanda/Data-Science-Projects/RiceHe...,Brownspot
3532,/Users/surelmanda/Data-Science-Projects/RiceHe...,Brownspot


In [2]:
from torch.utils.data import DataLoader
from ricehealthai.core.utils import preview_dataloader
from ricehealthai.infrastructure.dataset import RiceLeafDataset
from ricehealthai.infrastructure.image_transformer import (get_train_transforms, get_valid_transforms, get_test_transforms)


# === Training dataset: the only split that fits the LabelEncoder
train_dataset = RiceLeafDataset(
    dataframe=train_df,
    transform=get_train_transforms(),
    fit_encoder=True,
)

# Reuse the encoder fitted on the training split for the other splits,
# so all three datasets are built with the same encoder instance.
label_encoder = train_dataset.get_label_encoder()

# === Validation dataset
valid_dataset = RiceLeafDataset(
    dataframe=valid_df, transform=get_valid_transforms(), label_encoder=label_encoder
)

# === Test dataset
test_dataset = RiceLeafDataset(
    dataframe=test_df, transform=get_test_transforms(), label_encoder=label_encoder
)


# === DataLoaders: only the training loader shuffles its samples
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=32, shuffle=False)


# ======================================================================
# Preview the content of the training / validation / test DataLoaders
# ======================================================================
for _preview_name, _preview_loader in (
    ("Train", train_loader),
    ("Validation", valid_loader),
    ("Test", test_loader),
):
    preview_dataloader(_preview_name, _preview_loader, 2)

2025-10-20 10:39:18 | INFO     | dataset:__init__ - Fitted LabelEncoder with classes: ['Bacterialblight' 'Blast' 'Brownspot' 'Tungro']

Train DataLoader Preview



2025-10-20 10:39:18.989 python[968:8873] Error creating directory 
 There isn’t enough space. You can’t save the file “mpsgraph-968-2025-10-20_10_39_18-2253918914” because there isn’t enough space.
2025-10-20 10:39:19.166 python[968:8873] Error creating directory 
 There isn’t enough space. You can’t save the file “mpsgraph-968-2025-10-20_10_39_19-304278500” because there isn’t enough space.


[91mBatch: 0[0m 
[91mImage:[0m torch.Size([32, 3, 224, 224]) 
[91mTargets:[0m tensor([2, 2, 2, 0, 1, 2, 2, 0, 1, 0, 3, 1, 2, 0, 2, 0, 3, 0, 0, 0, 1, 1, 1, 2,
        3, 1, 2, 3, 0, 2, 1, 2], device='mps:0') 
[91mBatch: 1[0m 
[91mImage:[0m torch.Size([32, 3, 224, 224]) 
[91mTargets:[0m tensor([1, 0, 2, 1, 2, 3, 3, 1, 2, 3, 3, 0, 1, 0, 2, 1, 0, 2, 1, 3, 0, 2, 2, 1,
        1, 1, 3, 2, 2, 3, 0, 3], device='mps:0') 
[91mBatch: 2[0m 
[91mImage:[0m torch.Size([32, 3, 224, 224]) 
[91mTargets:[0m tensor([0, 1, 3, 3, 0, 0, 1, 0, 3, 0, 2, 2, 2, 3, 2, 0, 1, 1, 2, 2, 1, 3, 2, 1,
        2, 1, 3, 0, 1, 3, 1, 1], device='mps:0') 

Validation DataLoader Preview

[91mBatch: 0[0m 
[91mImage:[0m torch.Size([32, 3, 224, 224]) 
[91mTargets:[0m tensor([3, 2, 3, 0, 2, 1, 0, 0, 1, 1, 1, 1, 3, 3, 1, 3, 1, 3, 2, 3, 3, 0, 1, 2,
        0, 2, 3, 0, 0, 2, 1, 3], device='mps:0') 
[91mBatch: 1[0m 
[91mImage:[0m torch.Size([32, 3, 224, 224]) 
[91mTargets:[0m tensor([1, 3, 3, 0, 3, 1, 2,

In [3]:
from ricehealthai.infrastructure.models.custom_cnn import build_custom_cnn

# Instantiate the custom CNN with one output per rice-leaf category
# (the dataset has four labels).
model = build_custom_cnn(num_classes=4)

# Report the device that holds the model's first parameter.
print(next(iter(model.parameters())).device)


2025-10-20 10:39:21 | INFO     | utils:get_device - Using Apple MPS (Metal Performance Shaders)
mps:0
