In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, BatchNormalization, Dense, Dropout, Flatten
from tensorflow.keras.utils import to_categorical

In [3]:
# Dataset root folder; the image directory used by later cells is also
# derived from it.
data_path = 'cassava_data'
csv_path = os.path.join(data_path, 'train.csv')

# Load the label table. Later cells read its 'image_id' and 'label' columns.
df = pd.read_csv(filepath_or_buffer=csv_path)


In [4]:
# The generators below are built with class_mode='categorical' on the 'label'
# column, so store the labels as strings rather than numbers.
df = df.astype({'label': str})

In [5]:
# Two-stage stratified split giving roughly 70/15/15 train/val/test.
SPLIT_SEED = 42
TEST_FRACTION = 0.15
# Validation is carved out of the remaining 85%: 0.176 * 0.85 ~= 0.15 of the full set.
VAL_FRACTION_OF_REMAINDER = 0.176

# Stage 1: hold out the test set, keeping class proportions.
trainval_df, test_df = train_test_split(
    df,
    test_size=TEST_FRACTION,
    stratify=df['label'],
    random_state=SPLIT_SEED,
)

# Stage 2: split what is left into training and validation sets.
train_df, val_df = train_test_split(
    trainval_df,
    test_size=VAL_FRACTION_OF_REMAINDER,
    stratify=trainval_df['label'],
    random_state=SPLIT_SEED,
)

In [6]:
# Image generators: augmentation for training only, plain rescaling for evaluation.
image_dir = os.path.join(data_path, 'train_images')

# Random rotation / zoom / flip act as regularizers, so they are applied to
# the training stream only.
datagen = ImageDataGenerator(rescale=1./255, rotation_range=15, zoom_range=0.2, horizontal_flip=True)

# Validation and test images are only rescaled. Augmenting them would make the
# reported metrics vary from run to run and not comparable with each other.
eval_datagen = ImageDataGenerator(rescale=1./255)

# Fixed seed so the shuffling order and random transforms are reproducible.
train_gen = datagen.flow_from_dataframe(train_df, image_dir, x_col='image_id', y_col='label',
                                        target_size=(128, 128), batch_size=32, class_mode='categorical',
                                        seed=42)

# shuffle=False keeps batches in dataframe order, so predictions can be
# matched back to val_df rows.
val_gen = eval_datagen.flow_from_dataframe(val_df, image_dir, x_col='image_id', y_col='label',
                                           target_size=(128, 128), batch_size=32, class_mode='categorical',
                                           shuffle=False)

test_gen = eval_datagen.flow_from_dataframe(
    test_df, image_dir, x_col='image_id', y_col='label',
    target_size=(128, 128), batch_size=32, class_mode='categorical', shuffle=False
)


Found 14986 validated image filenames belonging to 5 classes.
Found 3201 validated image filenames belonging to 5 classes.
Found 3210 validated image filenames belonging to 5 classes.


In [9]:
def _conv_stage(filters, **first_conv_kwargs):
    """Build one feature-extraction stage as a list of layers.

    A stage is two 3x3 'same'-padded ReLU convolutions with `filters`
    channels, each followed by batch normalization, then a 2x2 max pool.
    Extra keyword arguments are forwarded to the first convolution only
    (used to declare the input shape on the very first layer).
    """
    return [
        Conv2D(filters, (3, 3), activation='relu', padding='same', **first_conv_kwargs),
        BatchNormalization(),
        Conv2D(filters, (3, 3), activation='relu', padding='same'),
        BatchNormalization(),
        MaxPooling2D(2, 2),
    ]


# Three convolutional stages with 32, 64 and 128 filters on 128x128 RGB input.
feature_layers = (
    _conv_stage(32, input_shape=(128, 128, 3))
    + _conv_stage(64)
    + _conv_stage(128)
)

# Fully connected head with dropout after each hidden layer.
classifier_layers = [
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(5, activation='softmax'),  # 5 disease classes
]

model = Sequential(feature_layers + classifier_layers)

In [None]:
# Training setup: Adam optimizer, categorical cross-entropy loss (the
# generators yield labels with class_mode='categorical'), accuracy metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# One epoch over train_gen with val_gen passed as validation data. The returned
# History object provides the 'accuracy' / 'val_accuracy' series plotted in the
# next cell.
history = model.fit(x=train_gen, validation_data=val_gen, epochs=1)

[1m176/469[0m [32m━━━━━━━[0m[37m━━━━━━━━━━━━━[0m [1m10:32[0m 2s/step - accuracy: 0.4299 - loss: 7.4906

In [None]:
# Plot per-epoch accuracy for the training and validation sets.
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
# Number epochs from 1, matching how the training log counts them.
epoch_numbers = range(1, len(train_acc) + 1)

fig, ax = plt.subplots()
# Markers keep the data visible even with a single epoch, where a bare line
# through one point would draw nothing.
ax.plot(epoch_numbers, train_acc, marker='o', label='Train')
ax.plot(epoch_numbers, val_acc, marker='o', label='Validation')
ax.set_title('Training vs Validation Accuracy')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
# Epochs are whole numbers, so avoid fractional tick labels on the x axis.
ax.locator_params(axis='x', integer=True)
ax.legend()
ax.grid()
plt.show()