In [2]:
# Download dataset
!pip install -q gdown

# plant_leave_diseases_train.zip
!gdown https://drive.google.com/uc?id=1MCQ2ldiKZUeVM1rVw1gPlBaX43AJB3R0

# plant_leave_diseases_test.zip
!gdown https://drive.google.com/uc?id=1yqvfEVeb0IAutxZK83_wUoUWm5apYSF8

Downloading...
From (original): https://drive.google.com/uc?id=1MCQ2ldiKZUeVM1rVw1gPlBaX43AJB3R0
From (redirected): https://drive.google.com/uc?id=1MCQ2ldiKZUeVM1rVw1gPlBaX43AJB3R0&confirm=t&uuid=d8601cd9-d37a-44ff-9c3b-21be41f7a80d
To: /content/plant_leave_diseases_train.zip
100% 682M/682M [00:12<00:00, 55.1MB/s]
Downloading...
From (original): https://drive.google.com/uc?id=1yqvfEVeb0IAutxZK83_wUoUWm5apYSF8
From (redirected): https://drive.google.com/uc?id=1yqvfEVeb0IAutxZK83_wUoUWm5apYSF8&confirm=t&uuid=b1bd5df2-962c-4dc8-90c0-cd42e11bbbb5
To: /content/plant_leave_diseases_test.zip
100% 170M/170M [00:02<00:00, 76.0MB/s]


In [3]:
import zipfile

# Extract the training archive, then the test archive, into the current
# working directory.
for archive_name in ('plant_leave_diseases_train.zip', 'plant_leave_diseases_test.zip'):
    with zipfile.ZipFile(archive_name, 'r') as zip_file:
        zip_file.extractall()

In [5]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split

# Dataset location and input-pipeline settings
train_data_dir = 'plant_leave_diseases_train'
img_size = (256, 256)
batch_size = 32

# A single ImageDataGenerator serves both subsets: it multiplies pixel values
# by 1/255 and reserves 20% of the images for validation.
datagen = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Arguments shared by the training and validation iterators
flow_kwargs = dict(
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
)

# Training iterator (the part of the data not held out for validation)
train_generator = datagen.flow_from_directory(
    train_data_dir, subset='training', **flow_kwargs
)

# Validation iterator (the held-out part of the same directory)
val_generator = datagen.flow_from_directory(
    train_data_dir, subset='validation', **flow_kwargs
)

Found 34756 images belonging to 38 classes.
Found 8673 images belonging to 38 classes.


In [6]:
# Assemble the CNN layer by layer: an RGB input, three Conv2D -> MaxPooling2D
# stages with 32, 64 and 128 filters, then a dense classifier head with one
# softmax output per class found by the training generator.
cnn_layers = [Input(shape=(*img_size, 3))]
for n_filters in (32, 64, 128):
    cnn_layers.append(Conv2D(n_filters, (3, 3), activation='relu'))
    cnn_layers.append(MaxPooling2D((2, 2)))
cnn_layers.extend([
    Flatten(),
    Dense(128, activation='relu'),
    Dense(train_generator.num_classes, activation='softmax'),
])
model = Sequential(cnn_layers)

# Compile with Adam and categorical cross-entropy, then train for 5 epochs
# while evaluating on the validation generator after every epoch.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(train_generator, validation_data=val_generator, epochs=5)

Epoch 1/5


  self._warn_if_super_not_called()


[1m1087/1087[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 84ms/step - accuracy: 0.4851 - loss: 2.0107 - val_accuracy: 0.7699 - val_loss: 0.7527
Epoch 2/5
[1m1087/1087[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m86s[0m 79ms/step - accuracy: 0.8555 - loss: 0.4562 - val_accuracy: 0.8261 - val_loss: 0.5770
Epoch 3/5
[1m1087/1087[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 84ms/step - accuracy: 0.9237 - loss: 0.2336 - val_accuracy: 0.8644 - val_loss: 0.4577
Epoch 4/5
[1m1087/1087[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m84s[0m 77ms/step - accuracy: 0.9561 - loss: 0.1323 - val_accuracy: 0.8849 - val_loss: 0.4112
Epoch 5/5
[1m1087/1087[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m85s[0m 78ms/step - accuracy: 0.9673 - loss: 0.0996 - val_accuracy: 0.8948 - val_loss: 0.4047


<keras.src.callbacks.history.History at 0x798132d9d210>

In [16]:
import pandas as pd
import numpy as np
import os
import shutil # Added for file operations

# --- Reorganize test data: collect the loose test images in one sub-folder --- #
IMAGE_EXTENSIONS = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')


def move_images_into_subdir(src_dir, dst_dir, extensions=IMAGE_EXTENSIONS):
    """Move every image file located directly inside ``src_dir`` into ``dst_dir``.

    Args:
        src_dir: Directory whose top-level entries are scanned (not recursive).
        dst_dir: Destination directory; created if missing, including parents.
        extensions: File suffixes (compared case-insensitively) that count as
            images.

    Returns:
        The number of files that were moved.

    Entries that are directories, or whose name does not end with one of
    ``extensions``, are left untouched. Running the function again after
    everything has been moved is a no-op that returns 0.
    """
    # exist_ok avoids the separate "exists?" check and makes re-runs safe.
    os.makedirs(dst_dir, exist_ok=True)
    suffixes = tuple(extensions)  # str.endswith needs a tuple, not a list

    moved = 0
    for entry in os.listdir(src_dir):
        src_path = os.path.join(src_dir, entry)
        # Only regular files qualify; a directory named like an image is skipped.
        if entry.lower().endswith(suffixes) and os.path.isfile(src_path):
            shutil.move(src_path, os.path.join(dst_dir, entry))
            moved += 1
    return moved


original_test_data_dir = 'plant_leave_diseases_test'
reorganized_test_data_dir = os.path.join(original_test_data_dir, 'images')
move_images_into_subdir(original_test_data_dir, reorganized_test_data_dir)
# --- End of test data reorganization --- #

# Prepare test data generator; it applies the same 1/255 rescaling that the
# training generator uses.
test_datagen = ImageDataGenerator(rescale=1./255)

# flow_from_directory reads images from the sub-folders of the directory it is
# given, so it is pointed at the parent test directory (which contains the
# 'images' sub-folder), not at the sub-folder itself.
test_generator = test_datagen.flow_from_directory(
    original_test_data_dir,
    target_size=img_size,
    batch_size=batch_size,
    class_mode=None,  # No labels for test set
    shuffle=False  # Keep file order so predictions line up with filenames
)

# Get filenames and map them to IDs
filenames = test_generator.filenames
# Each file stem is converted to an integer ID; int() raises ValueError if a
# test filename is not purely numeric.
image_ids = [int(os.path.splitext(os.path.basename(f))[0]) for f in filenames]

# Make predictions
predictions = model.predict(test_generator)
# Guard against a silent mismatch between predictions and filenames.
assert len(predictions) == len(filenames), (
    f'Got {len(predictions)} predictions for {len(filenames)} test files'
)
predicted_classes = np.argmax(predictions, axis=1)

# Order the class names by their index in the training generator explicitly,
# rather than relying on the insertion order of the class_indices dict.
class_indices = train_generator.class_indices
class_labels = sorted(class_indices, key=class_indices.get)
predicted_class_names = [class_labels[idx] for idx in predicted_classes]

# Build the submission table: one row per test image, ordered by numeric id
# (ordering is only for a consistent output file).
submission_df = (
    pd.DataFrame({'id': image_ids, 'class': predicted_class_names})
    .sort_values(by='id')
    .reset_index(drop=True)
)

# Render each id as a string zero-padded to at least 5 digits (e.g. 00001)
submission_df['id'] = submission_df['id'].apply('{:05d}'.format)

# Submission filename parts (replace with the actual student ID / team name)
student_id = '12502379' # Replace with your actual student ID
team_name = 'Mariam' # Optional: Replace with your team name, e.g., 'MyTeam'

# The team name is appended only when one is set
submission_filename = (
    f'submission_{student_id}_{team_name}.csv'
    if team_name
    else f'submission_{student_id}.csv'
)

# Write the CSV without the DataFrame index and report where it went
submission_df.to_csv(submission_filename, index=False)
print(f'Submission file saved as: {submission_filename}')

Found 10876 images belonging to 1 classes.


  self._warn_if_super_not_called()


[1m340/340[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 58ms/step
Submission file saved as: submission_12502379_Mariam.csv
