# Importing Required Packages and Libraries

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python

## Importing other useful packages and libraries 
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

import PIL
from PIL import Image

import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input,Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Exploring the Data Directories

In [None]:
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    full_paths = (os.path.join(root, name) for name in names)
    for path in full_paths:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Loading the dataset

In [None]:
## Read the three competition CSV files into DataFrames
# Training rows (the columns used later in this notebook are 'Image_ID' and 'class')
train_df = pd.read_csv("/kaggle/input/ghana-crop-disease/Train.csv")
# Test rows to predict on
test_df = pd.read_csv("/kaggle/input/ghana-crop-disease/Test.csv")
# Example submission file, loaded to inspect its layout
sample_submission = pd.read_csv("/kaggle/input/ghana-crop-disease/SampleSubmission.csv")

# Preview the first rows and the structure of the training data and the
# sample submission. `DataFrame.info()` prints its report itself and returns
# None, so the surrounding print() also emits a trailing "None".
for frame, n_rows in ((train_df, 3), (sample_submission, 4)):
    print(frame.head(n_rows))
    print(frame.info())

In [None]:
# Summarise the label column: number of distinct classes, then the counts
# of the (up to) 13 most frequent ones.
disease_classes = train_df['class']
print("Number of Unique Diseases:", disease_classes.nunique())
class_counts = disease_classes.value_counts()
print(class_counts.head(13))

# Data Preprocessing 
Match Image Paths with Disease Labels

In [None]:
# Directory that holds every image referenced by the CSV files
IMAGE_DIR = "/kaggle/input/ghana-crop-disease/images/"

# Build the full image path for each row of both the training and test data
for frame in (train_df, test_df):
    frame["image_path"] = IMAGE_DIR + frame["Image_ID"]

# Integer-encode the class names of the training data
# (codes follow the sorted order of the category values)
class_as_category = train_df['class'].astype('category')
train_df['label'] = class_as_category.cat.codes

# Quick look at the result
print("train_set\n ",train_df.head(2))


In [None]:

# Preview the first six training images in a 2-row, 3-column grid.
fig, axes = plt.subplots(2, 3, figsize=(12, 8))

# `axes.ravel()` walks the 2x3 grid row by row, so position i lands in
# row i // 3, column i % 3.
for position, ax in enumerate(axes.ravel()):
    img_path = train_df["image_path"].iloc[position]
    # Open inside a context manager so the underlying file handle is closed
    # as soon as the pixels have been handed to matplotlib.
    with Image.open(img_path) as img:
        ax.imshow(img)
    ax.axis("off")  # Hide ticks and frame
    ax.set_title(f"Image {position + 1}")

plt.tight_layout()  # Avoid overlapping titles
plt.show()



In [None]:
# Fraction of the training dataframe held out for validation.
VALIDATION_SPLIT = 0.2

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
    validation_split=VALIDATION_SPLIT,
)

# Validation images are only rescaled: augmenting them would make the
# validation metrics noisy and not comparable between epochs. The generator
# uses the same `validation_split` so the 'training' and 'validation'
# subsets stay complementary.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=VALIDATION_SPLIT,
)

# Arguments shared by both iterators so they read the same columns and
# yield batches of the same shape with one-hot encoded labels.
flow_kwargs = dict(
    dataframe=train_df,
    x_col='image_path',
    y_col='class',
    target_size=(150, 150),
    batch_size=32,
    class_mode='categorical',
)

train_generator = train_datagen.flow_from_dataframe(
    subset='training',
    shuffle=True,
    **flow_kwargs,
)

# No shuffling for validation so batches come in a stable order.
val_generator = val_datagen.flow_from_dataframe(
    subset='validation',
    shuffle=False,
    **flow_kwargs,
)


# Building the CNN

In [None]:
## Build a CNN with three convolution + max-pooling stages
# One output unit per distinct encoded disease label
num_classes = train_df['label'].nunique()

# Input: 150x150 RGB images
cnn_layers = [Input(shape=(150, 150, 3))]

# Three convolutional stages with 32, 64 and 128 filters, each followed by
# 2x2 max pooling with stride 2
for n_filters in (32, 64, 128):
    cnn_layers.extend((
        Conv2D(n_filters, kernel_size=(3, 3), activation='relu'),
        MaxPooling2D(pool_size=2, strides=2),
    ))

# Classifier head: flatten, fully connected layer with dropout, softmax output
cnn_layers.extend((
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax'),
))

model = Sequential(cnn_layers)


## Compiling the CNN

In [None]:
# Adam optimizer with categorical cross-entropy, which matches the one-hot
# labels produced by class_mode='categorical'; accuracy is tracked as a metric.
adam_optimizer = Adam(learning_rate=0.001)
model.compile(
    optimizer=adam_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print the layer-by-layer architecture and parameter counts
model.summary()

## Train the CNN model

In [None]:
# Train for 5 epochs, evaluating on the validation generator after each one.
# The returned History object holds the per-epoch metrics plotted below.
fit_options = dict(
    validation_data=val_generator,
    epochs=5,
    verbose=1,
)
history = model.fit(train_generator, **fit_options)

# Model Performance Evaluation

## Model Accuracy

In [None]:
# Training vs. validation accuracy per epoch.
# A single full-width axes is used: the previous `plt.subplot(1, 2, 1)`
# reserved a second panel that this cell never filled, leaving half of the
# figure blank.
acc_fig, acc_ax = plt.subplots(figsize=(12, 5))

acc_ax.plot(history.history['accuracy'], label='Train Accuracy')
acc_ax.plot(history.history['val_accuracy'], label='Validation Accuracy')
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_title('Model Accuracy')
acc_ax.legend()

plt.show()


## Loss Function plot

In [None]:
# Training vs. validation loss per epoch.
# The figure is created explicitly: the previous `plt.subplot(1, 2, 2)` drew
# into the right half of an implicitly created, default-sized figure and left
# the left half empty.
loss_fig, loss_ax = plt.subplots(figsize=(12, 5))

loss_ax.plot(history.history['loss'], label='Train Loss')
loss_ax.plot(history.history['val_loss'], label='Validation Loss')
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')
loss_ax.set_title('Model Loss')
loss_ax.legend()

plt.show()

In [None]:
# Test images get the same rescaling as training, with no augmentation.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_dataframe(
    test_df,
    x_col='image_path',
    target_size=(150, 150),
    batch_size=32,
    class_mode=None,   # no labels are available for the test set
    shuffle=False      # keep predictions in the same order as test_df rows
)

# Predict class probabilities and keep the most likely class index per image
predictions = model.predict(test_generator)
predicted_classes = np.argmax(predictions, axis=1)

# Fail with a clear message if the generator yielded a different number of
# images than test_df has rows (e.g. because some image files were not found);
# otherwise the column assignment below would raise a less helpful error.
if len(predicted_classes) != len(test_df):
    raise ValueError(
        f"Got {len(predicted_classes)} predictions for {len(test_df)} test rows; "
        "check that every path in test_df['image_path'] points to a readable image."
    )

# Convert predicted indices back to class names using the training generator's
# name -> index mapping
label_map = {index: name for name, index in train_generator.class_indices.items()}
test_df['Predicted_Label'] = [label_map[i] for i in predicted_classes]

# Save submission. The identifier column is 'Image_ID' (the same column used to
# build image_path); selecting 'image_id' raised a KeyError.
# NOTE(review): confirm the expected column names against SampleSubmission.csv.
test_df[['Image_ID', 'Predicted_Label']].to_csv("submission.csv", index=False)
print("Submission file saved!")
