<a href="https://colab.research.google.com/github/Iveynganga/MACHINE-LEARNING-2/blob/main/Week4ProjectDeepLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Data Preprocessing and Augmentation

In [None]:
import pandas as pd

# Read the annotation table into a DataFrame
df = pd.read_csv('/content/train.csv')

# Sanity check: show the column index, then a preview of the first rows
print(df.columns, df.head(), sep='\n')


Index(['image_id', 'width', 'height', 'bbox', 'source'], dtype='object')
    image_id  width  height                         bbox   source
0  b6ab77fd7   1024    1024   [834.0, 222.0, 56.0, 36.0]  usask_1
1  b6ab77fd7   1024    1024  [226.0, 548.0, 130.0, 58.0]  usask_1
2  b6ab77fd7   1024    1024  [377.0, 504.0, 74.0, 160.0]  usask_1
3  b6ab77fd7   1024    1024  [834.0, 95.0, 109.0, 107.0]  usask_1
4  b6ab77fd7   1024    1024  [26.0, 144.0, 124.0, 117.0]  usask_1


# Verify the File Path and Type

In [None]:
import os

# Report whether the archive is present on disk
print("File exists" if os.path.exists('/content/train.zip') else "File does not exist")

# Report the extension (taken from the file name only, not from the file contents)
print("File type:", os.path.splitext('/content/train.zip')[1])



File does not exist
File type: .zip


# Extraction of ZIP File

In [None]:
import zipfile

try:
    # Unpack every member of the archive into the target folder
    with zipfile.ZipFile('/content/train.csv.zip', mode='r') as zip_ref:
        zip_ref.extractall(path='/content/train_images/')
except zipfile.BadZipFile:
    print("Error: The file is not a valid zip file")
else:
    # Only reached when opening and extracting raised nothing
    print("Extraction successful")


Extraction successful


In [None]:
import os
import shutil
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the annotation table
df = pd.read_csv('/content/train.csv')

# Print column names to verify
print(df.columns)

# Build the on-disk file name for every row. A bare image_id has no extension,
# and Keras' flow_from_directory only picks up files with a known image
# extension, so extension-less names can never be loaded.
# NOTE(review): '.jpg' is an assumption about how the images are stored —
# confirm against the extracted image archive.
IMAGE_EXTENSION = '.jpg'
df['filename'] = df['image_id'].astype(str) + IMAGE_EXTENSION

# 'source' is used as a stand-in class label
# (replace 'source' with the correct label column if one exists)
df['label'] = df['source']

# Create the main directories
base_dir = '/content/data/'
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')

def create_directory(directory):
    """Create ``directory`` (and any missing parents) if it is not already there.

    Args:
        directory: Path of the directory to create.

    Returns:
        None. An ``OSError`` raised while creating the directory is reported
        on stdout rather than propagated, so the caller keeps running.
    """
    try:
        # exist_ok=True makes the call idempotent without a separate
        # os.path.exists() check. That check could race with another creator
        # and silently accepted a regular file sitting at the same path.
        os.makedirs(directory, exist_ok=True)
    except OSError as e:
        print(f"Error: {e.strerror} - {directory}")

create_directory(train_dir)
create_directory(validation_dir)

# Create subdirectories for each class
classes = df['label'].unique()
for cls in classes:
    create_directory(os.path.join(train_dir, cls))
    create_directory(os.path.join(validation_dir, cls))

# The table repeats an image once per bounding box. Splitting the raw rows
# would put the same image in both sets (data leakage), so split on one row
# per image instead.
images_df = df.drop_duplicates(subset='filename')

# Split the images into training and validation sets; random_state makes the
# split reproducible across kernel restarts.
train_df, validation_df = train_test_split(
    images_df,
    test_size=0.2,
    stratify=images_df['label'],
    random_state=42,
)

# Function to copy images to the respective directories
def organize_images(dataframe, src_dir, dest_dir, max_reported=10):
    """Copy each image listed in ``dataframe`` into ``dest_dir/<label>/``.

    Args:
        dataframe: DataFrame with 'filename' and 'label' columns. Rows may
            repeat the same image; each (label, filename) pair is copied once.
        src_dir: Directory that holds the image files.
        dest_dir: Root output directory; one sub-directory per label is
            created on demand.
        max_reported: Maximum number of missing files listed individually
            before the remainder is summarised in a single line.

    Returns:
        int: Number of files copied.
    """
    # De-duplicate first so an image referenced by many rows is copied once
    unique_rows = dataframe[['label', 'filename']].drop_duplicates()

    copied = 0
    missing = []
    for label, filename in unique_rows.itertuples(index=False, name=None):
        src_path = os.path.join(src_dir, filename)
        # isfile (not exists) so a directory at that path is not handed to shutil.copy
        if not os.path.isfile(src_path):
            missing.append(src_path)
            continue
        class_dir = os.path.join(dest_dir, label)
        # Do not rely on the caller having pre-created the class folder
        os.makedirs(class_dir, exist_ok=True)
        shutil.copy(src_path, os.path.join(class_dir, filename))
        copied += 1

    # Cap the report so a wrong src_dir does not flood the output
    for src_path in missing[:max_reported]:
        print(f"File not found: {src_path}")
    if len(missing) > max_reported:
        print(f"... and {len(missing) - max_reported} more files not found")

    return copied

# Directory that holds the extracted image files. This must be a directory:
# pointing at the .zip archive itself makes every os.path.join(...) lookup miss.
# NOTE(review): assumed to be the extraction target used earlier in this
# notebook — confirm the images actually land directly in this folder.
source_image_dir = '/content/train_images/'

# Fail fast with one clear message instead of a "File not found" line per row
if not os.path.isdir(source_image_dir):
    raise FileNotFoundError(
        f"Image directory not found: {source_image_dir}. "
        "Extract the image archive before organizing the data."
    )

# Organize training images
organize_images(train_df, source_image_dir, train_dir)

# Organize validation images
organize_images(validation_df, source_image_dir, validation_dir)


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
File not found: /content/train.zip/462d4d062
File not found: /content/train.zip/3712db803
File not found: /content/train.zip/026b6f389
File not found: /content/train.zip/8d318219c
File not found: /content/train.zip/4972b9493
File not found: /content/train.zip/78752f185
File not found: /content/train.zip/aff4e0a02
File not found: /content/train.zip/0cbe3f32c
File not found: /content/train.zip/427fddfcf
File not found: /content/train.zip/511dfeca3
File not found: /content/train.zip/5ccbf7419
File not found: /content/train.zip/e30cd9270
File not found: /content/train.zip/31b00c1ad
File not found: /content/train.zip/5b558a4c7
File not found: /content/train.zip/c0a6307fa
File not found: /content/train.zip/aa29cef4a
File not found: /content/train.zip/47dcd5b34
File not found: /content/train.zip/89d2fcb7c
File not found: /content/train.zip/1e58125ec
File not found: /content/train.zip/2661979ef
File not found: /content/train.zip/

# Prepare Image Data Generators

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Input settings shared by both generators
IMAGE_SIZE = (150, 150)
BATCH_SIZE = 32

# Training data: rescale to [0, 1] and apply random augmentation
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation data: rescale only, no augmentation
validation_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

validation_generator = validation_datagen.flow_from_directory(
    validation_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

print(f'Training samples: {train_generator.samples}')
print(f'Validation samples: {validation_generator.samples}')

# Stop here if a directory is empty; otherwise training fails later with an
# unrelated-looking "Sequence has length 0" error.
if train_generator.samples == 0 or validation_generator.samples == 0:
    raise RuntimeError(
        "No images found under "
        f"{train_dir!r} / {validation_dir!r}. "
        "Check that the images were copied and have an image file extension."
    )

Found 0 images belonging to 7 classes.
Found 0 images belonging to 7 classes.
Training samples: 0
Validation samples: 0


# Define and Compile the Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

# Build the CNN layer by layer: three convolution/pooling stages followed by
# a dense classifier head with one softmax output per class.
model = Sequential()

model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(classes), activation='softmax'))

# Multi-class setup: categorical cross-entropy loss, accuracy as the metric
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


# Fit the Model

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Define callbacks: keep the weights with the lowest validation loss and stop
# once validation loss has not improved for 10 epochs
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_loss', save_best_only=True, mode='min')
early_stopping = EarlyStopping(monitor='val_loss', patience=10, mode='min')

# Refuse to train on an empty dataset. Clamping the step count to 1 would
# only hide the problem until Keras asks the generator for batch 0.
if train_generator.samples == 0 or validation_generator.samples == 0:
    raise ValueError(
        "Cannot train: the training or validation generator has no images "
        f"(train={train_generator.samples}, validation={validation_generator.samples})."
    )

# len(generator) is the number of batches per pass, including the last
# partial batch that floor division (samples // batch_size) would drop.
steps_per_epoch = len(train_generator)
validation_steps = len(validation_generator)

# Fit the model
history = model.fit(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    validation_data=validation_generator,
    epochs=50,
    callbacks=[checkpoint, early_stopping]
)


ValueError: Asked to retrieve element 0, but the Sequence has length 0

# Evaluate the Model

In [None]:
# Evaluate the model on validation data.
# samples // batch_size is 0 when there are fewer samples than one batch
# (which Keras rejects) and otherwise skips the last partial batch, so use the
# generator's own batch count instead.
if validation_generator.samples == 0:
    raise ValueError("Cannot evaluate: the validation generator has no images.")

val_loss, val_acc = model.evaluate(validation_generator, steps=len(validation_generator))
print(f'Validation accuracy: {val_acc}')

# Save the trained model
model.save('wheat_disease_classifier.h5')


ValueError: Unexpected value for `steps_per_epoch`. Received value is 0. Please check the docstring for `model.fit()` for supported values.