<a href="https://colab.research.google.com/github/Ana-max-dot/s2/blob/main/Resnet_50_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import cv2
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import json
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.models import Model
from keras.layers import Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Activation,GlobalAveragePooling2D, Dense, BatchNormalization, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import SGD

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
df = pd.read_csv("/content/drive/MyDrive/Agritariat/Data/train.csv")
df

Unnamed: 0,image_id,label
0,1000015157.jpg,0
1,1000201771.jpg,3
2,100042118.jpg,1
3,1000723321.jpg,1
4,1000812911.jpg,3
...,...,...
21392,999068805.jpg,3
21393,999329392.jpg,3
21394,999474432.jpg,1
21395,999616605.jpg,4


In [4]:
with open("/content/drive/MyDrive/Agritariat/Data/label_num_to_disease_map.json", "r") as file:
    label_name = json.load(file)
label_name

{'0': 'Cassava Bacterial Blight (CBB)',
 '1': 'Cassava Brown Streak Disease (CBSD)',
 '2': 'Cassava Green Mottle (CGM)',
 '3': 'Cassava Mosaic Disease (CMD)',
 '4': 'Healthy'}

In [5]:
image_path = "/content/drive/MyDrive/Agritariat/Data/train_images"
image_list = os.listdir(image_path)

In [6]:
# Matching labels with images
df = df[df["image_id"].isin(image_list)]
df.reset_index(drop=True, inplace=True)

In [7]:
# create paths of the image and label
path_name = "/content/drive/MyDrive/Agritariat/Data/train_images/"
df["image_path"] = df["image_id"].apply(lambda x: str(path_name+x))
df["label_name"] = df["label"].apply(lambda x: label_name.get(str(x), "Unknown"))
df.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["image_path"] = df["image_id"].apply(lambda x: str(path_name+x))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["label_name"] = df["label"].apply(lambda x: label_name.get(str(x), "Unknown"))


Unnamed: 0,image_id,label,image_path,label_name
0,157078263.jpg,3,/content/drive/MyDrive/Agritariat/Data/train_i...,Cassava Mosaic Disease (CMD)
1,1574893536.jpg,3,/content/drive/MyDrive/Agritariat/Data/train_i...,Cassava Mosaic Disease (CMD)
2,1575013487.jpg,3,/content/drive/MyDrive/Agritariat/Data/train_i...,Cassava Mosaic Disease (CMD)
3,1576606254.jpg,0,/content/drive/MyDrive/Agritariat/Data/train_i...,Cassava Bacterial Blight (CBB)
4,1579761476.jpg,1,/content/drive/MyDrive/Agritariat/Data/train_i...,Cassava Brown Streak Disease (CBSD)


In [8]:
from sklearn.model_selection import train_test_split

# train and test
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [9]:
def bottleneck_block(x, filters, downsample=False):
    if downsample:
        strides = 2
    else:
        strides = 1

    identity = x

    # 1x1 Conv to reduce dimensions
    x = layers.Conv2D(filters=filters, kernel_size=1, strides=strides, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    # 3x3 Conv
    x = layers.Conv2D(filters=filters, kernel_size=3, padding='same')(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    # 1x1 Conv to restore dimensions
    x = layers.Conv2D(filters=4 * filters, kernel_size=1, padding='same')(x)
    x = layers.BatchNormalization()(x)

    if downsample or identity.shape[-1] != 4 * filters:
        identity = layers.Conv2D(filters=4 * filters, kernel_size=1, strides=strides, padding='same')(identity)
        identity = layers.BatchNormalization()(identity)

    x = layers.Add()([x, identity])
    x = layers.ReLU()(x)

    return x



In [10]:
def ResNet50(input_shape=(224, 224, 3), num_classes=5):
    inputs = tf.keras.Input(shape=input_shape)

    x = layers.Conv2D(64, kernel_size=7, strides=2, padding='same')(inputs)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)
    x = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(x)

    # Stage 1
    x = bottleneck_block(x, filters=64)
    x = bottleneck_block(x, filters=64)
    x = bottleneck_block(x, filters=64)

    # Stage 2
    x = bottleneck_block(x, filters=128, downsample=True)
    x = bottleneck_block(x, filters=128)
    x = bottleneck_block(x, filters=128)
    x = bottleneck_block(x, filters=128)

    # Stage 3
    x = bottleneck_block(x, filters=256, downsample=True)
    x = bottleneck_block(x, filters=256)
    x = bottleneck_block(x, filters=256)
    x = bottleneck_block(x, filters=256)
    x = bottleneck_block(x, filters=256)
    x = bottleneck_block(x, filters=256)

    # Stage 4
    x = bottleneck_block(x, filters=512, downsample=True)
    x = bottleneck_block(x, filters=512)
    x = bottleneck_block(x, filters=512)

    x = layers.GlobalAveragePooling2D()(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)

    model = tf.keras.Model(inputs, outputs)
    return model



In [11]:
import tensorflow as tf
from tensorflow.keras import layers  # Import layers

In [12]:
# Create model
resnet50_model = ResNet50()
resnet50_model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=["accuracy"]
)


In [17]:
# Create model
resnet50_model = ResNet50()
resnet50_model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=["accuracy"]
)


In [13]:
resnet50_model.summary()


In [14]:
epochs = 20
batch_size = 32
image_height = 224
image_width = 224

In [15]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define ImageDataGenerator for training and testing
train_datagen = ImageDataGenerator(
    rescale=1.0/255,  # Rescale pixel values to [0, 1]
    rotation_range=20,  # Randomly rotate images in the range 0-20 degrees
    width_shift_range=0.2,  # Randomly shift images horizontally by 20% of the width
    height_shift_range=0.2,  # Randomly shift images vertically by 20% of the height
    shear_range=0.2,  # Shear intensity (shear angle in counter-clockwise direction as radians)
    zoom_range=0.2,  # Randomly zoom in or out on images
    horizontal_flip=True,  # Randomly flip images horizontally
    fill_mode='nearest'  # Points outside the boundaries are filled according to the nearest pixel
)

test_datagen = ImageDataGenerator(
    rescale=1.0/255  # Only rescale pixel values to [0, 1]
)

# Use the data generators with flow_from_dataframe
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col="image_path",
    y_col="label_name",
    target_size=(image_height, image_width),
    batch_size=batch_size,
    color_mode="rgb",
    class_mode="categorical"
)

test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    x_col="image_path",
    y_col="label_name",
    target_size=(image_height, image_width),
    batch_size=batch_size,
    color_mode="rgb",
    class_mode="categorical"
)

# Get sample numbers for fit and validation
nb_train_samples = train_generator.samples
nb_test_samples = test_generator.samples
classes = list(train_generator.class_indices.keys())


Found 14270 validated image filenames belonging to 5 classes.
Found 3568 validated image filenames belonging to 5 classes.


In [None]:
# Define the model once
resnet50_model = ResNet50()

# Compile the model once
resnet50_model.compile(
    optimizer=tf.keras.optimizers.SGD(learning_rate=0.0001),
    loss='categorical_crossentropy',
    metrics=["accuracy"]
)

# Train the model
resnet50_history = resnet50_model.fit(
    train_generator,
    epochs=epochs,
    validation_data=test_generator
)


Epoch 1/20


  self._warn_if_super_not_called()


[1m200/446[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m1:40:58[0m 25s/step - accuracy: 0.5696 - loss: 1.4164

In [17]:
import tensorflow as tf

# Convert generators to tf.data.Dataset
def create_dataset(generator):
    dataset = tf.data.Dataset.from_generator(
        lambda: generator,
        output_types=(tf.float32, tf.float32),
        output_shapes=([None, image_height, image_width, 3], [None, num_classes])
    )
    dataset = dataset.cache()  # Cache data in memory after the first load
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)  # Prefetch data for optimal GPU usage
    return dataset

train_dataset = create_dataset(train_generator)
test_dataset = create_dataset(test_generator)


NameError: name 'num_classes' is not defined

In [18]:
num_classes = 5  # Define the number of output classes

def create_dataset(generator):
    dataset = tf.data.Dataset.from_generator(
        lambda: generator,
        output_types=(tf.float32, tf.float32),
        output_shapes=([None, image_height, image_width, 3], [None, num_classes])
    )
    dataset = dataset.cache()  # Cache data in memory after the first load
    dataset = dataset.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)  # Prefetch data for optimal GPU usage
    return dataset

train_dataset = create_dataset(train_generator)
test_dataset = create_dataset(test_generator)
