In [None]:
!pip install kaggle

In [None]:
import os

# KAGGLE_CONFIG_DIR must name the *directory* that holds kaggle.json; the
# Kaggle client appends "kaggle.json" to it. Pointing it at the file itself
# makes the client look for /content/kaggle.json/kaggle.json and fail to
# authenticate.
os.environ['KAGGLE_CONFIG_DIR'] = '/content'

In [None]:
# Fetch the fish dataset archive with the Kaggle CLI; the next cell opens
# "a-large-scale-fish-dataset.zip" by relative name, so the download is
# expected to land in the working directory.
!kaggle datasets download -d crowww/a-large-scale-fish-dataset

In [None]:
from zipfile import ZipFile
# Unpack the downloaded archive into the current working directory.
with ZipFile("a-large-scale-fish-dataset.zip", mode='r') as archive:
    archive.extractall()

In [None]:
# Root folder of the extracted dataset.
DIR = '/content/Fish_Dataset/Fish_Dataset'

# Keep only entries whose name has no dot, i.e. drop anything that looks
# like a file with an extension.
classes = [entry for entry in os.listdir(DIR) if entry.count('.') == 0]
classes

In [None]:
import numpy as np
import pandas as pd
# Walk the dataset tree and collect one (path, label) pair per PNG image.
# The label is the name of the folder that directly contains the image.
label = []
path = []

for dirname, _, filenames in os.walk(DIR):
    # Folders whose last whitespace-separated token is "GT" are skipped.
    if dirname.split()[-1] == 'GT':
        continue
    folder_label = os.path.split(dirname)[-1]
    for filename in filenames:
        if os.path.splitext(filename)[-1] != '.png':
            continue
        label.append(folder_label)
        path.append(os.path.join(dirname, filename))

# Two-column table: image path and its class label.
df = pd.DataFrame({'path': path, 'label': label})

In [None]:
# Preview the first rows of the path/label table.
df.head()

In [None]:
# Show the first collected image path as a quick sanity check.
df.path[0]

In [None]:
# Summarise the columns, dtypes and non-null counts of the table.
df.info()

In [None]:
# Number of collected images per label.
df['label'].value_counts()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Draw the first image of every label in a 3x3 grid.
idx = 0
plt.figure(figsize=(15, 12))
for idx, unique_label in enumerate(df['label'].unique(), start=1):
    first_image_path = df.loc[df['label'] == unique_label].iloc[0, 0]
    plt.subplot(3, 3, idx)
    plt.imshow(plt.imread(first_image_path))
    plt.title(unique_label)
    plt.axis('off')

In [None]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows as the test set; the fixed random_state keeps the
# shuffled split repeatable across runs.
train_df, test_df = train_test_split(df, train_size=0.8, shuffle=True, random_state=42)

In [None]:
# Report (rows, columns) of the train and test frames, one per line.
print(train_df.shape, test_df.shape, sep='\n')

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Flatten, GlobalAveragePooling1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import TimeDistributed, Reshape
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

In [None]:
# Set up ImageDataGenerator with rescaling for normalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Augmentation settings for the training data
train_generator = ImageDataGenerator(
    rescale=1./255,                  # Rescale pixel values
    validation_split=0.2,            # Split for validation data
    # Data augmentation settings
    rotation_range=20,               # Randomly rotate images by 20 degrees
    width_shift_range=0.2,           # Randomly shift images horizontally by 20%
    height_shift_range=0.2,          # Randomly shift images vertically by 20%
    shear_range=0.2,                 # Randomly shear images
    zoom_range=0.2,                  # Randomly zoom images
    horizontal_flip=True,            # Randomly flip images horizontally
    fill_mode='nearest'              # Fill in missing pixels after transformations
)

# Validation images must NOT be augmented, so they get their own generator
# that only rescales. It carries the same validation_split as the training
# generator and is fed the same dataframe, so the 'training' and 'validation'
# subsets remain the two complementary parts of train_df.
val_generator = ImageDataGenerator(rescale=1./255, validation_split=0.2)

# Test data: rescaling only, no split.
test_generator = ImageDataGenerator(rescale=1./255)

# Arguments shared by every iterator built from the path/label dataframes.
common_flow_args = dict(
    x_col='path',
    y_col='label',
    target_size=(128, 128),          # Target size for images
    color_mode='rgb',                # Use RGB color mode
    class_mode='categorical',        # Multi-class classification (one-hot labels)
    batch_size=32,                   # Batch size
)

# Load training dataset with augmentation
train_images = train_generator.flow_from_dataframe(
    dataframe=train_df,
    shuffle=True,                    # Shuffle data
    seed=42,                         # Seed for reproducibility
    subset='training',               # Use the training subset
    **common_flow_args
)

# Load validation dataset (without augmentation, just rescaling).
# shuffle=False keeps batches in file order, so `val_images.classes` and
# `val_images.filepaths` line up with the rows returned by model.predict();
# with shuffling enabled, metrics computed from them compare mismatched pairs.
val_images = val_generator.flow_from_dataframe(
    dataframe=train_df,
    shuffle=False,
    subset='validation',             # Use the validation subset
    **common_flow_args
)

# Load test dataset (no augmentation, just rescaling)
test_images = test_generator.flow_from_dataframe(
    dataframe=test_df,
    shuffle=False,                   # No shuffling for the test data
    **common_flow_args
)


In [None]:
# Show the label -> index mapping of the training and validation iterators
# so they can be compared side by side.
display(train_images.class_indices, val_images.class_indices)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, LSTM, Reshape
from tensorflow.keras.optimizers import Adam

# CNN + RNN architecture
# The softmax width is taken from the training iterator instead of being
# hard-coded, so the model always matches the one-hot label length produced
# by class_mode='categorical' (9 for the full fish dataset).
num_classes = len(train_images.class_indices)

cnn_rnn_model = Sequential()

# CNN layers for feature extraction: three conv + max-pool stages on
# 128x128 RGB inputs.
cnn_rnn_model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
cnn_rnn_model.add(MaxPooling2D(pool_size=(2, 2)))

cnn_rnn_model.add(Conv2D(64, (3, 3), activation='relu'))
cnn_rnn_model.add(MaxPooling2D(pool_size=(2, 2)))

cnn_rnn_model.add(Conv2D(128, (3, 3), activation='relu'))
cnn_rnn_model.add(MaxPooling2D(pool_size=(2, 2)))

# Flatten the CNN output
cnn_rnn_model.add(Flatten())

# Reshape to (batch_size, timesteps, features) before passing to LSTM:
# the whole flattened feature vector becomes a single timestep.
cnn_rnn_model.add(Reshape((1, -1)))

# LSTM over that one-step sequence; return_sequences=True keeps the time
# axis so the pooling layer below can collapse it.
cnn_rnn_model.add(LSTM(64, return_sequences=True))

# Global Average Pooling to reduce sequence dimension
cnn_rnn_model.add(GlobalAveragePooling1D())

# Final dense layers for classification
cnn_rnn_model.add(Dense(512, activation='relu'))
cnn_rnn_model.add(Dense(num_classes, activation='softmax'))  # one unit per class

# Compile the model
cnn_rnn_model.compile(optimizer=Adam(learning_rate=0.001),
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

# Train the model, tracking loss/accuracy on the validation iterator
history = cnn_rnn_model.fit(
    train_images,
    validation_data=val_images,
    epochs=6
)


In [None]:
# Evaluate on the training iterator; the two returned values are the loss and
# the 'accuracy' metric the model was compiled with. train_images comes from
# the augmenting generator, so this is accuracy on augmented samples.
train_loss, train_acc = cnn_rnn_model.evaluate(train_images)
print('Training accuracy:', train_acc)

In [None]:
# Evaluate on the validation iterator; returns the loss and the 'accuracy'
# metric the model was compiled with.
val_loss, val_acc = cnn_rnn_model.evaluate(val_images)
print('Validation accuracy:', val_acc)

In [None]:
# Persist the trained model to disk; a later cell reloads it from this file.
cnn_rnn_model.save('fish_prediction_model.keras')

In [None]:
# Per-epoch metric values recorded by fit().
history.history

In [None]:
import json
# Save the per-epoch metrics as JSON so they can be inspected without
# retraining.
with open('training_hist.json', 'w') as hist_file:
    hist_file.write(json.dumps(history.history))

In [None]:
# List the metric names available in the training history.
print(history.history.keys())

In [None]:
# Plot training vs. validation accuracy against the 1-based epoch number.
n_epochs = len(history.history['accuracy'])
epochs = range(1, n_epochs + 1)

for metric_key, line_color, legend_text in (
    ('accuracy', 'red', 'Training Accuracy'),
    ('val_accuracy', 'blue', 'Validation Accuracy'),
):
    plt.plot(epochs, history.history[metric_key], color=line_color, label=legend_text)

plt.title('Visualization of Accuracy Result')
plt.xlabel('No. of Epochs')
plt.legend()
plt.show()

In [None]:
# Reload the model from the file written by the save cell; the remaining
# cells use this reloaded copy for prediction.
rnn = tf.keras.models.load_model('fish_prediction_model.keras')

In [None]:
# Class probabilities for every validation image, then the index of the most
# probable class per row. tf.argmax is used here, so predicted_categories is
# presumably a TensorFlow tensor rather than a NumPy array — a later cell
# recomputes it with np.argmax.
y_pred = rnn.predict(val_images)
predicted_categories = tf.argmax(y_pred, axis=1)

In [None]:
import cv2
# Load one sample image and display it without axis ticks.
image_path = '/content/Fish_Dataset/Fish_Dataset/Red Mullet/Red Mullet/00011.png'

# OpenCV reads images as BGR; convert to RGB so matplotlib shows true colours.
img = cv2.cvtColor(cv2.imread(image_path), cv2.COLOR_BGR2RGB)

plt.imshow(img)
plt.xticks([])
plt.yticks([])
plt.title('Test Image')
plt.show()

In [None]:
# Load the sample image at the network's input resolution.
image = tf.keras.preprocessing.image.load_img(image_path, target_size=(128, 128))
input_arr = tf.keras.preprocessing.image.img_to_array(image)

# The model was trained on pixels rescaled by 1/255 (see the generators);
# feeding raw 0-255 values puts the input far outside the training range
# and makes the prediction unreliable.
input_arr = input_arr / 255.0

input_arr = np.array([input_arr])  # Convert single image to a batch.
predictions = rnn.predict(input_arr)

In [None]:
# Raw model output for the single-image batch (one row of class scores).
print(predictions)

In [None]:
# Index of the highest score in the prediction; np.argmax without an axis
# works on the flattened array, which is fine for a batch of one image.
result_index = np.argmax(predictions) #Return index of max element
print(result_index)

In [None]:
# label -> index mapping reported by the validation iterator.
class_indices = val_images.class_indices

# Iterating the mapping yields the label names in its stored order.
class_names = [name for name in class_indices]

print("Class Names:", class_names)

In [None]:
# Translate the predicted index into its label and show it above the image.
model_prediction = class_names[result_index]

plt.imshow(img)
plt.xticks([])
plt.yticks([])
plt.title(f"Fish Name: {model_prediction}")
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, classification_report
import json
import cv2


# --- Predictions on the validation iterator --------------------------------
y_pred = rnn.predict(val_images)
predicted_categories = np.argmax(y_pred, axis=1)

# NOTE(review): `classes` (and `filepaths` below) are presumably stored in
# file order; they only correspond row-for-row to `y_pred` if the iterator
# yields batches unshuffled — confirm the iterator's shuffle setting.
y_true = val_images.classes

val_label_names = list(val_images.class_indices.keys())

# --- Confusion matrix ------------------------------------------------------
cm = confusion_matrix(y_true, predicted_categories)

plt.figure(figsize=(10, 8))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=val_label_names,
    yticklabels=val_label_names,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

# --- Per-class precision / recall / F1 -------------------------------------
print(classification_report(y_true, predicted_categories, target_names=val_label_names))

# --- Training curves: loss (left panel) and accuracy (right panel) ---------
curve_specs = (
    ('loss', 'val_loss', 'Training Loss', 'Validation Loss', 'Loss over Epochs', 'Loss'),
    ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy', 'Accuracy over Epochs', 'Accuracy'),
)

plt.figure(figsize=(12, 4))
for panel, (train_key, val_key, train_text, val_text, panel_title, y_text) in enumerate(curve_specs, start=1):
    plt.subplot(1, 2, panel)
    plt.plot(history.history[train_key], label=train_text)
    plt.plot(history.history[val_key], label=val_text)
    plt.title(panel_title)
    plt.xlabel('Epochs')
    plt.ylabel(y_text)
    plt.legend()

plt.show()

# --- Persist the history ---------------------------------------------------
with open('training_hist.json', 'w') as f:
    json.dump(history.history, f)

# --- First five validation images with predicted vs. true label ------------
for i in range(5):
    img_path = val_images.filepaths[i]
    # OpenCV loads BGR; convert so matplotlib renders correct colours.
    img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)

    predicted_name = class_names[predicted_categories[i]]
    true_name = class_names[y_true[i]]

    plt.imshow(img)
    plt.title(f"Predicted: {predicted_name}, True: {true_name}")
    plt.axis('off')
    plt.show()


In [None]:
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_auc_score

# One-vs-rest encoding of the true and predicted class indices.
class_ids = np.arange(len(class_names))
y_true_bin = label_binarize(y_true, classes=class_ids)
y_pred_bin = label_binarize(predicted_categories, classes=class_ids)

# One ROC curve per class, scored with that class's predicted probability.
plt.figure(figsize=(12, 8))
for i, class_name in enumerate(class_names):
    fpr, tpr, _ = roc_curve(y_true_bin[:, i], y_pred[:, i])
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'{class_name} (AUC = {roc_auc:.2f})')

# Diagonal reference line (the ROC of a random classifier).
plt.plot([0, 1], [0, 1], color='navy', linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.title('Receiver Operating Characteristic (ROC) Curve')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.legend(loc="lower right")
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import numpy as np

# Class probabilities on the test iterator, reduced to the most likely class
# index for every image.
test_probabilities = rnn.predict(test_images)
predictions = np.argmax(test_probabilities, axis=1)

# Integer class labels as recorded by the test iterator
true_labels = test_images.classes

# Rows are true classes, columns are predicted classes
cm = confusion_matrix(true_labels, predictions)

# Heatmap of the matrix with label names on both axes
test_label_names = test_images.class_indices.keys()

plt.figure(figsize=(10, 7))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=test_label_names,
    yticklabels=test_label_names,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# Bar chart of how many images each species has in the full dataset, with
# bars ordered like class_names.
plt.figure(figsize=(10, 6))
sns.countplot(data=df, x='label', order=class_names)
plt.xticks(rotation=90)
plt.title('Class Distribution in the Dataset')
plt.xlabel('Fish Species')
plt.ylabel('Number of Samples')
plt.show()

In [None]:
from sklearn.metrics import precision_recall_curve

# One precision-recall curve per class, computed from the one-vs-rest labels
# and that class's predicted probability.
plt.figure(figsize=(12, 8))
for i, class_name in enumerate(class_names):
    precision, recall, _ = precision_recall_curve(y_true_bin[:, i], y_pred[:, i])
    plt.plot(recall, precision, lw=2, label=f'{class_name}')

plt.title('Precision-Recall Curve')
plt.xlabel('Recall')
plt.ylabel('Precision')
plt.legend(loc='lower left')
plt.show()