<a href="https://colab.research.google.com/github/Rijann12/Python/blob/main/ProjectModelTrain.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Libraries

In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Data Preprocessing

## Training Image Preprocessing

In [None]:
# Index the training images. With labels="inferred" the class of an image comes
# from the sub-folder it lives in, and label_mode="categorical" yields one-hot
# label vectors.
training_set = tf.keras.utils.image_dataset_from_directory(
    directory='/content/drive/MyDrive/DataTrain/train',
    # --- labelling ---
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    # --- image decoding / resizing ---
    color_mode="rgb",
    image_size=(128, 128),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    pad_to_aspect_ratio=False,
    data_format=None,
    # --- batching / ordering ---
    batch_size=64,
    shuffle=True,
    seed=None,
    # --- file discovery / logging ---
    validation_split=None,
    subset=None,
    follow_links=False,
    verbose=True,
)



Found 15143 files belonging to 8 classes.


In [None]:
# prefetch() pipelines the input: while the model is training on batch n, the
# next batch (n+1) is already being loaded and prepared in the background, so
# the two steps overlap and training runs faster.
prefetch_buffer = tf.data.AUTOTUNE
training_set = training_set.prefetch(buffer_size=prefetch_buffer)

In [None]:
# Peek at a single batch to sanity-check the image and label tensors and their shapes.
for images, labels in training_set.take(1):
  print(images, images.shape)
  print(labels, labels.shape)


## Validation Image Preprocessing


In [None]:
# Index the validation images with the same settings as the training loader
# (folder-inferred one-hot labels, 128x128 RGB, batches of 64).
validation_set = tf.keras.utils.image_dataset_from_directory(
    directory='/content/drive/MyDrive/DataTrain/valid',
    # --- labelling ---
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    # --- image decoding / resizing ---
    color_mode="rgb",
    image_size=(128, 128),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    pad_to_aspect_ratio=False,
    # --- batching / ordering ---
    batch_size=64,
    shuffle=True,
    seed=None,
    # --- file discovery / logging ---
    validation_split=None,
    subset=None,
    follow_links=False,
    verbose=True,
)

In [None]:
# Let the input pipeline prepare upcoming validation batches in the background.
prefetch_buffer = tf.data.AUTOTUNE
validation_set = validation_set.prefetch(buffer_size=prefetch_buffer)


## To avoid Overshooting
1. Choose a small learning rate: the default is 0.001; we use 0.0001.
2. There may be a chance of underfitting, so increase the number of neurons.
3. Add more convolutional layers to extract more features from the images. The model may be unable to capture the relevant features, or may be confused by a lack of features, so give it more features to learn from.

# Building Model

In [None]:
from tensorflow.keras.layers import Dense,Conv2D,MaxPool2D,Flatten,Dropout
from tensorflow.keras.models import Sequential

In [None]:
# Build the whole CNN in a single cell so that re-running it always produces a
# fresh model. The previous cell-by-cell model.add(...) calls appended duplicate
# layers whenever one of those cells was executed more than once.

# Width of the softmax layer = width of the one-hot label vectors produced by
# the input pipeline (label_mode="categorical"). The earlier hard-coded value
# of 22 disagreed with the 8 classes reported when the training directory was
# indexed, which makes categorical_crossentropy fail on a shape mismatch.
num_classes = training_set.element_spec[1].shape[-1]

# Input matches the loader settings: image_size=(128, 128), color_mode="rgb".
layer_stack = [tf.keras.Input(shape=(128, 128, 3))]

# Five convolutional stages with a doubling filter count. Each stage is a
# 'same'-padded 3x3 convolution, an unpadded 3x3 convolution, and a 2x2
# max-pool with stride 2.
for n_filters in (32, 64, 128, 256, 512):
    layer_stack += [
        Conv2D(filters=n_filters, kernel_size=3, padding='same', activation='relu'),
        Conv2D(filters=n_filters, kernel_size=3, activation='relu'),
        MaxPool2D(pool_size=2, strides=2),
    ]

# Classifier head: dropout, flatten, one wide dense layer, dropout, softmax.
layer_stack += [
    Dropout(0.25),
    Flatten(),
    Dense(units=1500, activation='relu'),
    Dropout(0.4),
    # Output layer: one unit per class present in the dataset.
    Dense(units=num_classes, activation='softmax'),
]

model = Sequential(layer_stack)

# Compiling the model

In [None]:
# Adam with a learning rate of 0.0001; categorical cross-entropy matches the
# one-hot labels, and accuracy is tracked as the reporting metric.
adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(
    optimizer=adam,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Print the model's layer-by-layer summary.
model.summary()

# Model Training

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

# Stop once val_loss has gone 3 epochs without improving, and roll the model
# back to the weights of its best epoch.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Train for at most 14 epochs, validating against validation_set.
fit_options = dict(
    validation_data=validation_set,
    epochs=14,
    callbacks=[early_stop],
)
training_history = model.fit(training_set, **fit_options)

In [None]:
# Earlier training call without early stopping, kept for reference only.
# (The closing parenthesis used to sit uncommented on its own line, which made
# this cell a SyntaxError.)
# training_history = model.fit(x=training_set, validation_data=validation_set, epochs=9)

## Model Evaluation

In [None]:
# Score the trained model on the training data; evaluate() returns the loss
# followed by the compiled metric (accuracy).
train_metrics = model.evaluate(training_set)
train_loss, train_acc = train_metrics

In [None]:
# Report the training-set loss and accuracy.
train_summary = ("Train loss:", train_loss, "Train Accuracy:", train_acc)
print(*train_summary)

In [None]:
# Score the trained model on the validation data (loss, then accuracy).
val_metrics = model.evaluate(validation_set)
val_loss, val_acc = val_metrics

In [None]:
# Report the validation-set loss and accuracy.
val_summary = ("Validation loss:", val_loss, "Validation accuracy:", val_acc)
print(*val_summary)

# Saving model

In [None]:
# Persist the trained model to an ".h5" file.
model.save(filepath="trained_model.h5")

In [None]:
# Also store the model as a ".keras" file; per the original author's note this
# format produces a smaller file than ".h5".
model.save(filepath="trained_model.keras")


In [None]:
# Display the metrics recorded by model.fit() on the returned history object.
training_history.history
# Translated note (original was not in English; meaning approximate):
# validation accuracy should be looked at after validation has been run.
#training_history.history(val_accuracy)

In [None]:
# Record the training history as JSON.
# Keeping the metrics on disk lets later sessions read them back for analysis
# or visualization without having to train the model again.
import json

with open("training_hist.json", "w") as history_file:
  history_file.write(json.dumps(training_history.history))

print("Model & training history saved.")

In [None]:
# model.evaluate() computes the loss and accuracy on the given dataset.
# Comparing the two splits is an overfitting check: training accuracy that is
# clearly higher than validation accuracy suggests the model may be overfitting.
train_metrics = model.evaluate(training_set)
train_loss, train_acc = train_metrics
print(f"Train Loss: {train_loss:.4f}, Accuracy: {train_acc:.4f}")

val_metrics = model.evaluate(validation_set)
val_loss, val_acc = val_metrics
print(f"Valid Loss: {val_loss:.4f}, Accuracy: {val_acc:.4f}")


# Accuracy Visualization

In [None]:
# Plot training vs. validation accuracy against the 1-based epoch number.
history = training_history.history
epochs = range(1, len(history['accuracy']) + 1)

fig, ax = plt.subplots(figsize=(8, 5))
ax.plot(epochs, history['accuracy'], 'r', label='Train Acc')
ax.plot(epochs, history['val_accuracy'], 'b', label='Val Acc')
ax.set_xlabel('Epoch')
ax.set_ylabel('Accuracy')
ax.set_title('Training & Validation Accuracy')
ax.legend()
plt.show()


In [None]:
# epochs = { i for in range(1,1)}
# plt.plot(epochs, training_history.history ['accuracy'], color= 'red', label='Training Accuracy')
# plt.plot(epochs, training_history.history ['val_accuracy'], color= 'blue', label='Vlaidation Accuracy')
# plt.xlabel("No. of epochs")
# plt.ylabel("Accuracy Result")
# plt.title("Visualization Accuracy Result")
# plt.legend()
# plt.show()

# Some other metrics for model evaluation

In [None]:
  # validation_set was re-bound to the result of .prefetch() earlier in the
  # notebook, and that wrapper dataset does not expose `class_names`, so reading
  # the attribute from it raises AttributeError. Rebuild the list from the class
  # sub-folders instead: labels="inferred" orders classes by sorted folder name.
  # NOTE(review): skipping dot-folders mirrors Keras' directory indexing — confirm
  # against the installed Keras version.
  valid_dir = '/content/drive/MyDrive/DataTrain/valid'
  class_name = [
      entry.rstrip('/')
      for entry in sorted(tf.io.gfile.listdir(valid_dir))
      if tf.io.gfile.isdir(tf.io.gfile.join(valid_dir, entry))
      and not entry.startswith('.')
  ]
  class_name

In [None]:
# Evaluation dataset, read from the same directory as the validation set.
# shuffle=False makes the class folders stream sequentially from first to last,
# so predictions and labels collected in separate passes stay aligned.
test_set = tf.keras.utils.image_dataset_from_directory(
    directory='/content/drive/MyDrive/DataTrain/valid',
    # --- labelling ---
    labels="inferred",
    label_mode="categorical",
    class_names=None,
    # --- image decoding / resizing ---
    color_mode="rgb",
    image_size=(128, 128),
    interpolation="bilinear",
    crop_to_aspect_ratio=False,
    pad_to_aspect_ratio=False,
    # --- batching / ordering ---
    batch_size=32,
    shuffle=False,
    seed=None,
    # --- file discovery / logging ---
    validation_split=None,
    subset=None,
    follow_links=False,
    verbose=True,
).prefetch(tf.data.AUTOTUNE)


In [None]:
# Run the model over test_set and show the prediction array together with its shape.
y_pred = model.predict(x=test_set)
(y_pred, y_pred.shape)

In [None]:
# For every row of y_pred, take the index of its largest value (axis=1 scans
# across the columns of each row); that index is the predicted class.
predicted_categories = tf.math.argmax(y_pred, axis=1)

In [None]:
# Display the predicted class indices.
predicted_categories

In [None]:
# Collect the one-hot label batches from test_set, join them along the batch
# axis, and reduce each row to its class index.
one_hot_batches = [batch_labels for _images, batch_labels in test_set]
one_hot_labels = tf.concat(one_hot_batches, axis=0)
true_categories = tf.argmax(one_hot_labels, axis=1)
true_categories

In [None]:
# Short aliases for the ground-truth and predicted class indices used by the
# metric cells below.
Y_true, Y_pred = true_categories, predicted_categories

In [None]:
# Calculate precision and recall
# Precision measures the percentage of the predictions made by the model that are correct.
# Recall measures the percentage of the relevant data points that were correctly identified by the model.


In [None]:
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Build the classification report for the predictions, labelling the rows with
# the class names, then print it.
report_text = classification_report(
    Y_true.numpy(),
    Y_pred.numpy(),
    target_names=class_name,
)
print(report_text)


In [None]:
# Confusion matrix of true vs. predicted class indices: entry [i, j] counts the
# samples whose true class is i and whose predicted class is j.
cm = confusion_matrix(Y_true.numpy(), Y_pred.numpy())
# `shape` is a tuple attribute, not a method; calling it raised TypeError.
cm.shape

# Visualization of Confusion matrix

In [None]:
# Draw the confusion matrix as an annotated heat map
# (rows: actual class, columns: predicted class).
plt.figure(figsize=(40, 40))
# fmt='d' prints the integer counts as-is; the default format ('.2g') would show
# counts of 100 or more in scientific notation (e.g. 1.2e+02).
sns.heatmap(cm, annot=True, fmt='d', annot_kws={'size':10})
plt.xlabel("Predicted Class", fontsize=20)
plt.ylabel("Actual Class", fontsize=20)
plt.title("Plant Disease Confusion Matrix", fontsize=20)
plt.show()
# Reading the plot: each diagonal cell counts the samples that were predicted as
# a class and actually belong to that class; off-diagonal cells are mistakes.
# The model can be tuned further by adjusting the filters, increasing the number
# of neurons, or adjusting the loss function.