In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import random
import time
import os
import cv2
import tensorflow as tf
import matplotlib.image as mpimg
from sklearn.datasets import load_iris
from sklearn.model_selection import KFold
from sklearn.pipeline import Pipeline
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score
from sklearn.datasets import fetch_olivetti_faces
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.metrics import accuracy_score
from skimage.transform import resize
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from skimage.color import rgb2gray
from tqdm.notebook import tqdm
from mpl_toolkits.mplot3d import Axes3D
from tensorflow.keras import layers, models
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input, decode_predictions
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout

# Ask TensorFlow to grow GPU memory on demand instead of reserving it all up front.
# Iterating (rather than indexing gpus[0]) keeps this cell working on CPU-only
# machines, where the list is empty, and configures every GPU on multi-GPU hosts.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu, True)



Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
2024-02-27 13:51:25.856960: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-27 13:51:25.911730: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-27 13:51:25.911825: E external/local_xla/xla/stream_executor/cuda/c

In [None]:
# Single seed used for the NumPy and TensorFlow global random generators.
seed = 13
np.random.seed(seed)
tf.random.set_seed(seed)

# Absolute location of the training images; adjust to the local environment.
path = "/content/train/" ##

In [None]:
# Index the training images: one row per file found in ./train, with the label
# taken from the part of the file name that precedes the first ".".
train_df = pd.DataFrame(os.listdir("./train"), columns=["file"])
train_df["label"] = train_df["file"].str.split(".").str[0]
train_df.head()

In [None]:
# Index the test images: one row per file found in ./test1 (no label column).
test_df = pd.DataFrame(os.listdir("./test1"), columns=["file"])
test_df.head()

In [None]:
# Hold out 20% of the labelled files for validation, stratified on the label
# column so both subsets keep the same class proportions.
train_data, val_data = train_test_split(
    train_df,
    stratify=train_df["label"],
    test_size=0.2,
    random_state=13,
)

In [None]:

# Both pipelines run VGG16's preprocess_input on every image; only the training
# pipeline adds augmentation (random rotation up to 15 degrees, horizontal flips).
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    horizontal_flip=True,
    rotation_range=15,
)

In [None]:
# Root folder that contains the "train/" image directory; adjust per environment.
FILES = "/content/" ##
batch_size = 160

# Stream augmented training batches from disk: file names come from the "file"
# column, one-hot targets from the "label" column, images resized to 224x224.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory=FILES + "train/",
    x_col="file",
    y_col="label",
    target_size=(224, 224),
    class_mode="categorical",
    batch_size=batch_size,
    seed=13,
)

In [None]:

# Validation batches use the same folder, columns and image size as training,
# but shuffle=False keeps the batches in dataframe order so that predictions
# can later be lined up with the rows of val_data.
val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_data,
    directory=FILES + "train/",
    x_col="file",
    y_col="label",
    target_size=(224, 224),
    class_mode="categorical",
    batch_size=batch_size,
    shuffle=False,
    seed=13,
)

In [None]:
# VGG16 with ImageNet weights and without its classification head
# (include_top=False), sized for 224x224 RGB inputs.
base_model = VGG16(
    include_top=False,
    weights="imagenet",
    input_shape=(224, 224, 3),
)

In [None]:
# Freeze every layer of the pretrained base so only the new head is trained.
# The loop variable is named `layer` (singular) on purpose: calling it `layers`
# would overwrite the `layers` module imported from tensorflow.keras.
for layer in base_model.layers:
  layer.trainable = False
base_model.summary()

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
def vgg16_pretrained(backbone=None, num_classes=2, dropout_rate=0.4):
  """Build a classifier made of a feature-extractor backbone plus a dense head.

  The head is: global average pooling -> Dense(100, relu) -> Dropout ->
  Dense(64, relu) -> Dense(num_classes, softmax).

  Args:
    backbone: Model used as the first layer. When None (the default), the
      notebook-level `base_model` is used, which matches the original behavior.
    num_classes: Number of units in the softmax output layer.
    dropout_rate: Rate passed to the Dropout layer.

  Returns:
    An uncompiled `Sequential` model.
  """
  if backbone is None:
    # Resolved at call time so the default follows whatever `base_model`
    # currently refers to in the notebook.
    backbone = base_model
  model = Sequential([
      backbone,
      GlobalAveragePooling2D(),
      Dense(100, activation="relu"),
      Dropout(dropout_rate),
      Dense(64, activation="relu"),
      Dense(num_classes, activation="softmax")
  ])
  return model

In [None]:
# Clear the Keras backend session before the classifier is built.
# NOTE(review): base_model was created in an earlier cell, i.e. before this
# reset -- confirm that this ordering is intended.
tf.keras.backend.clear_session()

In [None]:
# Build the classifier and configure it for one-hot (categorical) targets.
model = vgg16_pretrained()

# `metrics` is passed as a list: newer Keras releases reject a bare string here.
# The logged metric names ("accuracy" / "val_accuracy") are unchanged.
model.compile(loss="categorical_crossentropy",
              optimizer="adam",
              metrics=["accuracy"])
model.summary()

In [None]:
# Halve the learning rate whenever validation accuracy has not improved for
# 2 epochs, never going below 1e-9.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor="val_accuracy",
    factor=0.5,
    patience=2,
    min_lr=1e-9,
    verbose=1,
)

In [None]:
# Stop training once validation accuracy (higher is better) has gone
# 5 epochs without improving.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    mode="max",
    patience=5,
    verbose=1,
)

In [None]:

# Folder that receives the checkpoint files.
directory = './'
# exist_ok=True makes this a no-op when the folder is already there and avoids
# the check-then-create race of a separate os.path.exists() test.
os.makedirs(directory, exist_ok=True)

# Save the weights each time validation accuracy reaches a new best.
# The file name ends in ".weights.h5": recent Keras versions require that
# suffix for weights-only checkpoints, and the ".h5" ending is also accepted
# by older tf.keras releases.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath = os.path.join(directory, "catdog_vgg16_.{epoch:02d}-{val_accuracy:.6f}.weights.h5"),
    monitor = "val_accuracy",
    mode = "max",  # explicit, matching the early-stopping callback
    verbose = 1,
    save_best_only = True,
    save_weights_only = True
)

In [None]:
# Train the head for up to 10 epochs with LR scheduling, early stopping and
# best-weights checkpointing.
# Step counts come from len(generator), which includes the final partial batch.
# Floor division (n // batch_size) would silently drop those samples from every
# epoch and from the validation metrics that drive the callbacks, and would
# yield 0 steps for a dataset smaller than one batch.
history = model.fit(
    train_generator,
    epochs = 10,
    validation_data = val_generator,
    validation_steps = len(val_generator),
    steps_per_epoch = len(train_generator),
    callbacks = [reduce_lr, early_stopping, checkpoint]
)

In [None]:
# Training curves: loss on the left panel, accuracy on the right, each with
# its training and validation series plotted against the epoch index.
fig, axes = plt.subplots(1, 2, figsize=(12,4))

panels = [
    ("Loss", "loss", "Training Loss", "Validation Loss"),
    ("Accuracy", "accuracy", "Training Accuracy", "Validation Accuracy"),
]
for ax, (title, key, train_label, val_label) in zip(axes, panels):
    epochs_seen = range(len(history.history[key]))
    sns.lineplot(x=epochs_seen, y=history.history[key],
                 ax=ax, label=train_label)
    sns.lineplot(x=epochs_seen, y=history.history["val_" + key],
                 ax=ax, label=val_label)
    ax.set_title(title)

sns.despine()
plt.show()


In [None]:
# Predict class probabilities for the whole validation set. len(val_generator)
# is an integer batch count that includes the final partial batch.
val_pred = model.predict(val_generator, steps=len(val_generator))

# Invert the generator's {class name: index} mapping into {index: class name}.
labels = dict((v, k) for k, v in val_generator.class_indices.items())

# Translate the arg-max index of each prediction directly into its class name.
predicted_labels = [labels[index] for index in np.argmax(val_pred, axis=1)]
if len(predicted_labels) != len(val_data):
    raise ValueError(
        f"Got {len(predicted_labels)} predictions for {len(val_data)} validation rows; "
        "the generator and val_data are out of sync."
    )

# assign() returns a new frame, so the column is created with the right dtype
# in one step. This avoids writing into a slice of train_df and avoids
# overwriting an integer column in place with strings.
val_data = val_data.assign(val_pred=predicted_labels)


In [None]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import ConfusionMatrixDisplay

# The same ordered list drives both the matrix rows/columns and the tick
# labels, so the two cannot drift apart.
class_names = ["cat", "dog"]

fig, ax = plt.subplots(figsize=(9, 6))

# Rows are true labels, columns are predicted labels, both in class_names order.
cm = confusion_matrix(val_data["label"], val_data["val_pred"], labels=class_names)

disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=class_names)
disp.plot(cmap=plt.cm.Blues, ax=ax)

ax.set_title("Validation Set")
plt.show()

In [None]:
# Keep only the validation rows whose predicted label differs from the true
# label, renumbered from 0 so they can be addressed by position.
val_errors = val_data.loc[val_data["label"].ne(val_data["val_pred"])].reset_index(drop=True)
val_errors

In [None]:
# Show up to 81 misclassified validation images on a 9x9 grid, each titled
# with its true and predicted label.
fig = plt.figure(1, figsize=(24, 20))
first_errors = val_errors.head(81)
cells = zip(first_errors["file"], first_errors["label"], first_errors["val_pred"])
for position, (file_name, true_label, predicted_label) in enumerate(cells, start=1):
    plt.subplot(9, 9, position)
    image = plt.imread("./train/" + file_name)
    plt.imshow(image)
    plt.axis("off")
    plt.title(f"""True Val : {true_label} \n Prediction : {predicted_label}""")

plt.tight_layout()
plt.show()