In [12]:
import os
from skimage.io import imread
from skimage.transform import resize
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import pandas as pd

In [2]:
# Loading the Image Data
input_dir = "/content/drive/MyDrive/cats_and_dogs_filtered/validation"
categories = ['cats', 'dogs']
data = []
labels = []

In [4]:
# Converting the image to numpy array, resizing, and flattening
for category_idx, category in enumerate(categories):
    category_path = os.path.join(input_dir, category)
    for file in os.listdir(category_path):
        img_path = os.path.join(category_path, file)
        img = imread(img_path)
        img = resize(img, (15, 15))
        img = img.flatten()
        data.append(img)
        labels.append(category_idx)

In [5]:
# Converting data and labels to numpy arrays
data = np.asarray(data)
labels = np.asarray(labels)

In [6]:
# Splitting the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, shuffle=True, stratify=labels)

# Feature scaling
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

# Logistic Regression
lr = LogisticRegression()
lr.fit(x_train, y_train)
y_pred = lr.predict(x_test)

# Calculating accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.5971223021582733


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Accuracy obtained by logistic regression is 60 percent.

In [7]:
# Decision Tree Model

from sklearn.tree import DecisionTreeClassifier
# Decision Tree Classifier
dt = DecisionTreeClassifier()
dt.fit(x_train, y_train)
y_pred = dt.predict(x_test)

# Calculating accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.6690647482014388


In [9]:
# Random Forest

from sklearn.ensemble import RandomForestClassifier
# Random Forest Classifier
rf = RandomForestClassifier()
rf.fit(x_train, y_train)
y_pred = rf.predict(x_test)

# Calculating accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.7338129496402878


In [10]:
# Hyperparameter Tuning - Random Forest

from sklearn.model_selection import train_test_split, GridSearchCV

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10],
    'min_samples_split': [2, 5, 10]
}

# Perform grid search with cross-validation
grid_search = GridSearchCV(rf, param_grid, cv=5)
grid_search.fit(x_train, y_train)

# Get the best hyperparameters and model
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_

# Make predictions using the best model
y_pred = best_model.predict(x_test)

# Calculating accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Best Hyperparameters:", best_params)

Accuracy: 0.7338129496402878
Best Hyperparameters: {'max_depth': None, 'min_samples_split': 2, 'n_estimators': 100}


In [13]:
# Creating the dataframe for actual and predicted value

pd.DataFrame({"Actual_Value": y_test, "Predicted_Value": y_pred})

Unnamed: 0,Actual_Value,Predicted_Value
0,0,0
1,0,0
2,0,0
3,1,0
4,0,0
...,...,...
134,0,0
135,0,0
136,0,0
137,1,0


We have trained 1000 images of cats and dogs. Increase in sample image will lead to increase in accuracy.