In [1]:
import os
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import classification_report,confusion_matrix
import matplotlib.pyplot as plt
from tqdm import tqdm
import joblib
from sklearn.model_selection import GridSearchCV
import cv2
import seaborn as sns
import time
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from zipfile import ZipFile



In [2]:
# Unpack the training archive.
# NOTE(review): extractall() is called without a target path, so the files
# land in the current working directory, while train_dir is an absolute
# path - confirm the notebook is started from a folder where these line up.
dataset_train="C:/Users/abhin/Downloads/train.zip"
# The handle is named `archive` rather than `zip` so the builtin zip() is not
# shadowed for the rest of the kernel session.
with ZipFile(dataset_train,'r') as archive:
    archive.extractall()

In [3]:
# Locations of everything this notebook reads and writes.
# NOTE(review): these are absolute, machine-specific paths; adjust them before
# running on another machine.
folder_path="C:/Users/abhin/Downloads"
os.makedirs(folder_path,exist_ok=True)

# Artefacts written into folder_path.
confusion_image_path=os.path.join(folder_path,'confusion matrix.png')
classification_file_path=os.path.join(folder_path,'classification_report.txt')
model_file_path=os.path.join(folder_path,"svm_model.pkl")

# Directory holding the labelled training images.
train_dir="C:\\Users\\abhin\\Downloads\\train\\train"

# Written with the same escaping as train_dir. The previous raw string with
# doubled backslashes produced a path that literally contained "\\\\".
test_dir="C:\\Users\\abhin\\Downloads\\test1\\test1"

In [4]:
# Re-imported so this cell can be run on its own; the notebook's first cell
# already provides these names.
import os
import cv2
from tqdm import tqdm

# Build the training set: every readable cat/dog image in train_dir is resized,
# divided by 255, flattened to a 1-D vector and stored with its integer label.

# Filename prefix -> class id. Files matching neither prefix are skipped
# instead of being silently counted as dogs.
label_by_prefix = {'cat': 0, 'dog': 1}
image_extensions = ('.png', '.jpg', '.jpeg')

features = []
labels = []
image_size = (50, 50)  # target size handed to cv2.resize

# os.listdir returns entries in arbitrary order. Sorting keeps the sample order
# (and therefore the seeded train/test split made later) the same on every run.
train_images = sorted(os.listdir(train_dir))

skipped_unknown_label = 0
skipped_unreadable = 0

for image in tqdm(train_images, desc="Processing Train Images"):
    lower_name = image.lower()

    # Ignore anything that is not an image file.
    if not lower_name.endswith(image_extensions):
        continue

    # Derive the label from the (case-insensitive) filename prefix.
    label = next(
        (class_id for prefix, class_id in label_by_prefix.items()
         if lower_name.startswith(prefix)),
        None,
    )
    if label is None:
        skipped_unknown_label += 1
        continue

    image_path = os.path.join(train_dir, image)
    image_read = cv2.imread(image_path)
    if image_read is None:
        # cv2.imread signals an unreadable file with None instead of raising.
        skipped_unreadable += 1
        continue

    image_resized = cv2.resize(image_read, image_size)
    # NOTE(review): the division yields float64 values; casting to float32
    # would halve memory use - confirm the downstream steps tolerate it.
    image_normalized = image_resized / 255.0
    features.append(image_normalized.flatten())
    labels.append(label)

# Output the number of processed images
print(f"Processed {len(features)} images.")
if skipped_unknown_label or skipped_unreadable:
    print(
        f"Skipped {skipped_unknown_label} files with an unrecognised name "
        f"and {skipped_unreadable} unreadable images."
    )


Processing Train Images: 100%|██████████| 25000/25000 [02:23<00:00, 174.29it/s]

Processed 25000 images.





In [5]:
# Drop the directory listing; it is not used again in the cells that follow.
# Re-running the loading cell is required before this cell can run a second time.
del train_images

In [6]:
# Turn the accumulated Python lists into NumPy arrays.
features = np.asarray(features)
labels = np.asarray(labels)

# Shuffled 80/20 hold-out split with a fixed seed.
split_options = dict(test_size=0.2, shuffle=True, random_state=42)
x_train, x_test, y_train, y_test = train_test_split(
    features,
    labels,
    **split_options,
)



In [7]:
# The split arrays replace these from here on; release the originals.
del features, labels

In [8]:
# Two-stage model: PCA reduces the flattened pixel vectors, then an SVC
# classifies the reduced representation.
# NOTE(review): a float n_components is presumably read by PCA as an
# explained-variance target - confirm against the installed scikit-learn.
n_components=0.8
# Only the seeded PCA is built. The earlier unseeded instance was overwritten
# before it was ever used.
pca=PCA(n_components=n_components, random_state=42)
svm=SVC()
pipeline=Pipeline([('pca',pca),
                   ('svm',svm)])

In [9]:
# Search space for the grid search. Keys follow the "<step>__<parameter>"
# naming, using the step names given to the pipeline ('pca' and 'svm').
param_grid = dict(
    pca__n_components=[2, 1, 0.9, 0.8],
    svm__kernel=['linear', 'rbf', 'poly', 'sigmoid'],
)

In [None]:
# 3-fold cross-validated search over every combination in param_grid,
# timed so the cost of a full run is visible.
start_time=time.time()

grid_search=GridSearchCV(pipeline,param_grid,cv=3,verbose=4)
grid_search.fit(x_train,y_train)

end_time=time.time()

# Report the duration; the timestamps were previously recorded but never used.
elapsed_seconds=end_time-start_time
print(f"Grid search finished in {elapsed_seconds/60:.1f} minutes.")

Fitting 3 folds for each of 16 candidates, totalling 48 fits
[CV 1/3] END pca__n_components=2, svm__kernel=linear;, score=0.528 total time=  37.9s
[CV 2/3] END pca__n_components=2, svm__kernel=linear;, score=0.525 total time=  34.6s
[CV 3/3] END pca__n_components=2, svm__kernel=linear;, score=0.537 total time=  31.3s
[CV 1/3] END pca__n_components=2, svm__kernel=rbf;, score=0.570 total time=  28.9s
[CV 2/3] END pca__n_components=2, svm__kernel=rbf;, score=0.560 total time=  26.8s
[CV 3/3] END pca__n_components=2, svm__kernel=rbf;, score=0.564 total time=  26.1s
[CV 1/3] END pca__n_components=2, svm__kernel=poly;, score=0.504 total time=  19.1s
[CV 2/3] END pca__n_components=2, svm__kernel=poly;, score=0.485 total time=  19.1s
[CV 3/3] END pca__n_components=2, svm__kernel=poly;, score=0.492 total time=  19.9s
[CV 1/3] END pca__n_components=2, svm__kernel=sigmoid;, score=0.507 total time=  18.0s
[CV 2/3] END pca__n_components=2, svm__kernel=sigmoid;, score=0.495 total time=  18.8s
[CV 3/

In [None]:
# The search has been fitted; the training split is no longer needed.
del x_train, y_train

In [None]:
# Pull the winning configuration out of the finished grid search.
best_pipeline=grid_search.best_estimator_
best_params=grid_search.best_params_
best_score=grid_search.best_score_

print("Best Parameters: ",best_params)
print("Best Score: ",best_score)

# Persist the fitted pipeline so the search does not have to be repeated.
# model_file_path and joblib were set up for this but never used.
joblib.dump(best_pipeline,model_file_path)
print("Saved model to: ",model_file_path)

In [None]:
# Score the selected pipeline on the held-out split.
accuracy = best_pipeline.score(x_test, y_test)
print("Accuracy: ", accuracy)

In [None]:
# Predict the held-out split and summarise the per-class metrics.
y_pred=best_pipeline.predict(x_test)

# Order matches the label ids assigned when the images were loaded
# (cat -> 0, dog -> 1).
target_names=['Cat','Dog']
# labels=[0,1] pins each name to its id, so the report stays valid even if
# one class happens to be missing from y_test or y_pred.
classification_rep= classification_report(y_test,y_pred,labels=[0,1],target_names=target_names)
print("Classification Report:\n",classification_rep)

# Explicit encoding so the saved report does not depend on the platform default.
with open(classification_file_path,'w',encoding='utf-8') as file:
    file.write(classification_rep)

In [None]:
# Confusion matrix of the held-out predictions, saved to disk and displayed.
# Class names in label-id order (cat -> 0, dog -> 1, as assigned at load time).
class_names=['Cat','Dog']
# labels=[0,1] fixes the matrix to 2x2 in that order so the tick labels
# below always line up with the rows and columns.
cm=confusion_matrix(y_test,y_pred,labels=[0,1])
sns.heatmap(cm,annot=True,fmt="d",cmap="Blues",
            xticklabels=class_names,yticklabels=class_names)
plt.title("Confusion Matrix")
plt.xlabel("Predicted Labels")
plt.ylabel("True labels")
# Save before show(): the figure may be cleared once it has been displayed.
plt.savefig(confusion_image_path)
plt.show()