In [40]:
import os 
import cv2
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score


In [46]:
path = 'data/train/'
data = []  # Initialize list to hold image data
labels = []  # Initialize list to hold labels

count = 0 
# loading and setting up my training images
for filename in os.listdir(path):
    count += 1
    # Check if the file is an image (you can add more image extensions if needed)
    if filename.endswith('.jpg') and (count < 1000 or (count > 12500 and count < 13500)):
        img = cv2.imread(os.path.join(path, filename))
        img = (cv2.resize(img, (170, 100))).flatten()
        img = img/255
        data.append(img)  # Append each flattened image to the list
        if filename.startswith('dog'):
            labels.append(1)
        else:
            labels.append(0)

In [47]:
# Convert lists to NumPy arrays
data = np.array(data)
labels = np.array(labels)

In [29]:
pca = PCA(n_components= 1900)
data_pca = pca.fit_transform(data)

In [48]:
# Splitting data
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, shuffle=True)

In [None]:
# Performing grid search to try different combinations of parameters to get the best one
clf = SVC(kernel='rbf', random_state=42)
parameters = [{'gamma':[0.01, 0.0001], 'C':[1, 10, 1000]}]
gridSearch = GridSearchCV(clf, parameters, verbose=10)
gridSearch.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV 1/5; 1/6] START C=1, gamma=0.01.............................................
[CV 1/5; 1/6] END ..............C=1, gamma=0.01;, score=0.500 total time= 2.5min
[CV 2/5; 1/6] START C=1, gamma=0.01.............................................
[CV 2/5; 1/6] END ..............C=1, gamma=0.01;, score=0.500 total time= 2.7min
[CV 3/5; 1/6] START C=1, gamma=0.01.............................................
[CV 3/5; 1/6] END ..............C=1, gamma=0.01;, score=0.500 total time= 2.7min
[CV 4/5; 1/6] START C=1, gamma=0.01.............................................
[CV 4/5; 1/6] END ..............C=1, gamma=0.01;, score=0.502 total time= 2.9min
[CV 5/5; 1/6] START C=1, gamma=0.01.............................................
[CV 5/5; 1/6] END ..............C=1, gamma=0.01;, score=0.502 total time= 2.7min
[CV 1/5; 2/6] START C=1, gamma=0.0001...........................................
[CV 1/5; 2/6] END ............C=1, gamma=0.0001;,

In [43]:
best = gridSearch.best_estimator_
best

In [44]:
y_pred = best.predict(X_test)
y_pred

array([0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1,
       0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0,
       0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0,
       0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0,
       1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1,
       1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0,
       0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1,

In [45]:
accuracy_score(y_test, y_pred)

0.6225