# Dog-Cat Classification using SVM (Support Vector Machine)

### Load image data

In [None]:
# to read image file and convert into array data
from skimage.io import imread
from skimage.transform import resize

# for numpy arrays
import numpy as np

# Folder prefix of the training images; file paths are built from it by
# plain string concatenation, so the trailing slash matters
dir = "dataset/train/"

# Class names: used as file-name prefix and as label
animals = ["cat", "dog"]

# Placeholders for the flattened images and their labels
image_features, image_labels = [], []

# reading and showing an image
# img = imread(dir + cat_type + str(0) + ".jpg")
# imshow(img)

# load images into data arrays
def generate_img_data(size, animals, imgtype, dir, img_shape=(150, 150, 3)):
    """Read images from disk and turn them into flat feature vectors.

    For every name in ``animals`` the files ``<dir><animal>.<i><imgtype>``
    with ``i`` in ``range(size)`` are read, resized to ``img_shape`` and
    flattened to one dimension.

    Parameters
    ----------
    size : int
        Number of images to load per animal (file indices ``0 .. size - 1``).
    animals : iterable of str
        Class names; used both as file-name prefix and as label.
    imgtype : str
        File extension including the dot, e.g. ``".jpg"``.
    dir : str
        Directory prefix. It is concatenated as-is, so it has to end with a
        path separator.
    img_shape : tuple of int, optional
        Target shape handed to ``resize``. Defaults to ``(150, 150, 3)``.

    Returns
    -------
    features : list
        One flattened image per file read, in reading order.
    labels : list
        The animal name belonging to each entry of ``features``.
    """
    features = []
    labels = []
    for animal in animals:
        for i in range(size):
            # build the file path, e.g. "dataset/train/cat.0.jpg"
            path = f"{dir}{animal}.{i}{imgtype}"
            # resize before flattening so every image yields a vector of
            # the same length
            flat = resize(imread(path), img_shape).flatten()
            features.append(flat)
            labels.append(animal)
    return features, labels

# Read 1000 images per class from the training folder
image_features, image_labels = generate_img_data(
    size=1000, animals=animals, imgtype=".jpg", dir=dir
)

# Convert the Python lists to numpy arrays for the following steps
img_data, img_labels = np.array(image_features), np.array(image_labels)

### Create dataframe from image data

In [49]:
# need to import pandas for dataframes
import pandas as pd

# One row per image, one column per value of the flattened image
animaldata = pd.DataFrame(data=img_data)
# The class name goes into an extra column at the end
animaldata["label"] = img_labels

# (rows, columns) of the assembled table
animaldata.shape

(2000, 67501)

### Splitting data into features and labels

#### Features are the image data
#### Labels are the names of the animals

In [50]:
# Number of rows (images) in the table
print("Size of animal data: \n", animaldata.shape[0], "\n")

# First five rows as a quick look at the data
print(animaldata.head(n=5))

# Input: every column except the trailing label column
# Output: the trailing label column
x, y = animaldata.iloc[:, :-1], animaldata.iloc[:, -1]

# # Separate features
# X = animaldata.drop("label", axis = 1)

# # Separate labels
# Y = animaldata["label"]

Size of animal data: 
 2000 

          0         1         2         3         4         5         6  \
0  0.798806  0.645865  0.343904  0.815217  0.662276  0.360315  0.822486   
1  0.154919  0.174317  0.159261  0.158661  0.174558  0.170846  0.160470   
2  0.258682  0.201128  0.214404  0.224390  0.171584  0.183607  0.201301   
3  0.558339  0.522881  0.483010  0.558562  0.516440  0.478886  0.523867   
4  0.555814  0.515879  0.482742  0.596396  0.556461  0.523324  0.622525   

          7         8         9  ...     67491     67492         67493  \
0  0.669545  0.367584  0.832539  ...  0.006675  0.006675  4.885506e-07   
1  0.175946  0.187711  0.162110  ...  0.197737  0.138908  1.114610e-01   
2  0.154361  0.171906  0.218070  ...  0.140650  0.088461  1.408741e-01   
3  0.463714  0.431287  0.528344  ...  0.692322  0.668807  6.060492e-01   
4  0.582591  0.549454  0.646829  ...  0.030181  0.047528  7.797401e-02   

      67494     67495         67496     67497     67498         67499  lab

### Splitting data into train and test sets

In [51]:
# using the train/test splitting function of sklearn
# import train_test_split
from sklearn.model_selection import train_test_split

# x_* hold the features, y_* the labels.
# 20% of the rows are held out for testing, stratified on the label;
# random_state is fixed so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    stratify=y,
    random_state=77,
)

### Model Selection

#### We will use a Support Vector Machine (SVM) for classification

In [52]:
from sklearn import svm 

# display the training features produced by the split
x_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,67490,67491,67492,67493,67494,67495,67496,67497,67498,67499
974,0.928626,0.916862,0.952156,0.914247,0.902483,0.937727,0.893291,0.881526,0.910267,0.869149,...,0.842617,0.902019,0.878590,0.886383,0.923554,0.900125,0.907918,0.886349,0.862920,0.870713
1196,0.694028,0.671773,0.370313,0.798406,0.770116,0.483822,0.821739,0.782993,0.513607,0.642215,...,0.365048,0.605695,0.627126,0.274169,0.786602,0.807364,0.450756,0.830876,0.852978,0.485234
1595,0.703932,0.781752,0.843784,0.690804,0.780851,0.852119,0.695814,0.787641,0.859021,0.742885,...,0.561600,0.538189,0.523089,0.543743,0.432546,0.443797,0.482689,0.480896,0.480580,0.515638
1071,0.414801,0.338899,0.295226,0.298742,0.280335,0.306033,0.350036,0.320098,0.339982,0.454537,...,0.387205,0.534574,0.494739,0.432962,0.507020,0.466236,0.387172,0.450078,0.418391,0.343592
1202,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,1.000000,...,1.000000,1.000000,1.000000,0.999948,1.000000,1.000000,0.998853,1.000000,1.000000,0.999948
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
112,0.082382,0.082468,0.075251,0.094962,0.097496,0.081870,0.107494,0.110449,0.093546,0.108454,...,0.551303,0.535894,0.535894,0.535894,0.534008,0.534008,0.534008,0.556545,0.556545,0.556545
645,0.593646,0.593646,0.625018,0.598276,0.598276,0.629649,0.602212,0.602212,0.633585,0.605785,...,0.111651,0.235916,0.125321,0.065346,0.194681,0.117124,0.041735,0.394769,0.364838,0.323669
157,0.080698,0.076750,0.012418,0.081028,0.069418,0.010335,0.080359,0.060982,0.008827,0.082374,...,0.036099,0.096263,0.100185,0.029597,0.078562,0.082484,0.011922,0.093874,0.097796,0.027657
1284,0.933060,0.741416,0.657686,0.959342,0.761596,0.678784,0.968131,0.764176,0.683418,0.949880,...,0.575241,0.747922,0.661647,0.575373,0.750261,0.663987,0.573791,0.747830,0.661556,0.571359


### Create an SVM model

In [53]:
# required to combine classifier with parameters and create model
from sklearn.model_selection import GridSearchCV

# Candidate hyper-parameters for a grid search; not used while the
# GridSearchCV line below stays commented out
params = dict(
    C=[0.1, 1, 2, 100],
    gamma=[0.0001, 0.001, 0.1, 1],
    kernel=['rbf', 'poly'],
)

# RBF-kernel SVM classifier; probability=True enables predict_proba
svmclfr = svm.SVC(probability=True, random_state=0, kernel='rbf')

# To tune the classifier over `params` instead of using it directly:
# model = GridSearchCV(svmclfr, params)

model = svmclfr

### Fitting data into the model

In [54]:
# train the classifier on the training split
model.fit(X=x_train, y=y_train)

# from sklearn import svm, datasets
# from sklearn.model_selection import GridSearchCV
# iris = datasets.load_iris()

# print(iris.data.shape)

# parameters = {'kernel':('linear', 'rbf'), 'C':[1, 10]}
# svc = svm.SVC()
# clf = GridSearchCV(svc, parameters)
# clf.fit(iris.data, iris.target)
# sorted(clf.cv_results_.keys())

### Predict labels using test features

In [55]:
# predicted animal name for every row of the test features
pred_y_labels = model.predict(X=x_test)

### Model Evaluation : Accuracy

#### We will use the accuracy score to measure how well the model performs

In [67]:
# find accuracy of a model
from sklearn.metrics import accuracy_score

# accuracy_score takes the true labels first and the predictions second
acc = accuracy_score(y_test, pred_y_labels)

# report the accuracy as a whole-number percentage
print("Model accuracy is ", round(acc*100), "(appox)")

Model accuracy is  63 (appox)


### Saving the model

#### We will use joblib to save our model object to a file

In [2]:
# to save the model for later use
import joblib

# save the file to local device
# modelfile = "model/dogcatpredictor.sav"
# joblib.dump(model, modelfile)

### Loading the model from local file

#### The same can be done using joblib

In [3]:
# location of the saved classifier
modelfile = "model/dogcatpredictor.sav"

# NOTE: joblib.load unpickles the file, so only load model files you trust
loaded_model = joblib.load(modelfile)

## Test on a sample image

In [6]:
# display image
from skimage.io import imread, imshow
# resize is used below; imported here so this cell also runs on a fresh kernel
from skimage.transform import resize

# test data
dogpath = "dataset/test/8.jpg"

catpath = "dataset/test/21.jpg"

img = imread(dogpath)

imshow(img)

# same preprocessing as for the training images: resize, then flatten
img_resize = resize(img, (150, 150, 3))

# the model expects a 2D input: a list with one flattened image
test = [img_resize.flatten()]

probability = model.predict_proba(test)

print(probability)

# the columns of predict_proba follow model.classes_, so the class names are
# taken from the model instead of relying on the order of a separate list
for ind, val in enumerate(model.classes_):
    print(f'{val} = {round(probability[0][ind]*100)}%')

# predict returns an array with one label per sample; pick the single label
# so that it can be joined with the message text
print("The predicted image is : " + str(model.predict(test)[0]))

FileNotFoundError: No such file: 'c:\Users\ASUS\OneDrive\Desktop\Internship Projects\Prodigy Infotech\Dog Cat Classification\dataset\test\8.jpg'