# Prodigy InfoTech
## Machine Learning Internship Task

### Task-3: Implement a Support Vector Machine (SVM) to classify images of dogs and cats from the Kaggle dataset
### by: Deepank Tyagi

# 1. Importing Libraries

In [41]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from skimage.transform import resize
from skimage.io import imread

import matplotlib.pyplot as plt
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score 
from sklearn.metrics import classification_report


# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# Example of an image path inside the input dataset: /kaggle/input/dogs-cats-images/dataset/test_set/dogs/dog.4329.jpg

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

# 2. Data Transformation from .jpg format to a NumPy array of size 50 x 50 x 3

In [43]:
# Build the training matrix: every image under datadir/<category> is read,
# resized to 50 x 50 x 3, flattened to a 7500-value row and paired with the
# integer label of its category folder.
Categories={"cats":0,"dogs":1}  # folder name -> class label
flat_data_arr=[] #input array: one flattened image per entry
target_arr=[] #output array: the class label of the matching image
datadir='/kaggle/input/dogs-cats-images/dataset/training_set'
#path which contains all the categories of images
for i in Categories.keys():
    print(f'loading... category : {i}')
    path=os.path.join(datadir,i)
    # os.listdir returns entries in arbitrary, filesystem-dependent order.
    # Sorting pins the row order so the seeded train/test split that follows
    # selects the same images on every run and every machine.
    for img in sorted(os.listdir(path)):
        img_array=imread(os.path.join(path,img))
        img_resized=resize(img_array,(50,50,3))  # uniform shape -> equal-length rows
        flat_data_arr.append(img_resized.flatten())
        target_arr.append(Categories[i])
    print(f'loaded category:{i} successfully')
# Stack the per-image rows into a 2-D feature matrix and a 1-D label vector.
flat_data=np.array(flat_data_arr)
target=np.array(target_arr)


loading... category : cats
loaded category:cats successfully
loading... category : dogs
loaded category:dogs successfully


In [44]:
# Peek at the label vector (0 = cats, 1 = dogs, as defined in Categories).
target

array([0, 0, 0, ..., 1, 1, 1])

In [45]:
# Wrap the flattened pixel matrix in a DataFrame (one row per image),
# then append the class labels as the final 'Target' column.
df = pd.DataFrame(data=flat_data)
df['Target'] = target

# Display (rows, columns) as the cell output.
df.shape

(8000, 7501)

In [46]:
# Preview the first rows: the pixel-value columns followed by the Target label.
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,7491,7492,7493,7494,7495,7496,7497,7498,7499,Target
0,0.893574,0.795623,0.46624,0.897426,0.804239,0.476445,0.899916,0.813369,0.487788,0.908732,...,0.965395,0.835128,0.468709,0.938459,0.815807,0.446199,0.908224,0.797824,0.423863,0
1,0.82957,0.703083,0.683669,0.861177,0.798782,0.792381,0.814652,0.710885,0.69174,0.861401,...,0.130778,0.077712,0.051136,0.110141,0.063025,0.045822,0.07335,0.051951,0.040568,0
2,0.01642,0.01642,0.024263,0.022411,0.022411,0.030251,0.010398,0.010398,0.017773,0.004878,...,0.441801,0.287559,0.140733,0.406142,0.256772,0.118479,0.417788,0.256849,0.111534,0
3,0.151232,0.16759,0.167623,0.174395,0.187616,0.204817,0.14929,0.171805,0.167438,0.155998,...,0.280683,0.212633,0.190408,0.276556,0.208559,0.190989,0.200887,0.14831,0.124248,0
4,0.20529,0.20529,0.20529,0.208228,0.208228,0.208228,0.215489,0.215489,0.215489,0.216669,...,0.001477,0.109318,0.121062,0.011695,0.121017,0.131868,0.013829,0.121297,0.13251,0


# 3. Splitting Data into Independent and Dependent Variables

In [47]:
# The label sits in the last column; everything before it is a pixel feature.
feature_count = df.shape[1] - 1

# input data: all pixel columns
x = df.iloc[:, :feature_count]
# output data: the trailing label column
y = df.iloc[:, feature_count]

# 4. Train Test Split

In [48]:
# Hold out 20% of the rows for testing. The split is stratified on y and
# uses a fixed seed so it can be repeated.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.20,
    random_state=45,
    stratify=y,
)

# 5. Model Training

In [49]:
# Creating a support vector classifier.
# probability=True enables probability estimates, which scikit-learn fits with
# an internal cross-validation over shuffled training data. random_state pins
# that shuffle so repeated runs yield the same estimates (same seed as the
# train/test split).
model=svm.SVC(probability=True,random_state=45)


In [50]:
# Fit the classifier on the training split (x_train features, y_train labels).
model.fit(x_train,y_train)       # model training

In [51]:
# Testing the model using the testing data
y_pred = model.predict(x_test)

# Calculating the accuracy of the model.
# accuracy_score is documented as (y_true, y_pred); passing the ground truth
# first matches that signature and the classification_report call used for the
# model summary. The resulting value is the same either way.
accuracy = accuracy_score(y_test, y_pred)

# Print the accuracy of the model
print(f"The model is {accuracy*100}% accurate")

The model is 64.9375% accurate


In [61]:
# Read a single dog image from the test_set folder to try the trained model on one sample.
test_image_read=imread('/kaggle/input/dogs-cats-images/dataset/test_set/dogs/dog.4332.jpg')             # testing the model on an instance taken from the input dataset

In [62]:
# Resize to the same 50 x 50 x 3 shape used for the training images so the
# flattened vector has the same number of features the model was fitted on.
test_image_arr=resize(test_image_read,(50,50,3))

In [63]:
# Flatten the resized image to a 1-D feature vector, the same layout as each
# training row. flatten() already returns a new NumPy array, so wrapping the
# result in np.array() again only made a second, redundant copy.
flattened_test_image_array=test_image_arr.flatten()

In [64]:
# Present the single image as a one-row batch (1, n_features), take the
# first (only) predicted label and print it.
single_sample = flattened_test_image_array.reshape(1, -1)
predicted_label = model.predict(single_sample)[0]
print(predicted_label)        # 1 means it's a dog, which is correct for this image

1


# 6. Model Summary

In [40]:
# Per-class precision / recall / F1 on the test split.
# target_names follow the label order: 0 -> 'cat', 1 -> 'dog'.
report = classification_report(
    y_test,
    y_pred,
    target_names=['cat', 'dog'],
)
print(report)


              precision    recall  f1-score   support

         cat       0.65      0.65      0.65       800
         dog       0.65      0.65      0.65       800

    accuracy                           0.65      1600
   macro avg       0.65      0.65      0.65      1600
weighted avg       0.65      0.65      0.65      1600

