In [1]:
import os
import cv2

# Location of the Kaggle training images (downloaded earlier via the Kaggle API).
path  = "/content/drive/MyDrive/kaggle/train"

train_images = []
train_labels = []


# Load every file in `path` as a 50x50, 3-channel RGB array and record its
# label (0 when the file name contains "cat", 1 otherwise). The two lists are
# appended in lockstep, so they stay index-aligned.
for img in os.listdir(path):
    img_path = os.path.join(path, img)
    # Read in colour: a grayscale read (flag 0) yields a single channel, which
    # the 3-channel cvtColor conversion below rejects and which does not match
    # the 3-channel input the feature extractor is built for.
    pet_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if pet_img is None:
        # cv2.imread signals an unreadable/non-image file by returning None
        # rather than raising; fail here with the offending path instead of
        # with an opaque error inside cv2.resize.
        raise OSError(f"Could not read image: {img_path}")
    pet_img = cv2.resize(pet_img, (50, 50))
    # OpenCV decodes to BGR channel order; swap to RGB.
    pet_img = cv2.cvtColor(pet_img, cv2.COLOR_BGR2RGB)
    train_images.append(pet_img)
    train_labels.append(0 if "cat" in img else 1)

In [2]:
import numpy as np

# Turn the accumulated Python lists into NumPy arrays (labels first, then the
# image stack); both names are rebound to the array versions.
train_labels = np.array(train_labels)
train_images = np.array(train_images)


In [3]:
# Targets: the labels are used exactly as loaded.
y_train = train_labels
# Inputs: pixel values divided by 255.0 (produces a floating-point array).
x_train = train_images / 255.0


In [5]:
from keras.applications.vgg16 import VGG16
# Side length, in pixels, of the square images the network accepts.
SIZE = 50

# VGG16 with ImageNet weights; include_top=False leaves out the
# classifier / fully connected layers so only the convolutional base is loaded.
VGG_model = VGG16(
    include_top=False,
    weights='imagenet',
    input_shape=(SIZE, SIZE, 3),
)

# Freeze every loaded layer: the pre-trained weights are used as-is and must
# not be updated.
for layer in VGG_model.layers:
    layer.trainable = False

# Show the architecture; with all layers frozen, trainable parameters will be 0.
VGG_model.summary()



Model: "vgg16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 50, 50, 3)]       0         
                                                                 
 block1_conv1 (Conv2D)       (None, 50, 50, 64)        1792      
                                                                 
 block1_conv2 (Conv2D)       (None, 50, 50, 64)        36928     
                                                                 
 block1_pool (MaxPooling2D)  (None, 25, 25, 64)        0         
                                                                 
 block2_conv1 (Conv2D)       (None, 25, 25, 128)       73856     
                                                                 
 block2_conv2 (Conv2D)       (None, 25, 25, 128)       147584    
                                                                 
 block2_pool (MaxPooling2D)  (None, 12, 12, 128)       0     

In [6]:
import pickle
# Extract the features of the training data with the pre-trained VGG16 base.
# NOTE: despite its name, `feature_extractor` holds the predicted feature
# maps (the output of VGG_model.predict), not a model object.
feature_extractor = VGG_model.predict(x_train)

# Store the raw feature maps for future use. The `with` block guarantees the
# file is flushed and closed even if pickling fails part-way.
filename = 'feature_train.feat'
with open(filename, 'wb') as feature_file:
    pickle.dump(feature_extractor, feature_file)

# Flatten each sample's feature map into a single row: (n_samples, n_features).
features = feature_extractor.reshape(feature_extractor.shape[0], -1)



In [20]:
# Load the test data as 50x50, 3-channel RGB arrays (the input shape
# VGG_model was built with). Files are taken in os.listdir order.
test_dir = "/content/drive/MyDrive/kaggle/test1"
test_images = []
for img in os.listdir(test_dir):
    img_path = os.path.join(test_dir, img)
    # Read in colour: a grayscale read (flag 0) yields a single channel, which
    # the 3-channel cvtColor conversion below rejects.
    pet_img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    if pet_img is None:
        # cv2.imread returns None for unreadable files. Raise instead of
        # skipping so the number of loaded images always matches the number
        # of files in the directory.
        raise OSError(f"Could not read image: {img_path}")
    pet_img = cv2.resize(pet_img, (50, 50))
    # OpenCV decodes to BGR channel order; swap to RGB.
    pet_img = cv2.cvtColor(pet_img, cv2.COLOR_BGR2RGB)
    test_images.append(pet_img)


KeyboardInterrupt: ignored

In [12]:
# Rebind test_images to a NumPy array built from the loaded images
# (copy=True is np.array's default, spelled out here).
test_images = np.array(test_images, copy=True)

In [None]:
# Send the test data through the same feature-extraction process as the
# training data. x_train was divided by 255.0 before being passed to
# VGG_model, so the test images must be scaled identically; otherwise the
# classifier sees test features on a different scale from those it was fit on.
X_test_feature = VGG_model.predict(np.asarray(test_images) / 255.0)
# Flatten each sample's feature map into a single row: (n_samples, n_features).
X_test_features = X_test_feature.reshape(X_test_feature.shape[0], -1)

from sklearn import svm

# Create an SVM classifier with a polynomial kernel.
clf = svm.SVC(C=0.01, kernel='poly', gamma=0.001)

# Train the model on the VGG16 training features (`features`) and the
# training labels (`y_train`).
clf.fit(features, y_train)

# Persist the fitted classifier. The `with` block guarantees the file is
# flushed and closed even if pickling fails part-way.
filename = 'finalized_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(clf, model_file)

# Predict the labels for the test features.
y_pred = clf.predict(X_test_features)

print(y_pred)

In [19]:
import pandas as pd
# Save the output file: one row per test file with its predicted label.
# NOTE(review): the directory is listed again here, so this assumes
# os.listdir returns the files in the same order as when the test images
# were loaded — confirm, or keep the file names from the loading step.
test_files = os.listdir("/content/drive/MyDrive/kaggle/test1")

# A length mismatch means predictions and file names are not aligned
# (e.g. the test-image load was interrupted); refuse to write a wrong file.
if len(test_files) != len(y_pred):
    raise ValueError(
        f"Found {len(test_files)} test files but {len(y_pred)} predictions"
    )

# Build the whole table in one call instead of growing it row by row.
df = pd.DataFrame({'id': test_files, 'label': y_pred})

# Save the DataFrame to a CSV file
df.to_csv('output.csv', index=False)