In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/dogs-vs-cats/test1.zip
/kaggle/input/dogs-vs-cats/train.zip
/kaggle/input/dogs-vs-cats/sampleSubmission.csv


In [5]:
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import DataLoader,Dataset
from  torchvision.models import resnet18
from torch.optim import adam
from PIL import Image

# Unzipping train data

In [3]:
import zipfile

path = '/kaggle/input/dogs-vs-cats/train.zip'  # replace with your path

with zipfile.ZipFile(path, 'r') as zip_ref:
    zip_ref.extractall('train')  # replace with your destination directory

# Unzipping test data

In [30]:
path = '/kaggle/input/dogs-vs-cats/test1.zip'  # replace with your path

with zipfile.ZipFile(path, 'r') as zip_ref:
    zip_ref.extractall('test')  # replace with your destination directory

In [4]:
# transform for images
transform=transforms.Compose([
    transforms.Resize((224,224),antialias=True),  #because the resNet is trained on imageNet of dim 224,224 RGB
    transforms.ToTensor()
])

# **Creating custom dataset for training set**

In [19]:
class Data(Dataset):
    def __init__(self,path):
        self.path=path
        self.files=os.listdir(path)
        self.mapping={'dog':0,'cat':1}
    def __len__(self):
        return len(self.files)
    def __getitem__(self,ind):
        x=Image.open(self.path+'/'+self.files[ind])
        return transform(x),self.mapping[self.files[ind][0:3]]
X_train=Data('train/train')
train_loader=DataLoader(X_train,batch_size=64,shuffle=True)

# **Creating custom dataset for testing set**

In [33]:
class test_Data(Dataset):
    def __init__(self,path):
        self.path=path
        self.files=os.listdir(path)
    def __len__(self):
        return len(self.files)
    def __getitem__(self,ind):
        x=Image.open(self.path+'/'+self.files[ind])
        return transform(x)
    
    
X_test=test_Data('test/test1')
test_loader=DataLoader(X_test,batch_size=64,shuffle=True)

# Loading **resnet18** as **pre-trained feature extractor**

In [20]:
model=resnet18(pretrained=True)
class Identity(nn.Module):
    def __init__(self):
        super().__init__()
    def forward(self,x):
        return x
model.fc=Identity()
for i in model.parameters():
    i.requires_grad=False



In [21]:
device=torch.device('cpu')
if torch.cuda.is_available(): device=torch.device('cuda')
device

device(type='cuda')

# Extracting features of training data

In [23]:
model=model.to(device)
embeddings=[];labels=[]
for i,(x,y) in enumerate(train_loader):
    if i%50==0: print(i)
    x=x.to(device);y=y.to(device)
    pred=model(x)
    embeddings+=list(pred.cpu().numpy())
    labels+=list(y.cpu().numpy())

0
50
100
150
200
250
300
350


# Extracting features of testing data

In [35]:
model=model.to(device)
test_embeddings=[]
for i,x in enumerate(test_loader):
    if i%50==0: print(i)
    x=x.to(device)
    pred=model(x)
    test_embeddings+=list(pred.cpu().numpy())

0
50
100
150


In [24]:
embeddings=np.array(embeddings);labels=np.array(labels)  #training data
len(embeddings),len(labels)

(25000, 25000)

In [25]:
print(embeddings.shape,labels.shape)

(25000, 512) (25000,)


In [37]:
test_embeddings=np.array(test_embeddings)    #testing data
len(test_embeddings)

12500

# Training **SVM** on extrcated features 

In [27]:
from sklearn import svm
from sklearn.model_selection import train_test_split

X = embeddings
y = labels

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a SVM classifier
clf = svm.SVC()

# Train the model using the training sets
clf.fit(X_train,y_train)

# Performance

In [29]:
y_pred = clf.predict(test_embeddings)

from sklearn.metrics import classification_report

# Model Evaluation
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.98      0.98      0.98      2454
           1       0.98      0.98      0.98      2546

    accuracy                           0.98      5000
   macro avg       0.98      0.98      0.98      5000
weighted avg       0.98      0.98      0.98      5000



# Saving predictions on **test data**

In [40]:
predictions=clf.predict(test_embeddings)
df=pd.DataFrame({'Labels':predictions})
df.to_csv('Predictions_on_test_set.csv')