## Code Sources

- https://towardsdatascience.com/image-feature-extraction-using-pytorch-e3b327c3607a

- https://github.com/MnCSSJ4x/VR-MiniProject/blob/main/VR3b.ipynb

- https://colab.research.google.com/github/ashishpatel26/Awesome-Pytorch-Tutorials/blob/main/17.Pytorch%20Transfer%20learning%20with%20Caltech101.ipynb

## CUDA

In [57]:
from torch import cuda
from torch import device

In [58]:
'cuda:0' if cuda.is_available() else 'cpu'

'cuda:0'

In [59]:
device = device('cuda:0' if cuda.is_available() else 'cpu')


## VGG

In [60]:
from torchvision.models import vgg11

In [61]:
vgg = vgg11(pretrained=True)
print(vgg)



VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

## Feature Extractor

In [62]:
from torch import nn
from torchsummary import summary

In [63]:
class FeatureExtractor(nn.Module):

  def __init__(self, model):
    super(FeatureExtractor, self).__init__()
    # Extract Feature Layers
    self.features = model.features
    # Extract Average Pooling Layer
    self.pooling = model.avgpool
    # Convert the image into one-dimensional vector
    self.flatten = nn.Flatten()
  
  def forward(self, x):
    # Take image x and return a feature vector
    out = self.features(x)
    out = self.pooling(out)
    out = self.flatten(out)
    return out 

feature_extractor = FeatureExtractor(vgg)
feature_extractor = feature_extractor.to(device)

In [64]:
print(summary(feature_extractor, input_size=(3, 224, 224)))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 224, 224]           1,792
              ReLU-2         [-1, 64, 224, 224]               0
         MaxPool2d-3         [-1, 64, 112, 112]               0
            Conv2d-4        [-1, 128, 112, 112]          73,856
              ReLU-5        [-1, 128, 112, 112]               0
         MaxPool2d-6          [-1, 128, 56, 56]               0
            Conv2d-7          [-1, 256, 56, 56]         295,168
              ReLU-8          [-1, 256, 56, 56]               0
            Conv2d-9          [-1, 256, 56, 56]         590,080
             ReLU-10          [-1, 256, 56, 56]               0
        MaxPool2d-11          [-1, 256, 28, 28]               0
           Conv2d-12          [-1, 512, 28, 28]       1,180,160
             ReLU-13          [-1, 512, 28, 28]               0
           Conv2d-14          [-1, 512,

In [65]:
import cv2
from torchvision import transforms
from torch import no_grad

## Extract Features for Caltech 101 Dataset

In [66]:
import tarfile
from imutils import paths
import os
import cv2
import numpy as np
from torchvision import transforms
import pandas as pd

In [67]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [68]:
with tarfile.open('drive/MyDrive/101_ObjectCategories.tar.gz', 'r:gz') as tar:
    tar.extractall()

In [69]:
image_paths = list(paths.list_images('./101_ObjectCategories'))

images = []
labels = []

for img_path in image_paths:

    label = img_path.split(os.path.sep)[-2]
    if label == "BACKGROUND_Google":
        continue
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    
    images.append(img)
    labels.append(label)
    
images = np.array(images, dtype=object)
labels = np.array(labels)

In [70]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [71]:
features_array = []

for i in range(len(images)):
    img = images[i]
    img = transform(img)
    img = img.reshape(1, 3, 224, 224)
    img = img.to(device)
    with no_grad():
        img_features = feature_extractor(img)
    img_features = img_features.cpu().detach().numpy().reshape(-1)
    features_array.append(img_features)

In [72]:
np.array(features_array).shape

(8677, 25088)

In [73]:
X = np.array(features_array)
y = np.array(labels)

In [74]:
df = pd.DataFrame(X)
df['label'] = y

In [75]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25079,25080,25081,25082,25083,25084,25085,25086,25087,label
0,0.010238,0.659768,0.000000,0.000000,0.000000,0.623120,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,okapi
1,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,...,1.296640,0.924659,0.714404,2.672418,2.833159,1.084479,0.553126,2.131860,2.483721,okapi
2,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.047247,0.0,0.000000,0.000000,...,2.871006,2.271220,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,okapi
3,0.000000,0.000000,0.000000,0.000000,0.000000,2.053960,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,okapi
4,0.035070,1.269113,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,2.564570,3.207772,0.379335,2.440877,1.041688,0.000000,okapi
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8672,0.994941,0.255210,0.000000,0.000000,0.000000,0.000000,0.132186,0.0,0.000000,0.765939,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,stop_sign
8673,0.000000,0.000000,0.291836,1.152500,4.590219,0.000000,0.000000,0.0,2.784130,3.775471,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.953423,0.531103,0.000000,stop_sign
8674,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.0,0.234216,6.315652,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,stop_sign
8675,0.000000,1.118129,1.098119,0.100004,1.713001,2.250898,1.780118,0.0,3.472008,2.819709,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.232650,0.000000,0.000000,stop_sign


In [76]:
X=df.drop("label",axis=1)
y=df["label"]
import pickle
# Open a file and use dump()
with open('X.pkl', 'wb') as file:
    pickle.dump(X, file)
    
with open('y.pkl', 'wb') as file:
    pickle.dump(y, file)

In [77]:
import pickle
  
# Open the file in binary mode
with open('/content/drive/MyDrive/X.pkl', 'rb') as file:
    # Call load method to deserialze
    X = pickle.load(file)
with open('/content/drive/MyDrive/y.pkl', 'rb') as file:
    # Call load method to deserialze
    y = pickle.load(file)

### PCA with n_components=3000

In [78]:
from sklearn.decomposition import PCA
pca = PCA(n_components=3000)
X=pca.fit_transform(X)
X

array([[ 8.6046972e+00, -1.6968899e+01, -6.3934875e-01, ...,
        -3.3809289e-01,  5.8985364e-02, -4.0231389e-01],
       [ 7.1702423e+00, -1.0276338e+01,  3.6042738e+00, ...,
         4.3040797e-01, -2.8411938e-02,  3.8878322e-01],
       [-1.6676918e+01, -2.5323187e+01, -1.4546244e+01, ...,
         7.3668361e-02, -1.0873244e-01, -6.6175240e-01],
       ...,
       [-1.8654156e+01, -6.7168102e+00, -9.1311483e+00, ...,
         4.0779757e-01, -7.9280919e-01,  1.1923943e+00],
       [ 1.4903467e+00, -1.3152141e+01, -3.4190893e+00, ...,
        -4.9426234e-03, -1.1872965e+00, -6.1413855e-03],
       [ 1.2132151e+01, -1.5452773e+01,  1.3334217e+01, ...,
         3.4842189e-02,  1.3406953e-02, -3.2451493e-01]], dtype=float32)

In [79]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
X_train.shape

(5813, 3000)

In [80]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(max_iter=100000).fit(X_train, y_train)
y_pred = clf.predict(X_test)
clf.score(X_test,y_test)

0.9221368715083799

In [81]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',kernel='linear'))
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

0.5495810055865922

In [82]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',kernel='rbf'))
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

0.31110335195530725

In [83]:
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(X_train, y_train)
neigh.score(X_test,y_test)

0.6466480446927374

In [84]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=20, random_state=42,n_estimators=500)
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

0.515013966480447

In [85]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=10, random_state=42)
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

0.42178770949720673

### Without PCA

In [86]:
import pickle
  
# Open the file in binary mode
with open('/content/drive/MyDrive/X.pkl', 'rb') as file:
    # Call load method to deserialze
    X = pickle.load(file)
with open('/content/drive/MyDrive/y.pkl', 'rb') as file:
    # Call load method to deserialze
    y = pickle.load(file)

In [87]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)
X_train.shape

(5813, 9216)

In [88]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(max_iter=100000).fit(X_train, y_train)
y_pred = clf.predict(X_test)
clf.score(X_test,y_test)

0.9221368715083799

In [89]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',kernel='linear'))
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

0.8879189944134078

In [90]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',kernel='rbf'))
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

0.8348463687150838

In [91]:
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(X_train, y_train)
neigh.score(X_test,y_test)

0.630586592178771

In [92]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=20, random_state=42,n_estimators=500)
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

0.6801675977653632

In [93]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=10, random_state=42)
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

0.5108240223463687

## Extract Features for Bikes vs Horses Dataset

In [94]:
from imutils import paths
import os
import cv2
import numpy as np
import pandas as pd
import torch

In [95]:
!unzip '/content/drive/MyDrive/Assignment2_BikeHorses.zip'

Archive:  /content/drive/MyDrive/Assignment2_BikeHorses.zip
replace Assignment2_BikeHorses/Bikes/0042.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


In [96]:
images = []
labels = []

In [97]:
image_paths = list(paths.list_images('./Assignment2_BikeHorses/Horses'))

In [98]:
for img_path in image_paths:
    label = 'horse'
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    images.append(img)
    labels.append(label)

In [99]:
image_paths = list(paths.list_images('./Assignment2_BikeHorses/Bikes'))

In [100]:
for img_path in image_paths:
    label = 'bike'
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    images.append(img)
    labels.append(label)

In [101]:
transform = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean = [0.485,0.456,0.406], std=[0.229,0.224,0.225]),
])

In [102]:
features_array = []

for i in range(len(images)):
    img = images[i]
    img = transform(img)
    img = img.reshape(1, 3, 224, 224)
    img = img.to(device)
    with torch.no_grad():
        img_features = feature_extractor(img)
    img_features = img_features.cpu().detach().numpy().reshape(-1)
    features_array.append(img_features)

In [103]:
np.array(features_array).shape

(179, 25088)

In [104]:
X = np.array(features_array)
y = np.array(labels)

In [105]:
df = pd.DataFrame(X)
df['label'] = y

In [106]:
X=df.drop("label",axis=1)
y=df["label"]
import pickle
# Open a file and use dump()
with open('X_bikevshorse.pkl', 'wb') as file:
    pickle.dump(X, file)
    
with open('y_bikevshorse.pkl', 'wb') as file:
    pickle.dump(y, file)

In [107]:
import pickle
  
# Open the file in binary mode
with open('X_bikevshorse.pkl', 'rb') as file:
    # Call load method to deserialze
    X = pickle.load(file)
with open('y_bikevshorse.pkl', 'rb') as file:
    # Call load method to deserialze
    y = pickle.load(file)

## Model

In [108]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,25079,25080,25081,25082,25083,25084,25085,25086,25087,label
0,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.025331,2.444167,3.748494,1.629134,2.421952,1.067587,horse
1,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,horse
2,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,horse
3,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,horse
4,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,1.741845,0.117009,0.000000,0.000000,0.000000,0.302195,0.783861,0.000000,0.000000,horse
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.000000,0.000000,0.985562,0.000000,0.000000,0.037411,0.125173,0.000000,0.000000,bike
175,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,bike
176,0.143615,0.157072,0.0,0.0,0.0,0.0,0.0,0.0,0.732357,0.722169,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,bike
177,0.000000,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,...,2.219155,0.000000,0.000000,0.000000,0.889630,0.781400,0.340269,0.000000,0.000000,bike


In [109]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

In [110]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

pca = PCA(n_components=150)
X_pca = pca.fit_transform(X_scaled)

In [111]:
pca.explained_variance_ratio_.sum()

0.9630165

In [112]:
df_pca = pd.DataFrame(X_pca)
df_pca['label'] = labels

In [113]:
label_encoder = LabelEncoder()
df_pca['label']= label_encoder.fit_transform(df_pca['label'])

In [114]:
df_pca

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,141,142,143,144,145,146,147,148,149,label
0,-24.052631,-7.308313,6.023119,-11.855042,-4.026018,4.235047,4.629198,7.450704,-1.193925,0.283594,...,0.730948,-5.368753,-0.941075,2.200697,-0.212551,-0.346409,0.633629,-0.794190,0.010791,1
1,-30.392269,-6.124090,-9.394124,-12.403003,-16.250395,-4.209332,18.123777,26.876623,-11.464371,-10.298728,...,-0.156988,-1.897434,-1.189050,1.407846,2.031697,-0.254302,0.629129,-0.621455,0.572916,1
2,-40.689720,-1.737145,-14.706333,-9.182796,-3.575155,1.591406,-6.303109,-19.871731,2.633568,-3.188074,...,-0.323761,4.527821,1.838418,0.082660,1.242775,0.107580,-1.339644,-0.756711,0.317032,1
3,-31.084015,5.900604,-10.848145,16.341608,0.453425,-4.673359,-14.576808,-24.761694,15.646574,-4.431576,...,0.933790,0.222904,-0.813004,0.348171,-0.116827,0.380993,0.276049,0.388913,0.884402,1
4,-31.860512,0.250140,-2.969397,-4.813212,5.201406,1.077875,-12.110016,-5.707363,6.771334,1.460532,...,0.369469,0.003981,0.029985,0.784361,-0.663807,0.093001,0.756636,-0.716571,-0.789307,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
174,41.868370,-42.808819,-0.630585,0.179925,3.031354,-21.701130,15.378249,-3.581931,-15.064744,-3.455476,...,-0.738121,-0.282960,3.519186,-14.052541,-15.554027,-5.585742,-11.844168,36.552036,47.203747,0
175,13.108624,22.567858,2.565637,74.422958,-15.835024,-22.406250,-36.394875,23.587275,18.361597,12.543007,...,-0.713579,0.061013,0.424928,0.165537,-0.562429,0.627574,-0.683178,-0.095810,-0.255984,0
176,31.351440,43.827309,-2.012079,-25.492165,-6.229031,12.796922,9.121084,6.124635,23.289898,9.651016,...,3.049202,-0.439162,0.308848,-3.777290,0.311121,4.705581,10.790948,3.763995,0.803503,0
177,40.928211,-36.149387,-2.567742,-8.745031,3.405827,-17.957893,0.476254,-4.096447,-2.585455,1.792291,...,-6.784842,3.111198,-10.127982,-9.292953,-1.851978,-0.280379,-0.150911,-0.874866,-2.494065,0


In [115]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=42)
X_train.shape

(89, 25088)

In [116]:
from sklearn.linear_model import LogisticRegression
clf = LogisticRegression(max_iter=100000).fit(X_train, y_train)
y_pred = clf.predict(X_test)
clf.score(X_test,y_test)

1.0

In [117]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',kernel='linear'))
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

0.9444444444444444

In [118]:
from sklearn.pipeline import make_pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
clf = make_pipeline(StandardScaler(), SVC(gamma='auto',kernel='rbf'))
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

0.7444444444444445

In [119]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=10, random_state=42)
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

1.0

In [120]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=20, random_state=42)
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

1.0

In [121]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=50, random_state=42)
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

1.0

In [122]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=20, random_state=42,n_estimators=500)
clf.fit(X_train, y_train)
clf.score(X_test,y_test)

1.0

In [123]:
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=5)
neigh.fit(X_train, y_train)
neigh.score(X_test,y_test)

1.0