In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
from torch import optim
import cv2 as cv
import torch.nn.functional as F
from torchvision import datasets, transforms, models
from torch.utils.data.sampler import SubsetRandomSampler
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
import copy
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Display whether PyTorch can see a CUDA device in this session.
torch.cuda.is_available()

In [3]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device("cpu")

In [4]:
# Show which device the rest of the notebook will run on.
print(device)

In [5]:
# Preprocessing applied to every image before it is fed to the network:
# ndarray -> PIL image -> 512x512 centre crop -> 224 resize -> float tensor
# -> per-channel normalisation.
# NOTE(review): CenterCrop zero-pads images smaller than the crop size, so
# inputs below 512 px end up surrounded by a black border before the resize.
# Confirm this is intended for the datasets used here.
data_transforms = transforms.Compose([
        transforms.ToPILImage(),
        transforms.CenterCrop(512),
        transforms.Resize(224),
        transforms.ToTensor(),
        # ImageNet statistics expected by torchvision's pretrained weights.
        # They are listed in RGB order, while cv.imread yields BGR arrays.
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

In [6]:
def getPaths(path):
    """Collect image paths and integer class labels from a class-per-folder tree.

    Each immediate sub-directory of ``path`` is treated as one class. Classes
    are numbered in alphabetical order of their directory name, so two trees
    with the same class folders (e.g. a train and a test split) always get the
    same label for the same class, regardless of filesystem enumeration order.

    Args:
        path: Root directory whose immediate sub-directories are the classes.

    Returns:
        A tuple ``(path_images, class_labels, class_count)`` where
        ``path_images`` is the list of file paths, ``class_labels`` is the
        parallel list of integer class indices, and ``class_count`` is the
        number of class directories found. A root that is not an existing
        directory yields ``([], [], 0)``.
    """
    path_images = []
    class_labels = []

    # Keep the "nothing found" result for a missing root instead of raising.
    if not os.path.isdir(path):
        return path_images, class_labels, 0

    # Only the first directory level defines classes; sorting pins the mapping
    # from directory name to class index.
    class_dirs = sorted(
        entry for entry in os.listdir(path)
        if os.path.isdir(os.path.join(path, entry))
    )

    for label, class_dir in enumerate(class_dirs):
        dirpath = os.path.join(path, class_dir)
        for file in sorted(os.listdir(dirpath)):
            filepath = os.path.join(dirpath, file)
            # Skip nested directories: they are not images.
            if os.path.isfile(filepath):
                path_images.append(filepath)
                class_labels.append(label)

    return path_images, class_labels, len(class_dirs)

In [7]:
# Index the training split: file paths, integer labels and number of classes.
path_images, class_labels, class_count = getPaths(
    path="../input/intel-image-classification/seg_train/seg_train"
)

In [8]:
# Show a compact summary (number of samples, number of classes) instead of
# dumping the full per-image label list into the cell output.
len(class_labels), class_count

In [9]:
# Load AlexNet with its pretrained weights; it serves as the feature backbone.
model = models.alexnet(pretrained=True)

In [10]:
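# Optional: freeze the pretrained weights. Left disabled because the features
# are computed under torch.no_grad(), so no gradients are tracked anyway.
# for param in model.parameters():
#     param.requires_grad = False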

In [11]:
# Print the layer structure of the loaded network.
print(model)

In [12]:
class FeatureExtractor(nn.Module):
    """Wrap an AlexNet-style network so it returns penultimate activations.

    The wrapped ``model`` must expose ``features``, ``avgpool`` and
    ``classifier`` sub-modules, with ``classifier`` being an iterable sequence
    of layers. The extractor reuses (does not copy) those sub-modules, so it
    shares weights with ``model``.

    Args:
        model: Network providing ``features``, ``avgpool`` and ``classifier``.
    """

    def __init__(self, model):
        super().__init__()

        # Convolutional backbone and pooling, reused as-is.
        self.features = nn.Sequential(*list(model.features))
        self.pooling = model.avgpool
        self.flatten = nn.Flatten()

        # Keep every classifier layer except the last one. The final layer
        # consumes the output of the layers before it, not the flattened
        # pooled map, and it produces class scores rather than features.
        self.fc = nn.Sequential(*list(model.classifier)[:-1])

    def forward(self, x):
        """Return the activations that feed the network's final layer."""
        out = self.features(x)
        out = self.pooling(out)
        out = self.flatten(out)
        return self.fc(out)

In [13]:
new_model = FeatureExtractor(model)
# Width of the activations that feed the final classifier layer.
feature_count = model.classifier[6].in_features
# Replace the final layer with a pass-through so `model` outputs those
# pretrained activations directly. A freshly initialised nn.Linear here would
# apply an untrained, unseeded projection that differs on every run.
model.classifier[6] = nn.Identity()

In [14]:
# Print the network again to inspect the replaced final classifier layer.
print(model)

In [15]:
# Print the layer structure of the FeatureExtractor wrapper.
print(new_model)

In [16]:
def feature_ext(inp_model, transform, paths):
    """Run every image through ``inp_model`` and stack the flattened outputs.

    The model is moved to the module-level ``device`` and switched to
    evaluation mode for the duration of the call, so stochastic layers such as
    Dropout do not perturb the features. Its previous train/eval mode is
    restored afterwards.

    Args:
        inp_model: ``nn.Module`` used to compute the features.
        transform: Callable that maps an RGB ``ndarray`` image to a tensor
            without a batch dimension.
        paths: Iterable of image file paths.

    Returns:
        ``np.ndarray`` with one row of flattened model output per path.

    Raises:
        ValueError: If an image cannot be read from disk.
    """
    features = []
    inp_model.to(device)

    was_training = inp_model.training
    inp_model.eval()
    try:
        for path in paths:
            img = cv.imread(path)
            # cv.imread signals failure by returning None instead of raising.
            if img is None:
                raise ValueError(f"Could not read image: {path}")
            # OpenCV loads BGR; torchvision's pretrained weights expect RGB.
            img = cv.cvtColor(img, cv.COLOR_BGR2RGB)

            # Add the batch dimension without hard-coding the image size.
            img = transform(img).unsqueeze(0).to(device)
            with torch.no_grad():
                feature = inp_model(img)
            features.append(feature.cpu().numpy().reshape(-1))
    finally:
        inp_model.train(was_training)

    return np.array(features)

In [17]:
# Feature matrix for the training split: one row per training image.
features = feature_ext(
    inp_model=model,
    transform=data_transforms,
    paths=path_images,
)

In [18]:
from sklearn.linear_model import LogisticRegression

In [30]:
# Fit a logistic-regression classifier on the extracted features. The default
# iteration cap (100) is raised to give the solver room to converge on
# high-dimensional, unscaled inputs; it still stops early once converged.
clf = LogisticRegression(max_iter=1000).fit(features, class_labels)

In [31]:
# Index the held-out test split; the class count is not needed here.
test_path_images, test_labels, _ = getPaths(
    path="../input/intel-image-classification/seg_test/seg_test"
)

In [32]:
# Feature matrix for the test split, computed with the same model and transform.
test_features = feature_ext(
    inp_model=model,
    transform=data_transforms,
    paths=test_path_images,
)

In [33]:
# Predicted class index for every test image.
labels_pred = clf.predict(X=test_features)

In [34]:
from sklearn.metrics import accuracy_score

In [35]:
# Fraction of test images whose predicted class matches the true class.
accuracy_score(y_true=test_labels, y_pred=labels_pred)

# Bike vs Horse dataset

In [39]:
# Index the Bike-vs-Horse dataset; the class count is not needed here.
bike_horse_images, bike_horse_labels, _ = getPaths(
    path="../input/bike-vs-horse/Assignment2_BikeHorses"
)

In [40]:
# Feature matrix for the Bike-vs-Horse images.
bike_horse_features = feature_ext(
    inp_model=model,
    transform=data_transforms,
    paths=bike_horse_images,
)

In [41]:
from sklearn.model_selection import train_test_split

# Hold out 33% of the Bike-vs-Horse samples for evaluation; the fixed seed
# makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    bike_horse_features,
    bike_horse_labels,
    test_size=0.33,
    random_state=42,
)

In [42]:
# Fit a fresh classifier on the Bike-vs-Horse training portion. The default
# iteration cap (100) is raised to give the solver room to converge on
# high-dimensional, unscaled inputs; it still stops early once converged.
clf = LogisticRegression(max_iter=1000).fit(X_train, y_train)

In [44]:
# Predicted class index for every held-out Bike-vs-Horse sample.
y_pred = clf.predict(X=X_test)

In [45]:
# Fraction of held-out Bike-vs-Horse samples classified correctly.
accuracy_score(y_true=y_test, y_pred=y_pred)