<a href="https://colab.research.google.com/github/Amar-Pratap-Singh/2022-tutorial-2/blob/main/2b_BikeVsHorse.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install opencv-python
!pip install sklearn

In [30]:
import cv2 as cv
import os
import numpy as np
import sklearn
from sklearn.cluster import KMeans
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier

In [31]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# CATS AND DOGS DATASET

In [32]:
rdir_path = '/content/drive/MyDrive/CatsAndDogs/training_set'
nClusters = 128
svm_c = 0.05
svm_kernal = 'linear'
splitProp = 0.8

In [33]:
def getDataset(dataset_path):

    label = -1
    labels_list = []
    imgs_list = []
    
    dirs = [f for f in os.listdir(dataset_path) if os.path.isdir(os.path.join(dataset_path, f))]
    
    for directory in dirs:
        label = label + 1
        path = dataset_path+'/'+directory
        onlyfiles = [f for f in os.listdir(path) if os.path.isfile(os.path.join(path, f))]
        
        for file in onlyfiles:
            imgPath = path+'/'+file
            labels_list.append(label)
            imgs_list.append(imgPath)
    
    return (imgs_list, labels_list)

In [34]:
dataset = getDataset(rdir_path)
train_img_paths, test_img_paths, train_labels, test_labels = train_test_split(
                                                dataset[0], 
                                                dataset[1], 
                                                train_size=splitProp, 
                                                random_state = 42, 
                                                shuffle = True, 
                                                stratify = dataset[1])

In [35]:
def get_imgs(images_path, size=(224, 224)):
    imgs = []
    for img_name in images_path:
        img = Image.open(img_name)
        img = TF.resize(img, size)
        img = TF.to_tensor(img)
        imgs.append(img)
    imgs = torch.stack(imgs)
    return imgs

In [36]:
import torch
import torchvision.models as models
import torchvision.transforms as transforms
import numpy as np
import torchvision.transforms.functional as TF
from PIL import Image

In [37]:
alexnet = models.alexnet(pretrained=True)
alexnet = torch.nn.Sequential(*list(alexnet.children())[:-1])
alexnet.eval()

transform = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])



In [38]:
def get_features_from_images(images):
    features = []
    for image in images:
        image = image.unsqueeze(0)
        
        with torch.no_grad():
            feature = alexnet(image)
            feature = np.squeeze(feature.numpy())
            features.append(feature)
    return features

In [39]:
def reshape_features(features):
    total_samples = features.shape[0]
    new_shape = (total_samples, -1) 
    reshaped_features = np.reshape(features, new_shape)
    return reshaped_features

In [40]:
train_features = np.array(get_features_from_images(get_imgs(train_img_paths)))
test_features = np.array(get_features_from_images(get_imgs(test_img_paths)))

In [41]:
train_features = reshape_features(train_features)
test_features = reshape_features(test_features)

In [42]:
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)

# SVM

In [43]:
svm_model = svm.SVC(C=svm_c, kernel=svm_kernal)
svm_model.fit(train_features,train_labels)

#### Training Accuracy

In [44]:
svm_model.score(train_features,train_labels)

1.0

#### Testing Accuracy

In [45]:
svm_model.score(test_features,test_labels)

0.9142335766423357

# Logistic Regression

In [46]:
lr_model = LogisticRegression()
lr_model.fit(train_features,train_labels)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


#### Training Accuracy

In [47]:
lr_model.score(train_features,train_labels)

1.0

#### Testing Accuracy

In [48]:
lr_model.score(test_features,test_labels)

0.926094890510949

# BIKE AND HORSE DATASET

In [49]:
rdir_path = '/content/drive/MyDrive/VRAssignment2/Bike_Horses'

In [50]:
dataset = getDataset(rdir_path)
train_img_paths, test_img_paths, train_labels, test_labels = train_test_split(
                                                dataset[0], 
                                                dataset[1], 
                                                train_size=splitProp, 
                                                random_state = 42, 
                                                shuffle = True, 
                                                stratify = dataset[1])

In [51]:
train_features = np.array(get_features_from_images(get_imgs(train_img_paths)))
test_features = np.array(get_features_from_images(get_imgs(test_img_paths)))

In [52]:
train_features = reshape_features(train_features)
test_features = reshape_features(test_features)

In [53]:
train_labels = np.array(train_labels)
test_labels = np.array(test_labels)

## SVM

In [54]:
svm_model.fit(train_features, train_labels)

In [55]:
svm_model.score(train_features, train_labels)

1.0

In [56]:
svm_model.score(test_features, test_labels)

1.0

## Logistic Regression

In [57]:
lr_model.fit(train_features, train_labels)

In [58]:
lr_model.score(train_features, train_labels)

1.0

In [59]:
lr_model.score(test_features, test_labels)

1.0