In [1]:
import os
import cv2
import copy
import csv
import random
import pickle
import numpy as np
import pandas as pd
import itertools
from scipy.stats import randint
from itertools import cycle
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

In [3]:
from sklearn import preprocessing
from sklearn.manifold import TSNE
from sklearn.decomposition import TruncatedSVD
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from scipy.sparse import csr_matrix
from scipy import stats
#from my_ml_lib import DataManipulationTools, MetricTools, PlotTools
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

In [4]:
# Import different classifiers
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, AdaBoostClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import PCA
from skimage.feature import hog, local_binary_pattern, greycomatrix, greycoprops

In [28]:
path= r"C:\Users\tejal\Desktop\Tejal Salunke\college\CV\CV project\dataset\train"
def read_images(path):
    images = []
    labels = []
    num1 = 32
    num2 = 32
    for file_name in os.listdir(path):
        file_path = path + '/' + file_name
        for img_name in os.listdir(file_path):
            if not img_name.startswith('.'):
                if img_name.endswith('.png'):
                    img = cv2.imread(file_path + '/' + img_name)
                    new_img = cv2.resize(img, (num2, num1))
                    images.append(new_img)
                    if file_name == 'Parasitized':
                        label = 0
                    else:
                        label = 1
                    labels.append(label)
    
    return np.array(images), np.array(labels)

In [29]:
def save_feature(feature, name):
    # saving all our feature vectors in pickled file
    with open('cache/' + name + '.pkl', 'wb') as fp:
        pickle.dump(csr_matrix(feature), fp)
    
    print(f'Feature saved with name cache/{name}.pkl')

def load_feature(feature_name):
    return pickle.load(open(feature_name, 'rb')).A

In [30]:
def save_model(model):
    filename = input('Enter model file name:')
    pickle.dump(model, open('models/'+filename + '.pkl', 'wb'))
    print(f'Successfully saved model in models/{filename}.pkl')

def load_model(model_name):
    return pickle.load(open(model_name, 'rb'))

In [31]:
def get_gabor(images, name="gabor", save=False):
    
    # Gabor filter banks with different orientations and at different scales
    filters = []
    ksize = 9
    sigma = 0.1
    gamma = 0.5
    phi = 0
    
    # define the range for theta and nu
    for theta in np.arange(0, np.pi, np.pi / 8):
        for nu in np.arange(0, 6*np.pi/4, np.pi / 4):
            kern = cv2.getGaborKernel((ksize, ksize), sigma, theta, nu, gamma, phi, ktype=cv2.CV_32F)
            kern /= 1.5*kern.sum()
            filters.append(kern)
    
    # function to convolve the image with the filters
    def process(img, filters):
        accum = np.zeros_like(img)
        for kern in filters:
            fimg = cv2.filter2D(img, cv2.CV_8UC3, kern)
            np.maximum(accum, fimg, accum)
            return accum
        
    
    def get_image_gabor(image):
        # Local Binary Pattern
        f = []

        # calculating the local energy for each convolved image
        for j in range(40):
            res = process(cv2.cvtColor(image, cv2.COLOR_RGB2GRAY), filters[j])
            res = np.array(res)
            f.append(np.sum(np.multiply(res, res)))

        # calculating the mean amplitude for each convolved image
        for j in range(40):
            res = process(cv2.cvtColor(image, cv2.COLOR_RGB2GRAY), filters[j])
            res = np.array(res)
            f.append(np.sum(abs(res)))
        return f
    
    # Gabor descriptor for all images
    features = []
    for i, img in enumerate(images):
        f = get_image_gabor(img)
        features.append(f)
    
    result = np.array(features)
    
    if save:
        save_feature(result, name)
    
    return result

In [32]:
def train_model(train_x, train_y, model_name='NB', validation=None):
    """
    Possible model names: ['NB', 'SVM', 'XGB', 'MLP', 'ADA', 'BAG', 'RF']
    default = 'NB'
    
    validation: (val_x, val_y) tupple for validation accuracy score.
    
    return: trained model
    """
    model = None
    if model_name == 'SVM':
        model = svm.SVC(gamma='scale', probability=True)
    elif model_name == 'XGB':
        model = XGBClassifier(n_estimators=200, max_depth=5, n_jobs=2)
#         model = XGBClassifier()
    elif model_name == 'MLP':
        model = MLPClassifier(hidden_layer_sizes=(100,100,100), max_iter=800, alpha=0.0001,
                     solver='sgd', verbose=10, tol=0.000000001)
    elif model_name == 'ADA':
        model = AdaBoostClassifier(n_estimators=50)
    elif model_name == 'BAG':
        model = BaggingClassifier(n_jobs=2, n_estimators=50)
    elif model_name == 'RF':
        model = RandomForestClassifier(n_estimators=200, max_depth=10)
    elif model_name == 'KNN':
        model = KNeighborsClassifier(n_neighbors=5, weights='distance', algorithm='auto', leaf_size=30, p=2, metric='minkowski', metric_params=None, n_jobs=None)
    else:
        model = GaussianNB()
    
    model.fit(train_x, train_y)
    
    if validation is not None:
        y_hat = model.predict(validation[0])
        acc = metrics.accuracy_score(validation[1], y_hat)
        print(f"Validation Accuracy in '{model_name}' = {acc}")
        cm = metrics.confusion_matrix(validation[1], y_hat)
        print(cm)
        recall = cm[0][0] / (cm[0][0] + cm[0][1])
        precision = cm[0][0] / (cm[0][0] + cm[1][0])
        f1 = 2*(precision*recall)/(precision+recall)
        print(f"Recall in '{model_name}' = {recall}")
        print(f"Precision in '{model_name}' = {precision}")
        print(f"F1 Score in '{model_name}' = {f1}")
               
    return model

In [47]:
input1= r"C:\Users\tejal\Desktop\Tejal Salunke\college\CV\CV project\dataset\train"
full_data_x, full_data_y = read_images(input1)

In [48]:
print("dataset size : ", full_data_x.shape, full_data_y.shape)

dataset size :  (0,) (0,)


In [49]:
print("One Image size: ", full_data_x[2].shape)

IndexError: index 2 is out of bounds for axis 0 with size 0

In [57]:
data_x, test_x, data_y, test_y = train_test_split(full_data_x, full_data_y, test_size=0.75)

ValueError: With n_samples=0, test_size=0.75 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [63]:
input=r"C:\Users\tejal\Desktop\Tejal Salunke\college\CV\CV project\dataset\train"
input.shape()


AttributeError: 'str' object has no attribute 'shape'