In [5]:
import numpy as np 
import os 
import cv2 
import matplotlib.pyplot as plt 
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm
from matplotlib.ticker import LinearLocator, FormatStrFormatter
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.svm import SVC
import sklearn.metrics as skmetrics
import pandas as pd

from sklearn.multiclass import OneVsRestClassifier
from sklearn.svm import LinearSVC, SVC
from sklearn.metrics import confusion_matrix
from scipy.io import loadmat

import itertools
import numpy as np 
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import fetch_openml

#mnist = fetch_openml('mnist_784')

In [6]:
species = os.listdir('scattered_image_lichen_4_1_2_')

In [7]:
def plot_confusion_matrix(cm, classes,string,directory,dt,normalize=True):
    """
    Function which plots confusion matrix

    Input cm = confusion matrix
    Input classes = array with class labels
    Input string = string to give name of the saved image
    Input directory = string to give directory to save the image
    Input normalize (False) = If true function will give accuracy instead of pure number
    Input Title (Confusion matrix) = title of the image


    Output : None
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
    accuracy_score = skmetrics.accuracy_score(y_pred, y_test)
    plt.imshow(cm, cmap=plt.cm.Blues)
    plt.title(' accuracy: ' + str(accuracy_score) + '   best rbf_kernel model: '+ str(dt))
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=90)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        if(i==j):
            plt.text(j, i, format(cm[i, j], fmt),
                     horizontalalignment="center",
                     color="white" if cm[i, j] > thresh else "black")
        else:
            continue

    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()
    plt.savefig(directory +'confusion_matrix'+string+'.jpg')
    
    
def apply_pca_to_train_and_test_images(train_vct, test_vct, perc = .95):
    """
    INPUT:
    train_vct = training vector of shape (N_samples, dimensionality)
    test_vct = testing vector of shape (N_samples, dimensionality)
    perc = percentage of variance that we want ot preserve 
    
    OUTPUT 
    train_transform = new training vector 
    test_transform = new test vector 
    """
    
    scaler = StandardScaler()
    scaler.fit(train_vct)
    train_vct = scaler.transform(train_vct)
    test_vct = scaler.transform(test_vct)
    pca = PCA(perc)
    pca.fit(train_vct)
    train_transform = pca.transform(train_vct)
    test_transform = pca.transform(test_vct)
    
    return train_transform, test_transform
    

In [8]:
def load_descriptor_from_matfile(ft_path,label_list = species):
    feat = loadmat(ft_path)
    feat = feat['scattered_image']
    lab = ft_path.split('/')[1]
    #print(type(lab))
    #print(lab.replace("_", " "))
    lab = label_list.index(lab)
    return feat, lab


def extract_features(labels_list, or_pt):
    data_all = []
    data_mean = []
    label = []
    for i, tp in enumerate(labels_list):
        pth = os.path.join(or_pt, tp)
        mat_files = os.listdir(pth)
        for j,mt in enumerate(mat_files):
            path = os.path.join(pth,mt)
            x,y = load_descriptor_from_matfile(path)
            xall = x.reshape(-1)
            xmean = np.sum(np.sum(x,axis = 2),axis = 1)
            xmean = xmean.reshape(-1)
            data_all.append(xall)
            data_mean.append(xmean)
            label.append(y)

    data_all = np.array(data_all)
    data_mean = np.array(data_mean)
    label = np.array(label) 
    return data_all, data_mean, label


In [None]:
c = ['1','2','3','4','5','6','7','8']


for cc in c:
    print('--------')
    print(cc)
    f = 'scattered_image_lichen_4_' + cc + '_2_'
    labels_list = os.listdir(f)
    title_lich = 'lichen_JLM_4'+ cc + '2_'
    data_all, data_mean, label = extract_features(labels_list,f)
    # PCA REDUCTION 
    X_train, X_test, y_train, y_test = train_test_split(data_mean, label, test_size=0.70, random_state=10, shuffle=True)
    svc  = OneVsRestClassifier(SVC(kernel = 'linear',gamma = 'scale'),n_jobs = -1)
    svc = svc.fit(X_train, y_train)
    linear_score = svc.score(X_test,y_test)
    print(linear_score)
    
    # First grid search to find best parameters
    param_grid = {'C': [ 1e2, 1e3, 5e3, 1e4, 5e4], 'gamma': [ 1e-9, 1e-8, 1e-7, 1e-6, 1e-5, 0.0001, 0.0005, 0.001, 0.005]}
    clf0 = GridSearchCV(SVC(kernel='rbf'), param_grid)
    clf = clf0.fit(X_train, y_train)
    print("Best estimator found by grid search : ", clf.best_estimator_)
    y_pred = clf.predict(X_test)
    print('Accuracy score :', skmetrics.accuracy_score(y_pred, y_test))
    
    plt.figure(figsize = (15,10)) 

    plot_confusion_matrix(skmetrics.confusion_matrix(y_pred, y_test),labels_list,title_lich,'results/',clf.best_estimator_)
    
    

In [None]:
c = ['1','2','3','4','5','6','7','8']


for cc in c:
    print('--------')
    print(cc)
    f = 'scattered_image_lichen_4_' + cc + '_2_'
    labels_list = os.listdir(f)
    title_lich = 'lichen_JLM_4'+ cc + '2_'
    data_all, data_mean, label = extract_features(labels_list,f)
    # PCA REDUCTION 
    X_train, X_test, y_train, y_test = train_test_split(data_mean, label, test_size=0.70, random_state=10, shuffle=True)
    svc  = OneVsRestClassifier(SVC(kernel = 'linear',gamma = 'scale'),n_jobs = -1)
    svc = svc.fit(X_train, y_train)
    linear_score = svc.score(X_test,y_test)
    print(linear_score)
    
    # First grid search to find best parameters
    param_grid = {'C': [  1e3, 5e3, 1e4], 'gamma': [ 1e-5, 0.0001, 0.0005, 0.001],'degree':[2,3,4,5,6]}
    clf0 = GridSearchCV(SVC(kernel='poly'), param_grid)
    clf = clf0.fit(X_train, y_train)
    print("Best estimator found by grid search : ", clf.best_estimator_)
    y_pred = clf.predict(X_test)
    print('Accuracy score :', skmetrics.accuracy_score(y_pred, y_test))
    
    plt.figure(figsize = (15,10)) 

    plot_confusion_matrix(skmetrics.confusion_matrix(y_pred, y_test),labels_list,title_lich,'results/',clf.best_estimator_)
    
    

In [11]:
!pip install kymatio

Collecting kymatio
  Using cached kymatio-0.2.0-py3-none-any.whl (92 kB)
Collecting appdirs
  Using cached appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)
Installing collected packages: appdirs, kymatio
Successfully installed appdirs-1.4.4 kymatio-0.2.0


In [25]:
from kymatio.numpy import Scattering2D

scattering = Scattering2D(J=2, shape=(100, 100))
J = 2
M, N = 100, 100

# Generate a sample signal.
x = np.random.randn(3,M, N)

# Define a Scattering2D object.
S = Scattering2D(J, (M, N))

# Calculate the scattering transform.
Sx = S.scattering(x)

# Equivalently, use the alias.
Sx = S(x)
Sxx = Sx.reshape(81*3,25,25)

In [26]:
Sxx.shape

(243, 25, 25)

## END-TO-END PROCESS HERE IN PYTHON 

In [127]:
import cv2
import numpy as np
import os
import matplotlib.pyplot as plt
import random
import pylab as pl
from sklearn.metrics import confusion_matrix,accuracy_score

DEFINE TRAINING PATH

In [174]:
train_path="../../../patches/train"
class_names=os.listdir(train_path)

In [175]:
class_dir = {}
for i,c in enumerate(class_names):
    class_dir[c] = i

In [176]:
image_paths=[]
image_classes=[]

In [177]:
def img_list(path):
    return (os.path.join(path,f) for f in os.listdir(path))

In [178]:
for training_name in class_names:
    dir_=os.path.join(train_path,training_name)
    class_path=img_list(dir_)
    image_paths+=class_path
    for i in range(len(os.listdir(dir_))):
        image_classes.append(class_dir[training_name])

In [179]:
len(image_classes)

1573

APPEND ALL IMAGE PATH WITH ITS CORRESPONDING LABEL IN A LIST

In [180]:
D=[]
for i in range(len(image_paths)):
    D.append((image_paths[i],image_classes[i]))

In [181]:
dataset = D
random.shuffle(dataset)


image_paths, y_train = zip(*train)
image_paths_test, y_test = zip(*test)

## FEATURE EXTRACTION 




In [182]:
des_list=[]

In [183]:
import cv2
def extract_features(dataset, jj = 2,l = 4 , image_shape = (3,100,100),sp =1 ):
    s = Scattering2D(J=jj, L = l, shape=(100, 100))
    N = len(dataset)
    sh1 = 1 + l*jj + (l*l*jj*(jj-1))//2
    sh2 = 100//(2**jj)
    sh3 = 100//(2**jj)
    out = []
    lab = []
    des_list = []
    for i,c in enumerate(dataset):
        lab.append(c[1])
        img = cv2.imread(c[0])
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img,(100,100))
        img = cv2.normalize(img, None)
        im = np.rollaxis(img, 2, 1)
        img = np.rollaxis(im,1,0)  
        cc = S.scattering(img)
        cc = cc.reshape(cc.shape[0]*cc.shape[1],cc.shape[2],cc.shape[3])
        xmean = np.sum(np.sum(cc,axis = 2),axis = 1)
        xmean = xmean.reshape(-1)        
        out.append(xmean)
        des_list.append((c,xmean))
    out = np.array(out)
    lab = np.array(lab)
    return out,lab,des_list
    

In [184]:
train_data,train_label,des_list = extract_features(dataset)

In [185]:
descriptors_float=train_data.astype(float)

### Performing K Means clustering on Descriptors

In [None]:
from scipy.cluster.vq import kmeans,vq
from sklearn.preprocessing import StandardScaler

def BOVW(descriptor, k = 400):
    voc,variance=kmeans(descriptors_float,k,1)
    im_features=np.zeros((len(image_paths),k),"float32")
    c =des_list[0][1].shape[0]
    im_features=np.zeros((len(image_paths),k),"float32")
    for i in range(len(image_paths)):
        words,distance=vq(des_list[i][1].reshape(1, c),voc)
        for w in words:
            im_features[i][w]+=1
    stdslr=StandardScaler().fit(im_features)
    im_features=stdslr.transform(im_features)
    return im_features

In [160]:
from scipy.cluster.vq import kmeans,vq

k=400
voc,variance=kmeans(descriptors_float,k,1)

In [188]:
des_list[0][1].shape[0]

243

In [162]:
im_features=np.zeros((len(image_paths),k),"float32")
for i in range(len(image_paths)):
    words,distance=vq(des_list[i][1].reshape(1,243),voc)
    for w in words:
        im_features[i][w]+=1

In [163]:
from sklearn.preprocessing import StandardScaler
stdslr=StandardScaler().fit(im_features)
im_features=stdslr.transform(im_features)

In [164]:
im_features.shape

(1200, 400)

In [165]:
#from sklearn.svm import LinearSVC
#clf=LinearSVC(max_iter=80000)
#clf.fit(im_features,np.array(y_train))

LinearSVC(max_iter=80000)