In [14]:
import cv2
import numpy as np
import glob
import os
import matplotlib
import matplotlib.pyplot as plt
from PIL import Image
from time import time
from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.metrics import classification_report, f1_score
from keras.utils.np_utils import to_categorical
from libitmal import dataloaders as itmaldataloaders

In [25]:
currmode="N/A" # GLOBAL var!

def SearchReport(model): 
    
    def GetBestModelCTOR(model, best_params):
        def GetParams(best_params):
            r=""          
            for key in sorted(best_params):
                value = best_params[key]
                t = "'" if str(type(value))=="<class 'str'>" else ""
                if len(r)>0:
                    r += ','
                r += f'{key}={t}{value}{t}'  
            return r            
        try:
            p = GetParams(best_params)
            return type(model).__name__ + '(' + p + ')' 
        except:
            return "N/A(1)"
        
    print("\nBest model set found on train set:")
    print()
    print(f"\tbest parameters={model.best_params_}")
    print(f"\tbest '{model.scoring}' score={model.best_score_}")
    print(f"\tbest index={model.best_index_}")
    print()
    print(f"Best estimator CTOR:")
    print(f"\t{model.best_estimator_}")
    print()
    try:
        print(f"Grid scores ('{model.scoring}') on development set:")
        means = model.cv_results_['mean_test_score']
        stds  = model.cv_results_['std_test_score']
        i=0
        for mean, std, params in zip(means, stds, model.cv_results_['params']):
            print("\t[%2d]: %0.3f (+/-%0.03f) for %r" % (i, mean, std * 2, params))
            i += 1
    except:
        print("WARNING: the random search do not provide means/stds")
    
    global currmode                
    assert "f1_micro"==str(model.scoring), f"come on, we need to fix the scoring to be able to compare model-fits! Your scoreing={str(model.scoring)}...remember to add scoring='f1_micro' to the search"   
    return f"best: dat={currmode}, score={model.best_score_:0.5f}, model={GetBestModelCTOR(model.estimator,model.best_params_)}", model.best_estimator_ 

def ClassificationReport(model, X_test, y_test, target_names=None):
    assert X_test.shape[0]==y_test.shape[0]
    print("\nDetailed classification report:")
    print("\tThe model is trained on the full development set.")
    print("\tThe scores are computed on the full evaluation set.")
    print()
    y_true, y_pred = y_test, model.predict(X_test)                 
    print(classification_report(y_true, y_pred, target_names))
    print()
    

def FullReport(model, X_test, y_test, t):
    print(f"SEARCH TIME: {t:0.2f} sec")
    beststr, bestmodel = SearchReport(model)
    ClassificationReport(model, X_test, y_test)    
    print(f"CTOR for best model: {bestmodel}\n")
    print(f"{beststr}\n")
    return beststr, bestmodel


In [2]:
#TODO load the dataset:
import sys,os
from time import time
sys.path.append(os.path.expanduser('ProjectFunctions'))
from ProjectFunctions import LoadShapes as LS


start = time()

X, y = LS.getShapes()

t = time()-start
print(f"OK, Load time={t:0.1f}")

OK, Load time=42.4


## Prep and split

In [3]:
X = np.array(X)
X_shape = X.shape
print("The shape of X: ", X_shape)

The shape of X:  (14970, 200, 200)


In [4]:
#split
X_r = X.reshape(14970, 200*200)

X_rShape = X_r.shape
print("The new shape of the reshaped X: ", X_rShape)

input_X_rShape = X_rShape[1:]
print("The shape of X ready to be inputed in the CNN: ", input_X_rShape)

The new shape of the reshaped X:  (14970, 40000)
The shape of X ready to be inputed in the CNN:  (40000,)


In [5]:
X_neuralReady=np.array(X_r)/255

print("Done preparing data for neural networks.")

Done preparing data for neural networks.


In [6]:
from sklearn.model_selection import train_test_split

test_size=0.3
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=42, shuffle=True)

print("Done splitting train and test data.")

Done splitting train and test data.


In [7]:

Xnr_train, Xnr_test, ynr_train, ynr_test = train_test_split(X_neuralReady, y, test_size=test_size, random_state=42, shuffle=True)

print("Done splitting train and test data ready for neural networks.")

Done splitting train and test data ready for neural networks.


In [8]:
# Setup search parameters
model = svm.SVC(gamma=0.001) # NOTE: gamma="scale" does not work in older Scikit-learn frameworks, 
                             # FIX:  replace with model = svm.SVC(gamma=0.001)



In [9]:
print(X_neuralReady[1])
print(y[1])


# y_train_binary = to_categorical(y_train)
# y_test_binary  = to_categorical(y_test)
# t = time()-start

# # Controls the dimention of the train/test arrays.
# assert y.ndim==1                     
# assert y_train_binary.ndim==2       
# assert y_test_binary.ndim ==2   

[1. 1. 1. ... 1. 1. 1.]
Star


In [12]:
Xnr_train_split, Xnr_test_split, ynr_train_split, ynr_test_split = Xnr_train[:1000], Xnr_test[:1000], ynr_train[:1000], ynr_test[:1000]

In [23]:
tuning_parameters = {
    'kernel':('linear', 'rbf'), 
    'C':[1, 10]
}


CV=5
VERBOSE=0
# Run GridSearchCV for the model
start = time()
grid_tuned = GridSearchCV(model, tuning_parameters, cv=CV, scoring='f1_micro', verbose=VERBOSE, n_jobs=-1, iid=True)
grid_tuned.fit(Xnr_train_split, ynr_train_split)
t = time()-start



In [26]:
# Report result
b0, m0= FullReport(grid_tuned , X_test, y_test, t)
print('OK')

SEARCH TIME: 242.89 sec

Best model set found on train set:

	best parameters={'C': 1, 'kernel': 'rbf'}
	best 'f1_micro' score=1.0
	best index=1

Best estimator CTOR:
	SVC(C=1, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

Grid scores ('f1_micro') on development set:
	[ 0]: 0.998 (+/-0.005) for {'C': 1, 'kernel': 'linear'}
	[ 1]: 1.000 (+/-0.000) for {'C': 1, 'kernel': 'rbf'}
	[ 2]: 0.998 (+/-0.005) for {'C': 10, 'kernel': 'linear'}
	[ 3]: 1.000 (+/-0.000) for {'C': 10, 'kernel': 'rbf'}


AttributeError: 'list' object has no attribute 'shape'