In [1]:
# Enable IPython's autoreload extension so that edits to imported modules are
# picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules (except those excluded via %aimport) before
# executing each cell.
%autoreload 2
# NOTE(review): reloading the extension right after %load_ext looks redundant;
# presumably kept so the cell can be re-run without complaints — confirm.
%reload_ext autoreload

In [2]:
import proceso_imagenes
import matplotlib.pyplot as plt
import numpy as np

# Load the images found under ./nums together with their labels (`clases` is
# later passed to scikit-learn as the target vector), then report how many
# instances were read.
imagenes, clases = proceso_imagenes.cargar_numeros("./nums")
print(f"Loaded: {imagenes.shape[0]} instances")

Loaded: 1190 instances


In [3]:
from multiprocessing import Pool
import sys
import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this blanket filter also hides NumPy RuntimeWarnings (e.g.
# divide-by-zero inside custom distance functions) — consider narrowing it.
warnings.filterwarnings('ignore')

# Run the per-image feature extractors on two worker processes.  The context
# manager guarantees the workers are shut down even when an extractor raises;
# every `map` call below is synchronous, so all results have been collected
# before the pool is torn down on exit.
with Pool(2) as p:
    # Pool.map already returns a list, so it can be handed to NumPy directly.
    proyeccion_x = np.asarray(p.map(proceso_imagenes.proyeccion_x, imagenes))
    proyeccion_y = np.asarray(p.map(proceso_imagenes.proyeccion_y, imagenes))
    print("Proyecciones calculadas", flush=True)

    # Post-process each projection with `preparar`; results are kept under the
    # *_norm names used when the datasets are assembled.
    proyeccion_x_norm = np.asarray(p.map(proceso_imagenes.preparar, proyeccion_x))
    proyeccion_y_norm = np.asarray(p.map(proceso_imagenes.preparar, proyeccion_y))
    print("Proyecciones procesadas", flush=True)

    # One integer per image.
    numero_rectas = np.fromiter(p.map(proceso_imagenes.contar_rectas, imagenes), dtype=int)
    print("Numero de rectas calculadas", flush=True)

    # One float per image.
    intensidades_medias_umbralizadas = np.fromiter(
        p.map(proceso_imagenes.intensidad_media_umbralizada, imagenes), dtype=float
    )
    print("Intensidades medias calculadas", flush=True)

    # Two integers per image, stored in separate vectors.
    altos = np.fromiter(p.map(proceso_imagenes.alto_numero, imagenes), dtype=int)
    anchos = np.fromiter(p.map(proceso_imagenes.ancho_numero, imagenes), dtype=int)
    print("Altos/anchos calculados", flush=True)

Proyecciones calculadas
Proyecciones procesadas
Numero de rectas calculadas
Intensidades medias calculadas
Altos/anchos calculados


In [4]:
# Histogram-style features: the processed x and y projections of each image,
# concatenated column-wise.
dataset_hist = np.hstack([proyeccion_x_norm, proyeccion_y_norm])

# Scalar descriptors: stack the four per-image vectors as rows, then transpose
# so that every image becomes one row with four columns.
dataset_miscelanious = np.vstack([
    numero_rectas,
    intensidades_medias_umbralizadas,
    altos,
    anchos,
]).transpose()

# Full feature matrix: histogram columns followed by the scalar descriptors.
dataset_full = np.hstack([dataset_hist, dataset_miscelanious])
print("Dataset construidos")

Dataset construidos


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Odd neighbourhood sizes tried for k-NN: 1, 3, 5, ..., 13.
l_n_neighbors = np.arange(1, 14, 2)

# Feature matrices to benchmark, paired by position with the label printed for
# each one.
# NOTE(review): the second label reads "dataset_knn" although the matrix is
# `dataset_hist` — confirm the label is intentional.
datasets = [dataset_full, dataset_hist]
datasets_names = ["dataset_full", "dataset_knn"]

# Cap used when deriving `features` inside the evaluation loop.
max_features = 10

def hellinger(x,y):
    """Return the Euclidean norm of ``sqrt(x) - sqrt(y)``.

    This is the Hellinger distance without the conventional ``1/sqrt(2)``
    factor; the scaling does not affect nearest-neighbour rankings.

    Parameters
    ----------
    x, y : array-like of non-negative numbers with matching shapes.

    Returns
    -------
    NumPy float scalar.
    """
    root_gap = np.sqrt(x) - np.sqrt(y)
    squared_total = np.sum(root_gap * root_gap)
    return np.sqrt(squared_total)

def chisq(x,y):
    """Chi-square distance between two vectors: ``sum((x - y)**2 / (x + y))``.

    Components where ``x + y == 0`` (e.g. a histogram bin that is empty in both
    inputs) contribute 0 instead of evaluating 0/0, which would otherwise turn
    the whole distance into NaN.  When no denominator is zero the result is
    identical to the plain formula.

    Parameters
    ----------
    x, y : array-like of numbers with matching shapes (intended for
        non-negative, histogram-like features).

    Returns
    -------
    NumPy float scalar.
    """
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    numerator = (x - y) ** 2
    denominator = x + y
    # Divide only where the denominator is non-zero; the remaining slots keep
    # the 0.0 they were initialised with.
    terms = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return np.sum(terms)

# Seed shared by the hold-out split and the tree-based models so that
# re-running the cell reproduces the same numbers.
RANDOM_STATE = 0


def _fit_and_report(model, train_attr, train_labels, dataset, clases, *tags):
    """Fit ``model`` on the training split and print its 5-fold CV accuracy.

    ``cross_val_score`` works on clones of the estimator, so the reported
    scores do not depend on the fit performed here; the fit is kept so the
    ``model`` object left in the namespace is a trained one.

    Parameters
    ----------
    model : scikit-learn classifier to evaluate.
    train_attr, train_labels : training part of the hold-out split.
    dataset, clases : full feature matrix and labels used for cross-validation.
    *tags : extra values printed between the class name and the accuracy
        (e.g. metric name and number of neighbours).

    Returns
    -------
    Array with the accuracy obtained on each of the 5 folds.
    """
    model.fit(train_attr, train_labels)
    scores = cross_val_score(model, dataset, clases, cv=5)
    print("\t", model.__class__.__name__, *tags,
          "Accuracy: %0.2f (+/- %0.2f)" % (np.mean(scores), np.std(scores)))
    return scores


# k-NN variants evaluated for every neighbourhood size: (printed label, extra
# constructor arguments).  An empty dict means scikit-learn's default metric.
knn_variants = [
    # ("hellinger", {"metric": hellinger}),  # disabled in the original run
    ("chisq", {"metric": chisq}),
    ("minkowski", {}),
]

for dataset, name in zip(datasets, datasets_names):
    print("\n\nDataset:", name, "\n")

    # 75/25 hold-out split, seeded for reproducibility.
    # NOTE(review): the test part is not consumed in this cell; it is kept in
    # case later cells rely on it — confirm.
    split = train_test_split(dataset, clases, test_size=0.25, random_state=RANDOM_STATE)
    train_attr, test_attr = split[0], split[1]
    train_labels = np.asarray(split[2])
    test_labels = np.asarray(split[3])

    # NOTE(review): computed but not passed to any estimator below; presumably
    # meant for RandomForestClassifier(max_features=...) — confirm.
    features = np.min([max_features, dataset.shape[1]])

    # Random Forest
    model = RandomForestClassifier(random_state=RANDOM_STATE)
    scores = _fit_and_report(model, train_attr, train_labels, dataset, clases)
    print("\n")

    # k-NN with each configured metric and each neighbourhood size
    for metric_name, metric_kwargs in knn_variants:
        for n_neighbors in l_n_neighbors:
            model = KNeighborsClassifier(n_neighbors=n_neighbors, **metric_kwargs)
            scores = _fit_and_report(model, train_attr, train_labels, dataset, clases,
                                     metric_name, n_neighbors)
        print("\n")

    # Gaussian NB
    model = GaussianNB()
    scores = _fit_and_report(model, train_attr, train_labels, dataset, clases)
    print("\n")

    # Decision Tree
    model = DecisionTreeClassifier(random_state=RANDOM_STATE)
    scores = _fit_and_report(model, train_attr, train_labels, dataset, clases)
    




Dataset: dataset_full 

	 RandomForestClassifier Accuracy: 0.85 (+/- 0.01)


	 KNeighborsClassifier chisq 1 Accuracy: 0.86 (+/- 0.01)
	 KNeighborsClassifier chisq 3 Accuracy: 0.85 (+/- 0.02)
	 KNeighborsClassifier chisq 5 Accuracy: 0.85 (+/- 0.02)
	 KNeighborsClassifier chisq 7 Accuracy: 0.85 (+/- 0.02)
	 KNeighborsClassifier chisq 9 Accuracy: 0.85 (+/- 0.02)
	 KNeighborsClassifier chisq 11 Accuracy: 0.85 (+/- 0.02)
	 KNeighborsClassifier chisq 13 Accuracy: 0.85 (+/- 0.03)


	 KNeighborsClassifier minkowski 1 Accuracy: 0.83 (+/- 0.01)
	 KNeighborsClassifier minkowski 3 Accuracy: 0.82 (+/- 0.01)
	 KNeighborsClassifier minkowski 5 Accuracy: 0.82 (+/- 0.02)
	 KNeighborsClassifier minkowski 7 Accuracy: 0.83 (+/- 0.03)
	 KNeighborsClassifier minkowski 9 Accuracy: 0.82 (+/- 0.03)
	 KNeighborsClassifier minkowski 11 Accuracy: 0.82 (+/- 0.03)
	 KNeighborsClassifier minkowski 13 Accuracy: 0.81 (+/- 0.02)


	 GaussianNB Accuracy: 0.82 (+/- 0.01)


	 DecisionTreeClassifier Accuracy: 0.77 (+/- 0