# Actividad 5

Especificaciones
1. Implementa cinco algoritmos de clasificación supervisados.
2. Utilizando el dataset de cáncer evalúa mAp utilizando diferentes parámetros para cada modelo, e.g. numero de componentes de proyección, sin proyección, regularización, restricción, distancias.
3. Utiliza diferentes porcentajes de entrenamiento, e.g., 1/2, 3/4 y 8/10.
4. Genera una tabla comparativa de todas la pruebas.

In [5]:
import numpy
import pandas
import matplotlib.pyplot as plt
import scipy
from warnings import simplefilter
from sklearn.exceptions import ConvergenceWarning
simplefilter("ignore", category=ConvergenceWarning)

In [15]:
# Load dataset

from sklearn.datasets import load_breast_cancer
import matplotlib.pyplot as plt
data = load_breast_cancer()
X = data.data
Y = data.target

# Load models

from sklearn.metrics.cluster import adjusted_rand_score
from sklearn.feature_selection import SelectKBest, chi2, SelectPercentile
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC
from sklearn.linear_model import SGDClassifier
from sklearn.neighbors import RadiusNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.naive_bayes import BernoulliNB

Models = { 
    'LinearSVC' : LinearSVC() ,
    'SGDClassifier' : SGDClassifier(),
    'RadiusNeighbors': RadiusNeighborsClassifier(radius = 10000),
    'GaussianProcess': GaussianProcessClassifier(),
    'BernoulliNB': BernoulliNB(),
}

modelNames = []
accuracyList = []
trainingSizeList = []
trainingSeedList = []
numberKClustersList = []
trainingPercentile = []
trainingComponents = []
numberKClusters = 3
trainingSizes = [0.5, 0.75, 0.8]
percetileList = [5,10,20,30,40,50,60]
numberOfComponentList = range(1,7)
trainingSeeds = range(0,6)
argumentVariation = range(1,4)

for name, model in Models.items():
    for size in trainingSizes:
        for seed in trainingSeeds:
            for i in argumentVariation:
                if i == 1:
                    for k in range(1,numberKClusters):
                        X_hat = SelectKBest (chi2 , k = k).fit_transform(X,Y)
                        X_train, X_test, Y_train, Y_test = train_test_split(X_hat, Y, test_size = size, random_state=seed)
                        model.fit(X_train,Y_train)
                        Y_hat = model.predict(X_test)
                        acc = numpy.mean(Y_hat == Y_test)
                        modelNames.append(name)
                        accuracyList.append(acc)
                        trainingSizeList.append(size)
                        trainingSeedList.append(seed)
                        numberKClustersList.append(k)
                        trainingPercentile.append(0)
                        trainingComponents.append(0)
                elif i == 2:
                    for percentile in percetileList:
                        X_hat = SelectPercentile(chi2, percentile = percentile).fit_transform(X, Y)
                        X_train, X_test, Y_train, Y_test = train_test_split(X_hat, Y, test_size = size, random_state=seed)
                        model.fit(X_train,Y_train)
                        Y_hat = model.predict(X_test)
                        acc = numpy.mean(Y_hat == Y_test)
                        modelNames.append(name)
                        accuracyList.append(acc)
                        trainingSizeList.append(size)
                        trainingSeedList.append(seed)
                        numberKClustersList.append(0)
                        trainingPercentile.append(percentile)
                        trainingComponents.append(0)
                elif i == 3:
                    for component in numberOfComponentList:
                        pca = PCA( n_components = component )
                        pca.fit(X)
                        X_hat = pca.transform(X)
                        X_train, X_test, Y_train, Y_test = train_test_split(X_hat, Y, test_size = size, random_state=seed)
                        model.fit(X_train,Y_train)
                        Y_hat = model.predict(X_test)
                        acc = numpy.mean(Y_hat == Y_test)
                        modelNames.append(name)
                        accuracyList.append(acc)
                        trainingSizeList.append(size)
                        trainingSeedList.append(seed)
                        numberKClustersList.append(0)
                        trainingPercentile.append(0)
                        trainingComponents.append(component)

DF=pandas.DataFrame()
DF=DF.assign(Model = modelNames)
DF=DF.assign(TestSize = trainingSizeList)
DF=DF.assign(TestSeed = trainingSeedList)
DF=DF.assign(ACC = accuracyList)
DF=DF.assign(K = numberKClustersList)
DF=DF.assign(Percentile = trainingPercentile)
DF=DF.assign(PCA = trainingComponents)
DF = DF.nlargest(50, ['ACC'])
print(DF)

              Model  TestSize  TestSeed       ACC  K  Percentile  PCA
88        LinearSVC      0.50         5  0.964912  0           0    5
87        LinearSVC      0.50         5  0.961404  0           0    4
356   SGDClassifier      0.50         5  0.961404  0           0    3
357   SGDClassifier      0.50         5  0.961404  0           0    4
80        LinearSVC      0.50         5  0.957895  0          30    0
358   SGDClassifier      0.50         5  0.957895  0           0    5
359   SGDClassifier      0.50         5  0.957895  0           0    6
178       LinearSVC      0.75         5  0.957845  0           0    5
83        LinearSVC      0.50         5  0.954386  0          60    0
267       LinearSVC      0.80         5  0.951754  0           0    4
268       LinearSVC      0.80         5  0.951754  0           0    5
269       LinearSVC      0.80         5  0.951754  0           0    6
282   SGDClassifier      0.50         0  0.950877  0           0    4
283   SGDClassifier 