In [1]:
import numpy as np
from sklearn import svm
from sklearn.cross_validation import cross_val_score, StratifiedKFold

# Load the two CSV files as byte-string arrays and slice away row 0 and
# column 0, keeping only the remaining cells for later numeric conversion.

# Training data set.
all_data = np.loadtxt(fname='antifungales.csv', dtype='S', delimiter=',')
header, datos = all_data[0, :], all_data[1:, 1:]

# Second data set (used below as the test matrix).
todos_peña = np.loadtxt(fname='colorantespena.csv', dtype='S', delimiter=',')
peña = todos_peña[1:, 1:]

#cleaning data 


def clean_data(dataset):
    '''Convert an array of textual cells into a float array.

    Parameters
    ----------
    dataset : array-like of bytes or str
        Cells holding numbers written as text. Any shape is accepted.

    Returns
    -------
    numpy.ndarray
        Float array with the same shape as ``dataset``. Empty (or
        whitespace-only) cells and non-finite values ('Infinity',
        '-Infinity', 'NaN') are returned as ``np.nan``.

    Raises
    ------
    ValueError
        If a non-empty cell cannot be parsed as a number.
    '''
    raw = np.asarray(dataset)
    # Start from all-NaN so that missing / non-finite cells need no write.
    # (np.nan rather than the np.NAN alias, which NumPy 2.0 removed.)
    clean_dataset = np.full(raw.shape, np.nan, dtype=float)

    for index, cell in np.ndenumerate(raw):
        # Accept both byte strings (dtype 'S') and unicode strings.
        text = cell.decode() if isinstance(cell, bytes) else str(cell)
        text = text.strip()
        if not text:
            continue
        value = float(text)
        # Treat +/-infinity like a missing value, consistently for both signs.
        if np.isfinite(value):
            clean_dataset[index] = value

    return clean_dataset

# Numeric versions of the two raw byte-string matrices.
X_train = clean_data(datos)
X_test = clean_data(peña)

# Array of classes.
# Every training row gets label 1. The length is taken from the training
# matrix instead of a hard-coded 70 so labels and rows cannot drift apart.
# The builtin `int` replaces the `np.int` alias, which NumPy 1.24 removed.
clases = np.ones((X_train.shape[0],), dtype=int)
# Labels for the second data set are read as text, then cast to int.
test_clases = np.loadtxt('clases_colorantes.csv', delimiter=',', dtype='S')
test_clases = test_clases.astype(int)
# Training labels followed by test labels, in the same order the matrices
# are stacked later on.
all_clases = np.append(clases, test_clases)

In [2]:
# Stack the training rows on top of the test rows.
# Note: this rebinds `datos`, which earlier held the raw byte-string cells.
datos = np.concatenate((X_train, X_test), axis=0)

In [3]:
# Display the shape of the stacked array as a sanity check.
datos.shape

(80, 1444)

In [4]:
# pyplot is the supported plotting interface; the pylab module is discouraged.
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

# Fit an 8-component PCA on the stacked matrix and project the same rows.
# NOTE(review): no feature scaling is applied in this cell before the PCA --
# confirm that is intended, given how much variance two components explain.
pca = PCA(n_components=8)
data_pca = pca.fit(datos).transform(datos)
# Report the share of variance captured by components 0 and 1 together.
print('explained variance of first two components: {:%}'.format(pca.explained_variance_ratio_[0:2].sum()))



explained variance of first two components: 99.999998%


In [5]:
# Plot dimension-reduced data in 2D: second vs. third principal component
# (columns 1 and 2 of `data_pca`).
#
# The masks are built from `all_clases` (training + test labels) rather than
# `clases`: `clases` only holds the training labels, so its boolean mask does
# not match the number of rows in `data_pca` and contains a single class.
colores = dict(zip(np.unique(all_clases), ('blue', 'cyan', 'yellow')))
# Legend text, indexed by the integer class value.
etiquetas = ['No Antimicrobiano', 'Antimicrobiano', 'Desconocido']
plt.figure()
for element in np.unique(all_clases):
    mask = all_clases == element
    plt.scatter(data_pca[mask, 1], data_pca[mask, 2], c=colores[element], label=etiquetas[element])

plt.legend()
plt.xlabel('PC2')
plt.ylabel('PC3')
plt.savefig('PCA23.svg')
plt.show()
