# PCA 

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
%matplotlib inline

In [2]:
url="http://archive.ics.uci.edu/ml/machine-learning-databases/abalone/abalone.data"

In [3]:
df = pd.read_csv(url , 
                names = ['Sex','Length','Diameter','Height','Whole weight',
                         'Shucked weight','Viscera weight','Shell weight','Rings'])

In [4]:
df.head()

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [5]:
sexo = []
for x in df['Sex']:
    if x == 'M':
        sexo.append(0)
    if x == 'F':
        sexo.append(1)
    if x == 'I':
        sexo.append(2)

In [6]:
df['sexo'] = sexo

In [7]:
df = df.drop(columns=['Sex'])

In [8]:
df = df.rename(columns={'sexo': 'Sex'})

In [9]:
df = df.rename(columns={'Rings':'Target'})

In [10]:
df.head()

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Target,Sex
0,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15,0
1,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7,0
2,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9,1
3,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10,0
4,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7,2


In [11]:
features = ['Length','Diameter','Height','Whole weight',
            'Shucked weight','Viscera weight','Shell weight','Sex']

In [12]:
x = df.loc[:, features].values

In [13]:
y = df.loc[:,['Target']].values

In [14]:
x = StandardScaler().fit_transform(x)

In [15]:
pd.DataFrame(data = x, columns = features).head()

Unnamed: 0,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Sex
0,-0.574558,-0.432149,-1.064424,-0.641898,-0.607685,-0.726212,-0.638217,-1.154346
1,-1.448986,-1.439929,-1.183978,-1.230277,-1.17091,-1.205221,-1.212987,-1.154346
2,0.050033,0.12213,-0.107991,-0.309469,-0.4635,-0.35669,-0.207139,0.053798
3,-0.699476,-0.432149,-0.347099,-0.637819,-0.648238,-0.6076,-0.602294,-1.154346
4,-1.615544,-1.540707,-1.423087,-1.272086,-1.215968,-1.287337,-1.320757,1.261943


# PCA projection

In [16]:
pca = PCA(n_components=4)

In [17]:
principalComponents = pca.fit_transform(x)

In [18]:
principalDf = pd.DataFrame(data = principalComponents
             , columns = ['principal component 1', 'principal component 2',
                          'principal component 3', 'principal component 4'])
                         #, 'principal component 5'])

In [19]:
principalDf.head()

Unnamed: 0,principal component 1,principal component 2,principal component 3,principal component 4
0,-1.479978,-1.485778,-0.412306,-0.331872
1,-3.052013,-1.821987,-0.131567,0.24995
2,-0.482987,-0.050939,0.25145,-0.443952
3,-1.238138,-1.445609,0.18637,-0.002365
4,-3.835864,0.483813,-0.264781,0.275736


In [20]:
finalDf = pd.concat([principalDf, df[['Target']]], axis = 1)


In [21]:
targets = ['1','2','3','4','5','6','7','8','9','10',
          '11','12','13','14','15','16','17','18','19','20',
          '21','22','23','24','25','26','27','28','29',]

# Classificador

In [22]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.neural_network import MLPClassifier
X = df.loc[:, features].values
Y = df.loc[:,['Target']].values

n_samples = 4177
X_train = X[:int(1 * n_samples)]
y_train = Y[:int(1 * n_samples)]
X_test = X[int(0 * n_samples):]
y_test = Y[int(0* n_samples):]


clf = MLPClassifier(hidden_layer_sizes=(100, ), activation='relu', solver='adam', alpha=0.0001, 
                    batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5,
                    max_iter=2000, shuffle=True, random_state=None, tol=0.0001, verbose=False, 
                    warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, 
                    validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10)



In [23]:
clf.fit(X_train,y_train)

  y = column_or_1d(y, warn=True)


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=2000, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [24]:
clf.predict(X_train)

array([ 8,  7,  9, ..., 10,  9, 11], dtype=int64)

In [25]:
accuracy_score(clf.predict(X_test),y_test)

0.2932726837443141

In [26]:
target_names = ['class 1', 'class 2','class 3','class 4',
                'class 5','class 6','class 7','class 8','class 9',
               'class 10', 'class 11', 'class 12','class 13','class 14',
                'class 15','class 16','class 17','class 18','class 19',
               'class 20', 'class 21', 'class 22','class 23','class 24',
                'class 25','class 26','class 27','class 28']

In [27]:
print( classification_report(y_train, clf.predict(X_train), target_names=target_names))

              precision    recall  f1-score   support

     class 1       0.00      0.00      0.00         1
     class 2       0.00      0.00      0.00         1
     class 3       0.00      0.00      0.00        15
     class 4       0.46      0.54      0.50        57
     class 5       0.41      0.37      0.39       115
     class 6       0.37      0.27      0.31       259
     class 7       0.34      0.45      0.39       391
     class 8       0.37      0.35      0.36       568
     class 9       0.28      0.51      0.36       689
    class 10       0.23      0.32      0.27       634
    class 11       0.32      0.20      0.25       487
    class 12       0.23      0.01      0.02       267
    class 13       0.15      0.13      0.14       203
    class 14       0.00      0.00      0.00       126
    class 15       0.12      0.05      0.07       103
    class 16       0.17      0.16      0.17        67
    class 17       0.25      0.05      0.09        58
    class 18       0.00    

  'precision', 'predicted', average, warn_for)


# Classificador componentes reduzidas

In [28]:
features = ['principal component 1', 'principal component 2','principal component 3', 'principal component 4']


In [29]:
Xl = principalDf.loc[:, features].values
Yl = df.loc[:,['Target']].values


In [30]:
Xl = finalDf.loc[:, features].values
Yl = finalDf.loc[:,['Target']].values

n_samplesl = 4177
X_trainl = Xl[:int(1 * n_samples)]
y_trainl = Yl[:int(1 * n_samples)]
X_testl = Xl[int(0 * n_samples):]
y_testl = Yl[int(0* n_samples):]


clfl = MLPClassifier(hidden_layer_sizes=(100, ), activation='relu', solver='adam', alpha=0.0001, 
                    batch_size='auto', learning_rate='constant', learning_rate_init=0.001, power_t=0.5,
                    max_iter=2000, shuffle=True, random_state=None, tol=0.0001, verbose=False, 
                    warm_start=False, momentum=0.9, nesterovs_momentum=True, early_stopping=False, 
                    validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08, n_iter_no_change=10)

In [31]:
clfl.fit(X_trainl,y_trainl)

  y = column_or_1d(y, warn=True)


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
              beta_2=0.999, early_stopping=False, epsilon=1e-08,
              hidden_layer_sizes=(100,), learning_rate='constant',
              learning_rate_init=0.001, max_iter=2000, momentum=0.9,
              n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
              random_state=None, shuffle=True, solver='adam', tol=0.0001,
              validation_fraction=0.1, verbose=False, warm_start=False)

In [32]:
accuracy_score(clfl.predict(X_testl),y_testl)

0.3064400287287527

In [33]:
print(classification_report(y_train, clf.predict(X_train), target_names=target_names))

              precision    recall  f1-score   support

     class 1       0.00      0.00      0.00         1
     class 2       0.00      0.00      0.00         1
     class 3       0.00      0.00      0.00        15
     class 4       0.46      0.54      0.50        57
     class 5       0.41      0.37      0.39       115
     class 6       0.37      0.27      0.31       259
     class 7       0.34      0.45      0.39       391
     class 8       0.37      0.35      0.36       568
     class 9       0.28      0.51      0.36       689
    class 10       0.23      0.32      0.27       634
    class 11       0.32      0.20      0.25       487
    class 12       0.23      0.01      0.02       267
    class 13       0.15      0.13      0.14       203
    class 14       0.00      0.00      0.00       126
    class 15       0.12      0.05      0.07       103
    class 16       0.17      0.16      0.17        67
    class 17       0.25      0.05      0.09        58
    class 18       0.00    

  'precision', 'predicted', average, warn_for)
