In [1]:

# Importando as dependências
from sklearn.neural_network import MLPClassifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as mpl
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

In [2]:

#Importando dataset
df = pd.read_csv('dados/multiplelinearregretion.csv', delimiter=',')
df.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94


In [3]:
df.rename(columns={'R&D Spend':'RDSpende', 'Marketing Spend': 'MarketingSpend'}, inplace=True)
df.head()

Unnamed: 0,RDSpende,Administration,MarketingSpend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94


In [4]:
df.corr()

Unnamed: 0,RDSpende,Administration,MarketingSpend,Profit
RDSpende,1.0,0.241955,0.724248,0.9729
Administration,0.241955,1.0,-0.032154,0.200717
MarketingSpend,0.724248,-0.032154,1.0,0.747766
Profit,0.9729,0.200717,0.747766,1.0


In [5]:
# Preparando os dados
X = df.iloc[:, :-1].values # Todas as dimensões, exceto a Profit
y = df.iloc[:, -1].values

In [6]:
X

array([[165349.2, 136897.8, 471784.1, 'New York'],
       [162597.7, 151377.59, 443898.53, 'California'],
       [153441.51, 101145.55, 407934.54, 'Florida'],
       [144372.41, 118671.85, 383199.62, 'New York'],
       [142107.34, 91391.77, 366168.42, 'Florida'],
       [131876.9, 99814.71, 362861.36, 'New York'],
       [134615.46, 147198.87, 127716.82, 'California'],
       [130298.13, 145530.06, 323876.68, 'Florida'],
       [120542.52, 148718.95, 311613.29, 'New York'],
       [123334.88, 108679.17, 304981.62, 'California'],
       [101913.08, 110594.11, 229160.95, 'Florida'],
       [100671.96, 91790.61, 249744.55, 'California'],
       [93863.75, 127320.38, 249839.44, 'Florida'],
       [91992.39, 135495.07, 252664.93, 'California'],
       [119943.24, 156547.42, 256512.92, 'Florida'],
       [114523.61, 122616.84, 261776.23, 'New York'],
       [78013.11, 121597.55, 264346.06, 'California'],
       [94657.16, 145077.58, 282574.31, 'New York'],
       [91749.16, 114175.79, 29491

In [7]:
labelEnconding = LabelEncoder()
X[:, -1] = labelEnconding.fit_transform(X[:, -1])

X[:, -1]

array([2, 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 0, 1, 0, 1, 2, 0, 2, 1, 2, 0, 2,
       1, 1, 2, 0, 1, 2, 1, 2, 1, 2, 0, 1, 0, 2, 1, 0, 2, 0, 0, 1, 0, 2,
       0, 2, 1, 0, 2, 0], dtype=object)

In [8]:
ohe = OneHotEncoder(categorical_features=[3]) # 
# X = ohe.fit_transform(X).toarray() # Realizando a criação das Dummy Variables
# X

# y = ohe.fit_transform(y).toarray()
# y

# # Aplicando a técnica Dummy Coding
dc = OneHotEncoder(handle_unknown='ignore')
# # Realizando treinamento
# y = dc.fit_transform(y.reshape(-1, 1)).toarray()
y = dc.fit_transform(y.reshape(-1, 1)).toarray()


In [9]:
# X = X[:, 1:]
# y
X

array([[165349.2, 136897.8, 471784.1, 2],
       [162597.7, 151377.59, 443898.53, 0],
       [153441.51, 101145.55, 407934.54, 1],
       [144372.41, 118671.85, 383199.62, 2],
       [142107.34, 91391.77, 366168.42, 1],
       [131876.9, 99814.71, 362861.36, 2],
       [134615.46, 147198.87, 127716.82, 0],
       [130298.13, 145530.06, 323876.68, 1],
       [120542.52, 148718.95, 311613.29, 2],
       [123334.88, 108679.17, 304981.62, 0],
       [101913.08, 110594.11, 229160.95, 1],
       [100671.96, 91790.61, 249744.55, 0],
       [93863.75, 127320.38, 249839.44, 1],
       [91992.39, 135495.07, 252664.93, 0],
       [119943.24, 156547.42, 256512.92, 1],
       [114523.61, 122616.84, 261776.23, 2],
       [78013.11, 121597.55, 264346.06, 0],
       [94657.16, 145077.58, 282574.31, 2],
       [91749.16, 114175.79, 294919.57, 1],
       [86419.7, 153514.11, 0.0, 2],
       [76253.86, 113867.3, 298664.47, 0],
       [78389.47, 153773.43, 299737.29, 2],
       [73994.56, 122782.75, 30331

In [10]:
# Seleção de amostras para treino e teste
X_treino, X_teste, y_treino, y_teste = train_test_split(X, y, test_size=0.3)

In [11]:
# Criando um modelo de Rede Neural MLP
classificador = MLPClassifier(hidden_layer_sizes=(2,), activation='logistic', solver='adam', verbose=10, random_state=1,
                    learning_rate_init=.01,  max_iter=1000);




In [12]:
# Treinando o modelo
classificador.fit(X_treino, y_treino)
# y_treino

Iteration 1, loss = 34.31045957
Iteration 2, loss = 34.07332747
Iteration 3, loss = 33.83747288
Iteration 4, loss = 33.60291715
Iteration 5, loss = 33.36968110
Iteration 6, loss = 33.13778500
Iteration 7, loss = 32.90724853
Iteration 8, loss = 32.67809075
Iteration 9, loss = 32.45032999
Iteration 10, loss = 32.22398391
Iteration 11, loss = 31.99906936
Iteration 12, loss = 31.77560245
Iteration 13, loss = 31.55359841
Iteration 14, loss = 31.33307167
Iteration 15, loss = 31.11403578
Iteration 16, loss = 30.89650338
Iteration 17, loss = 30.68048626
Iteration 18, loss = 30.46599526
Iteration 19, loss = 30.25304034
Iteration 20, loss = 30.04163054
Iteration 21, loss = 29.83177399
Iteration 22, loss = 29.62347794
Iteration 23, loss = 29.41674872
Iteration 24, loss = 29.21159179
Iteration 25, loss = 29.00801177
Iteration 26, loss = 28.80601240
Iteration 27, loss = 28.60559663
Iteration 28, loss = 28.40676658
Iteration 29, loss = 28.20952359
Iteration 30, loss = 28.01386827
Iteration 31, loss 

Iteration 373, loss = 4.66265509
Iteration 374, loss = 4.66213823
Iteration 375, loss = 4.66162507
Iteration 376, loss = 4.66111558
Iteration 377, loss = 4.66060972
Iteration 378, loss = 4.66010744
Iteration 379, loss = 4.65960872
Iteration 380, loss = 4.65911352
Iteration 381, loss = 4.65862179
Iteration 382, loss = 4.65813350
Iteration 383, loss = 4.65764862
Iteration 384, loss = 4.65716711
Iteration 385, loss = 4.65668894
Iteration 386, loss = 4.65621407
Iteration 387, loss = 4.65574246
Iteration 388, loss = 4.65527410
Iteration 389, loss = 4.65480893
Iteration 390, loss = 4.65434694
Iteration 391, loss = 4.65388808
Iteration 392, loss = 4.65343234
Iteration 393, loss = 4.65297966
Iteration 394, loss = 4.65253004
Iteration 395, loss = 4.65208343
Iteration 396, loss = 4.65163981
Iteration 397, loss = 4.65119914
Iteration 398, loss = 4.65076141
Iteration 399, loss = 4.65032657
Iteration 400, loss = 4.64989461
Iteration 401, loss = 4.64946548
Iteration 402, loss = 4.64903918
Iteration 

Iteration 633, loss = 4.59381382
Iteration 634, loss = 4.59368207
Iteration 635, loss = 4.59355085
Iteration 636, loss = 4.59342016
Iteration 637, loss = 4.59328998
Iteration 638, loss = 4.59316033
Iteration 639, loss = 4.59303119
Iteration 640, loss = 4.59290257
Iteration 641, loss = 4.59277446
Iteration 642, loss = 4.59264685
Iteration 643, loss = 4.59251975
Iteration 644, loss = 4.59239315
Iteration 645, loss = 4.59226705
Iteration 646, loss = 4.59214145
Iteration 647, loss = 4.59201634
Iteration 648, loss = 4.59189172
Iteration 649, loss = 4.59176759
Iteration 650, loss = 4.59164395
Iteration 651, loss = 4.59152079
Iteration 652, loss = 4.59139811
Iteration 653, loss = 4.59127590
Iteration 654, loss = 4.59115417
Iteration 655, loss = 4.59103292
Iteration 656, loss = 4.59091213
Iteration 657, loss = 4.59079182
Iteration 658, loss = 4.59067196
Iteration 659, loss = 4.59055257
Iteration 660, loss = 4.59043364
Iteration 661, loss = 4.59031517
Iteration 662, loss = 4.59019715
Iteration 

MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(2,), learning_rate='constant',
       learning_rate_init=0.01, max_iter=1000, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=10, warm_start=False)

In [13]:
# classificador.predict(X_teste)
# print(classificador.predict(X_teste))
classificador.score(X_teste, y_teste)

0.0