In [1]:

# Importando as dependências
from sklearn.neural_network import MLPClassifier
import numpy as np
import pandas as pd
import matplotlib.pyplot as mpl
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder, OneHotEncoder

In [2]:

# Load the dataset from the project-relative CSV file and preview the first rows.
df = pd.read_csv(
    'dados/multiplelinearregretion.csv',
    sep=',',
)
df.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94


In [3]:
# Give the columns identifier-friendly names (no spaces or '&').
column_aliases = {
    'R&D Spend': 'RDSpende',
    'Marketing Spend': 'MarketingSpend',
}
df = df.rename(columns=column_aliases)
df.head()

Unnamed: 0,RDSpende,Administration,MarketingSpend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94


In [4]:
# Pairwise correlation between the numeric columns.
# `State` holds strings, so it is excluded explicitly: pandas >= 2.0 no longer
# drops non-numeric columns silently in `corr()` and raises instead.
df.select_dtypes(include='number').corr()

Unnamed: 0,RDSpende,Administration,MarketingSpend,Profit
RDSpende,1.0,0.241955,0.724248,0.9729
Administration,0.241955,1.0,-0.032154,0.200717
MarketingSpend,0.724248,-0.032154,1.0,0.747766
Profit,0.9729,0.200717,0.747766,1.0


In [5]:
# Split the frame positionally: every column except the last one is a feature,
# and the last column (Profit) is the target.
feature_frame, target_series = df.iloc[:, :-1], df.iloc[:, -1]
X = feature_frame.values
y = target_series.values

In [6]:
# Inspect the raw feature matrix; the last column still holds the State strings.
X

array([[165349.2, 136897.8, 471784.1, 'New York'],
       [162597.7, 151377.59, 443898.53, 'California'],
       [153441.51, 101145.55, 407934.54, 'Florida'],
       [144372.41, 118671.85, 383199.62, 'New York'],
       [142107.34, 91391.77, 366168.42, 'Florida'],
       [131876.9, 99814.71, 362861.36, 'New York'],
       [134615.46, 147198.87, 127716.82, 'California'],
       [130298.13, 145530.06, 323876.68, 'Florida'],
       [120542.52, 148718.95, 311613.29, 'New York'],
       [123334.88, 108679.17, 304981.62, 'California'],
       [101913.08, 110594.11, 229160.95, 'Florida'],
       [100671.96, 91790.61, 249744.55, 'California'],
       [93863.75, 127320.38, 249839.44, 'Florida'],
       [91992.39, 135495.07, 252664.93, 'California'],
       [119943.24, 156547.42, 256512.92, 'Florida'],
       [114523.61, 122616.84, 261776.23, 'New York'],
       [78013.11, 121597.55, 264346.06, 'California'],
       [94657.16, 145077.58, 282574.31, 'New York'],
       [91749.16, 114175.79, 29491

In [7]:
# Replace the State strings in the last feature column with integer codes.
# NOTE(review): State is a nominal category, so these integer codes imply an
# ordering that presumably does not exist - confirm this is acceptable.
labelEnconding = LabelEncoder()
state_codes = labelEnconding.fit_transform(X[:, -1])
X[:, -1] = state_codes

# Show the encoded column.
X[:, -1]

array([2, 0, 1, 2, 1, 2, 0, 1, 2, 0, 1, 0, 1, 0, 1, 2, 0, 2, 1, 2, 0, 2,
       1, 1, 2, 0, 1, 2, 1, 2, 1, 2, 0, 1, 0, 2, 1, 0, 2, 0, 0, 1, 0, 2,
       0, 2, 1, 0, 2, 0], dtype=object)

In [8]:
# One-hot encode the target column.
# The unused `OneHotEncoder(categorical_features=[3])` instance is gone: that
# argument was removed in scikit-learn 0.22, so constructing it raised TypeError.
# NOTE(review): Profit is a continuous value, so this produces one column per
# distinct profit; confirm that a classifier (rather than a regressor) is
# really what is intended downstream.
dc = OneHotEncoder(handle_unknown='ignore')
# Rebuild the target from `df` rather than from `y`, so re-running this cell
# does not re-encode an already encoded matrix.
y = dc.fit_transform(df.iloc[:, -1].values.reshape(-1, 1)).toarray()
y

array([[0., 0., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 1., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

In [9]:
# Inspect the feature matrix after the State column was label-encoded.
# Disabled experiment kept from the original notebook: X = X[:, 1:]
X

array([[165349.2, 136897.8, 471784.1, 2],
       [162597.7, 151377.59, 443898.53, 0],
       [153441.51, 101145.55, 407934.54, 1],
       [144372.41, 118671.85, 383199.62, 2],
       [142107.34, 91391.77, 366168.42, 1],
       [131876.9, 99814.71, 362861.36, 2],
       [134615.46, 147198.87, 127716.82, 0],
       [130298.13, 145530.06, 323876.68, 1],
       [120542.52, 148718.95, 311613.29, 2],
       [123334.88, 108679.17, 304981.62, 0],
       [101913.08, 110594.11, 229160.95, 1],
       [100671.96, 91790.61, 249744.55, 0],
       [93863.75, 127320.38, 249839.44, 1],
       [91992.39, 135495.07, 252664.93, 0],
       [119943.24, 156547.42, 256512.92, 1],
       [114523.61, 122616.84, 261776.23, 2],
       [78013.11, 121597.55, 264346.06, 0],
       [94657.16, 145077.58, 282574.31, 2],
       [91749.16, 114175.79, 294919.57, 1],
       [86419.7, 153514.11, 0.0, 2],
       [76253.86, 113867.3, 298664.47, 0],
       [78389.47, 153773.43, 299737.29, 2],
       [73994.56, 122782.75, 30331

In [10]:
# Hold out 30% of the samples for testing.
# random_state is fixed so the split, and every result derived from it, is
# reproducible after restarting the kernel and running all cells.
X_treino, X_teste, y_treino, y_teste = train_test_split(
    X, y, test_size=0.3, random_state=1
)

In [11]:
# Build the MLP neural-network classifier: two hidden layers (2 and 5 units),
# logistic activation, Adam solver, fixed seed, and per-iteration progress output.
# NOTE(review): the features reach the network unscaled (StandardScaler is
# imported but not applied in the visible cells) - confirm this is intended.
classificador = MLPClassifier(
    hidden_layer_sizes=(2, 5),
    activation='logistic',
    solver='adam',
    learning_rate_init=.1,
    max_iter=200,
    random_state=1,
    verbose=10,
)




In [12]:
# Fit the classifier on the training split.
classificador.fit(X=X_treino, y=y_treino)

Iteration 1, loss = 35.33049884
Iteration 2, loss = 27.01477074
Iteration 3, loss = 20.30538943
Iteration 4, loss = 15.08188036
Iteration 5, loss = 11.25492672
Iteration 6, loss = 8.59373302
Iteration 7, loss = 6.73530158
Iteration 8, loss = 5.61951430
Iteration 9, loss = 5.07852897
Iteration 10, loss = 4.91025423
Iteration 11, loss = 4.94410280
Iteration 12, loss = 5.06982104
Iteration 13, loss = 5.22578194
Iteration 14, loss = 5.36940050
Iteration 15, loss = 5.50684339
Iteration 16, loss = 5.61489806
Iteration 17, loss = 5.70459795
Iteration 18, loss = 5.76082185
Iteration 19, loss = 5.78366481
Iteration 20, loss = 5.75164293
Iteration 21, loss = 5.46813571
Training loss did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.


MLPClassifier(activation='logistic', alpha=0.0001, batch_size='auto',
       beta_1=0.9, beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(2, 5), learning_rate='constant',
       learning_rate_init=0.1, max_iter=200, momentum=0.9,
       n_iter_no_change=10, nesterovs_momentum=True, power_t=0.5,
       random_state=1, shuffle=True, solver='adam', tol=0.0001,
       validation_fraction=0.1, verbose=10, warm_start=False)

In [13]:
# Evaluate the fitted classifier on the held-out test split.
classificador.score(X=X_teste, y=y_teste)

0.0

TypeError: score() takes at least 3 arguments (2 given)