* Here I have implemented an artificial neural network (ANN) using scikit-learn's `MLPRegressor`.
* I have used TPOT for hyperparameter tuning.
* TPOT uses a genetic algorithm to tune the hyperparameters.

# Import Libraries

In [None]:
import warnings

import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import StandardScaler
from tpot import TPOTClassifier
from tpot import TPOTRegressor
warnings.filterwarnings('ignore')



# Data Preparation

In [None]:
# Training data: read the CSV, then discard every row that has a missing value.
df = pd.read_csv('/content/train.csv')
df = df.dropna()
df.head(5)

Unnamed: 0,gender,age,number_of_kids,day,online_hours
0,FEMALE,49,4,1,2.233333
1,FEMALE,49,4,2,2.516667
2,FEMALE,49,4,5,4.683333
3,FEMALE,49,4,6,3.133333
4,FEMALE,49,4,7,2.65


In [None]:
# Binary-encode gender: 1 for FEMALE, 0 for anything else.
# A direct comparison is used instead of pd.get_dummies(...)['FEMALE'] because:
#   * get_dummies raises KeyError when no FEMALE row is present in the frame;
#   * get_dummies returns booleans by default since pandas 2.0, whereas this
#     always yields integers.
# The dtype guard keeps the cell idempotent: re-running it on the already
# encoded (numeric) column leaves the column untouched.
if not pd.api.types.is_numeric_dtype(df['gender']):
    df['gender'] = (df['gender'] == 'FEMALE').astype(int)
df.head()

Unnamed: 0,gender,age,number_of_kids,day,online_hours
0,1,49,4,1,2.233333
1,1,49,4,2,2.516667
2,1,49,4,5,4.683333
3,1,49,4,6,3.133333
4,1,49,4,7,2.65


In [None]:
# Standardise the training features.
# The slice [1:-1] skips the first column and the last column; per the frame
# displayed above those are the encoded gender flag and the online_hours target.
scaler = StandardScaler()
scaled_cols = df.columns[1:-1]
# Fit on the training frame only; the fitted scaler is reused for the test frame.
df[scaled_cols] = scaler.fit_transform(df[scaled_cols])
df.head(5)

Unnamed: 0,gender,age,number_of_kids,day,online_hours
0,1,0.917488,1.722336,-1.654684,2.233333
1,1,0.917488,1.722336,-1.491059,2.516667
2,1,0.917488,1.722336,-1.000185,4.683333
3,1,0.917488,1.722336,-0.83656,3.133333
4,1,0.917488,1.722336,-0.672935,2.65


In [None]:
# Test data: read the CSV, then discard every row that has a missing value.
df2 = pd.read_csv('/content/test.csv')
df2 = df2.dropna()
df2.head(5)

Unnamed: 0,gender,age,number_of_kids,day,online_hours
0,MALE,26,2,28,7
1,MALE,26,2,27,9
2,MALE,26,2,26,9
3,MALE,26,2,25,10
4,MALE,26,2,24,9


In [None]:
# Binary-encode gender in the test frame: 1 for FEMALE, 0 for anything else.
# A direct comparison is used instead of pd.get_dummies(...)['FEMALE'] because
# get_dummies raises KeyError when the frame contains no FEMALE row, and returns
# booleans rather than integers by default since pandas 2.0.
# The dtype guard keeps the cell idempotent: re-running it on the already
# encoded (numeric) column leaves the column untouched.
if not pd.api.types.is_numeric_dtype(df2['gender']):
    df2['gender'] = (df2['gender'] == 'FEMALE').astype(int)
df2.head()

Unnamed: 0,gender,age,number_of_kids,day,online_hours
0,0,26,2,28,7
1,0,26,2,27,9
2,0,26,2,26,9
3,0,26,2,25,10
4,0,26,2,24,9


In [None]:
# Apply the already-fitted scaler to the test features (transform only, no
# re-fit), using the same [1:-1] column slice as for the training frame.
# NOTE(review): this overwrites df2 in place, so running the cell a second time
# would scale the already-scaled values again.
scaled_cols = df2.columns[1:-1]
df2[scaled_cols] = scaler.transform(df2[scaled_cols])
df2.head(5)

Unnamed: 0,gender,age,number_of_kids,day,online_hours
0,0,-0.701578,0.394894,2.76319,7
1,0,-0.701578,0.394894,2.599565,9
2,0,-0.701578,0.394894,2.43594,9
3,0,-0.701578,0.394894,2.272315,10
4,0,-0.701578,0.394894,2.10869,9


# Train-test split

In [None]:
# Separate inputs from the target: the last column is the target, every
# column before it is a feature.
X_train, y_train = df.iloc[:, :-1], df.iloc[:, -1]

# Same separation for the held-out test frame.
X_test, y_test = df2.iloc[:, :-1], df2.iloc[:, -1]

# Training

In [None]:
# Domain/Search Space
# Candidate MLPRegressor hyperparameter values handed to TPOT.
# Hidden layers: 2 or 3 layers of equal width, with width 7 or 4 ->
# (7, 7), (7, 7, 7), (4, 4), (4, 4, 4).
parameters = dict(
    hidden_layer_sizes=[(width,) * depth for width in (7, 4) for depth in (2, 3)],
    activation=['relu', 'tanh', 'logistic'],
    solver=['sgd', 'adam'],
    alpha=[0.01, 0.0001],
    learning_rate_init=[0.1, 0.001],
    learning_rate=['constant', 'invscaling', 'adaptive'],
)

In [None]:
# training
# Genetic search over pipelines built only from MLPRegressor, drawing
# hyperparameter values from the `parameters` search space.
model = TPOTRegressor(
    generations=5,
    population_size=50,
    verbosity=2,
    config_dict={'sklearn.neural_network.MLPRegressor': parameters},
    cv=2,
    # TPOTRegressor's default metric, stated explicitly so the (negative)
    # internal CV scores in the log are easy to interpret.
    scoring='neg_mean_squared_error',
    # Seed the stochastic search so a fresh Restart & Run All is comparable
    # with previous runs.
    random_state=42,
)

model.fit(X_train, y_train)

HBox(children=(FloatProgress(value=0.0, description='Optimization Progress', max=300.0, style=ProgressStyle(de…


Generation 1 - Current best internal CV score: -6.6288511749634065

Generation 2 - Current best internal CV score: -6.6265454110364646

Generation 3 - Current best internal CV score: -6.621462042041843

Generation 4 - Current best internal CV score: -6.5872308866098255

Generation 5 - Current best internal CV score: -6.5872308866098255

Best pipeline: MLPRegressor(MLPRegressor(input_matrix, activation=tanh, alpha=0.01, hidden_layer_sizes=(4, 4), learning_rate=adaptive, learning_rate_init=0.1, solver=sgd), activation=logistic, alpha=0.01, hidden_layer_sizes=(4, 4), learning_rate=adaptive, learning_rate_init=0.001, solver=sgd)


TPOTClassifier(config_dict={'sklearn.neural_network.MLPRegressor': {'activation': ['relu',
                                                                                   'tanh',
                                                                                   'logistic'],
                                                                    'alpha': [0.01,
                                                                              0.0001],
                                                                    'hidden_layer_sizes': [(7,
                                                                                            7),
                                                                                           (7,
                                                                                            7,
                                                                                            7),
                                                                          

# Testing

In [None]:
# Root-mean-squared error on the held-out test set.
y_pred = model.predict(X_test)
# The `squared=False` keyword of mean_squared_error was deprecated in
# scikit-learn 1.4 and removed in 1.6; taking the square root of the MSE gives
# the same RMSE on every scikit-learn version.
mean_squared_error(y_test, y_pred) ** 0.5

3.10242224012687