In [1]:
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
from sklearn import *
from statistics import *

In [2]:
data = pd.read_csv ('bat1010.csv',sep=',',dtype= np.float64)
x = data.iloc[:, 0:78]
y = data.iloc[:, 78]

In [3]:
#Voting Ensemble
t1 = time.process_time()

models = list()
mlp = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
gbr = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
rf = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)

models.append(('mlp', mlp))
models.append(('gbr', gbr))
models.append(('rf', rf))

n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.VotingRegressor(estimators=models, n_jobs=-1)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
print('Rmse per split: ', rmse)
print('Mae per split: ', mae)
print('R2 per split: ', r2)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  46.291625927  secs 

Rmse per split:  [6.699527996667688, 6.740469706526199, 7.1412804910619245, 6.695476141694256, 6.843080724007163, 6.699527996667688, 6.740469706526199, 7.1412804910619245, 6.696233394449255, 6.844740366409361, 6.699527996667689, 6.740469706526199, 7.1412804910619245, 6.69601244344825, 6.846858137757148, 6.699527996667689, 6.740469706526199, 7.1412804910619245, 6.693310461379399, 6.844740366409362, 6.699527996667688, 6.740469706526199, 7.1412804910619245, 6.69601244344825, 6.846858137757148]
Mae per split:  [4.986275367885849, 4.88715807038608, 5.097225170012651, 4.838848955836553, 4.852501742241867, 4.986275367885849, 4.8871580703860795, 5.097225170012651, 4.83959987394909, 4.852886027659832, 4.986275367885849, 4.88715807038608, 5.097225170012651, 4.839387071213688, 4.85337122444085, 4.986275367885849, 4.88715807038608, 5.097225170012651, 4.836208536460309, 4.852886027659832, 4.986275367885849, 4.88715807038608, 5.097225170012651, 4.839387071213688, 4.8533

In [None]:
#Gradient Boosting
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.GradientBoostingRegressor(n_estimators= 1000, max_depth= 3, learning_rate= 0.1, loss= 'ls')
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  329.79031749200004  secs 

Mean RMSE:  8.230138271076108
Mean MAE:  6.05861682032063
Mean R2:  0.9053698073852033


In [None]:
#Multilayer Perceptron
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  504.389912551  secs 

Mean RMSE:  8.284271808905485
Mean MAE:  6.229551181050705
Mean R2:  0.9042006221137947


In [None]:
#Random Forest
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)
        
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  904.786932093  secs 

Mean RMSE:  7.382442335429019
Mean MAE:  4.918004274016268
Mean R2:  0.9238641420708646


In [4]:
data = pd.read_csv ('bat2020.csv',sep=',',dtype= np.float64)
x = data.iloc[:, 0:51]
y = data.iloc[:, 51]

In [5]:
#Voting Ensemble
t1 = time.process_time()

models = list()
mlp = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
gbr = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
rf = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)

models.append(('mlp', mlp))
models.append(('gbr', gbr))
models.append(('rf', rf))
#models.append(('lr', lr))

n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.VotingRegressor(estimators=models, n_jobs=-1)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
print('Rmse per split: ', rmse)
print('Mae per split: ', mae)
print('R2 per split: ', r2)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  40.62489511700001  secs 

Rmse per split:  [6.682616026219003, 6.5268191289836786, 7.083345175495365, 6.649416446884298, 6.547341281460583, 6.68260883803943, 6.5268191289836786, 7.083345175495365, 6.6483483318455585, 6.547341281460584, 6.682608838039431, 6.5268191289836786, 7.0833451754953645, 6.64950275495774, 6.547357917134643, 6.682616026219003, 6.5268191289836786, 7.0829435920608645, 6.650118622812254, 6.547341281460584, 6.682616026219003, 6.5268191289836786, 7.0833451754953645, 6.647027266781976, 6.547341281460584]
Mae per split:  [4.826281030339459, 4.618130970999284, 4.99523917612052, 4.8077606426118304, 4.761157492066239, 4.826010775182844, 4.618130970999284, 4.995239176120519, 4.807836226464526, 4.761157492066239, 4.826010775182844, 4.618130970999284, 4.995239176120519, 4.8076309953968, 4.7612229251932945, 4.826281030339459, 4.618130970999284, 4.994854835914508, 4.808776546616803, 4.761157492066239, 4.82628103033946, 4.618130970999284, 4.995239176120519, 4.80763099539

In [None]:
#Gradient Boosting
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  227.55686263200005  secs 

Mean RMSE:  8.369490311896177
Mean MAE:  6.1331377063235655
Mean R2:  0.9022722609484716


In [None]:
#Multilayer Perceptron
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  495.086914122  secs 

Mean RMSE:  7.653420347375407
Mean MAE:  5.7607068380928945
Mean R2:  0.9182930418445812


In [None]:
#Random Forest
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)
        
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  622.7372018110001  secs 

Mean RMSE:  7.2947189684866505
Mean MAE:  4.861668236696622
Mean R2:  0.9256377322420055


In [6]:
data = pd.read_csv ('bat3040.csv',sep=',',dtype= np.float64)
x = data.iloc[:, 0:31]
y = data.iloc[:, 31]

In [7]:
#Voting Ensemble
t1 = time.process_time()

models = list()
mlp = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
gbr = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
rf = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)

models.append(('mlp', mlp))
models.append(('gbr', gbr))
models.append(('rf', rf))

n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.VotingRegressor(estimators=models, n_jobs=-1)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
print('Rmse per split: ', rmse)
print('Mae per split: ', mae)
print('R2 per split: ', r2)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))



Time is  36.69645165  secs 

Rmse per split:  [7.698543298446938, 8.018363598277277, 8.47455185295213, 7.984768110789977, 7.991609781936758, 7.698543298446938, 8.018611208911215, 8.47455185295213, 7.984768110789976, 7.99218071904508, 7.698543298446938, 8.018363598277277, 8.47455185295213, 7.984768110789977, 7.991470019536287, 7.698543298446937, 8.018363598277277, 8.47455185295213, 7.984768110789977, 7.989991706872215, 7.698543298446938, 8.018363598277277, 8.47455185295213, 7.984768110789977, 7.991016806426918]
Mae per split:  [5.689394760877962, 5.8152200128938505, 5.983388274571352, 5.760578900759094, 5.764049029547913, 5.689394760877962, 5.815556028677988, 5.983388274571352, 5.760578900759094, 5.764152605481144, 5.689394760877962, 5.8152200128938505, 5.983388274571352, 5.760578900759094, 5.763982562173041, 5.689394760877962, 5.8152200128938505, 5.983388274571352, 5.760578900759094, 5.763120670608584, 5.689394760877962, 5.8152200128938505, 5.983388274571352, 5.760578900759094, 5.76361

In [None]:
#Gradient Boosting
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  135.0758827960001  secs 

Mean RMSE:  9.064280822724855
Mean MAE:  6.760921604324182
Mean R2:  0.8853848675819687


In [None]:
#Multilayer Perceptron
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  463.67287233799993  secs 

Mean RMSE:  10.275872385537221
Mean MAE:  7.679545783149506
Mean R2:  0.8525009819542253


In [None]:
#Random Forest
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)
        
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  398.7127326299999  secs 

Mean RMSE:  7.8899197004266375
Mean MAE:  5.34922598239483
Mean R2:  0.9131612636568109


In [8]:
data = pd.read_csv ('bat4080.csv',sep=',',dtype= np.float64)
x = data.iloc[:, 0:67]
y = data.iloc[:, 67]

In [9]:
#Voting Ensemble
t1 = time.process_time()

models = list()
mlp = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
gbr = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
rf = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)

models.append(('mlp', mlp))
models.append(('gbr', gbr))
models.append(('rf', rf))

n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.VotingRegressor(estimators=models, n_jobs=-1)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
print('Rmse per split: ', rmse)
print('Mae per split: ', mae)
print('R2 per split: ', r2)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  37.754143709000004  secs 

Rmse per split:  [6.817422932966837, 6.888263989800142, 7.375161064780479, 6.807742044411623, 7.098929980070928, 6.817422932966837, 6.888263989800142, 7.375161064780479, 6.810179442805772, 7.0989299800709285, 6.817422932966837, 6.888263989800142, 7.374895333344278, 6.808121019839605, 7.098929980070928, 6.817422932966838, 6.888263989800141, 7.374775442490358, 6.807742044411622, 7.0989299800709285, 6.817422932966837, 6.888263989800142, 7.37516106478048, 6.810847422936351, 7.0989299800709285]
Mae per split:  [4.999346353075878, 4.818627025468448, 5.079733740721713, 4.891163180045677, 5.1410152989608875, 4.999346353075877, 4.818627025468448, 5.079733740721713, 4.892502064165781, 5.141015298960889, 4.999346353075877, 4.818627025468448, 5.079733740721713, 4.891378585817135, 5.1410152989608875, 4.999346353075878, 4.818627025468448, 5.07934613527015, 4.891163180045675, 5.141015298960888, 4.999346353075877, 4.818627025468448, 5.079733740721714, 4.892851377427

In [None]:
#Gradient Boosting
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  281.524090451  secs 

Mean RMSE:  8.432318351199807
Mean MAE:  6.22362640725734
Mean R2:  0.9007254608857469


In [None]:
#Multilayer Perceptron
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  545.42035012  secs 

Mean RMSE:  7.986368686322368
Mean MAE:  6.001451606474944
Mean R2:  0.9110653770503958


In [None]:
#Random Forest
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)
        
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  789.863965693  secs 

Mean RMSE:  7.547074796609704
Mean MAE:  5.0753690978825095
Mean R2:  0.9204032461228756


In [10]:
data = pd.read_csv ('bat50160.csv',sep=',',dtype= np.float64)
x = data.iloc[:, 0:50]
y = data.iloc[:, 50]

In [11]:
#Voting Ensemble
t1 = time.process_time()

models = list()
mlp = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
gbr = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
rf = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)

models.append(('mlp', mlp))
models.append(('gbr', gbr))
models.append(('rf', rf))

n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.VotingRegressor(estimators=models, n_jobs=-1)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
print('Rmse per split: ', rmse)
print('Mae per split: ', mae)
print('R2 per split: ', r2)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  33.14920364299999  secs 

Rmse per split:  [6.617781601778183, 6.560174761405277, 7.189365900119987, 6.827271971054238, 6.933697596262316, 6.617781601778184, 6.560174761405277, 7.189365900119987, 6.829685126155664, 6.9336975962623155, 6.617781601778184, 6.560174761405277, 7.189365900119988, 6.831127812864305, 6.9336975962623155, 6.618141169152944, 6.5601747614052766, 7.189365900119987, 6.825573250710143, 6.933697596262316, 6.617781601778184, 6.560174761405277, 7.189365900119987, 6.832660547371889, 6.933697596262316]
Mae per split:  [4.953535023409909, 4.802640865099719, 5.1440662119875595, 4.987088723568585, 4.9032104298582775, 4.953535023409909, 4.802640865099719, 5.1440662119875595, 4.987675124139783, 4.9032104298582775, 4.953535023409908, 4.802640865099719, 5.14406621198756, 4.988422162113001, 4.903210429858277, 4.95333385857064, 4.802640865099719, 5.1440662119875595, 4.986047269842653, 4.9032104298582775, 4.953535023409909, 4.802640865099719, 5.1440662119875595, 4.98874914

In [None]:
#Gradient Boosting
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  234.87903634300005  secs 

Mean RMSE:  8.271182199476472
Mean MAE:  6.095701487702876
Mean R2:  0.9043524053387781


In [None]:
#Multilayer Perceptron
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  577.2379251  secs 

Mean RMSE:  8.260114131811338
Mean MAE:  6.29979132239857
Mean R2:  0.904863109660366


In [None]:
#Random Forest
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)
        
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  679.9962202090001  secs 

Mean RMSE:  7.2819013525844625
Mean MAE:  4.909932376271729
Mean R2:  0.9259265408529728
