In [1]:
from sklearn.model_selection import KFold, StratifiedKFold
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import numpy as np
import pandas as pd
import time
import matplotlib.pyplot as plt
from sklearn import *
from statistics import *

In [2]:
data = pd.read_csv ('cuckoo1010.csv',sep=',',dtype= np.float64)
x = data.iloc[:, 0:54]
y = data.iloc[:, 54]

In [3]:
#Voting Ensemble
t1 = time.process_time()

models = list()
mlp = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
gbr = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
rf = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)

models.append(('mlp', mlp))
models.append(('gbr', gbr))
models.append(('rf', rf))

n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.VotingRegressor(estimators=models, n_jobs=-1)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
print('Rmse per split: ', rmse)
print('Mae per split: ', mae)
print('R2 per split: ', r2)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))



Time is  41.910955679  secs 

Rmse per split:  [6.4856439385924505, 6.1802147594257555, 6.915466493282468, 6.607249164822035, 6.638120578761677, 6.4856439385924505, 6.1802147594257555, 6.915466493282468, 6.6082979350195625, 6.638120578761676, 6.4856439385924505, 6.1802147594257555, 6.915466493282468, 6.607249164822034, 6.638120578761676, 6.4856439385924505, 6.180214759425755, 6.915466493282468, 6.6082979350195625, 6.638120578761676, 6.4856439385924505, 6.180214759425755, 6.915466493282468, 6.607249164822035, 6.638120578761676]
Mae per split:  [4.752532938666466, 4.50017059511765, 4.903386374162184, 4.7295722977251025, 4.697403588969783, 4.752532938666466, 4.500170595117651, 4.903386374162184, 4.731495121748298, 4.697403588969783, 4.752532938666466, 4.50017059511765, 4.903386374162184, 4.7295722977251025, 4.697403588969784, 4.752532938666466, 4.50017059511765, 4.903386374162184, 4.731495121748299, 4.697403588969783, 4.752532938666466, 4.500170595117649, 4.903386374162184, 4.729572297725

In [None]:
#Gradient Boosting
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  254.66454651900003  secs 

Mean RMSE:  8.216946482295356
Mean MAE:  6.040960130579838
Mean R2:  0.9057291264347046


In [None]:
#Multilayer Perceptron
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  431.082683837  secs 

Mean RMSE:  7.331781718512818
Mean MAE:  5.5494304030239885
Mean R2:  0.9248394476830194


In [None]:
#Random Forest
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)
        
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  667.672822601  secs 

Mean RMSE:  7.245841377601706
Mean MAE:  4.805624029700142
Mean R2:  0.9266695633735949


In [4]:
data = pd.read_csv ('cuckoo2020.csv',sep=',',dtype= np.float64)
x = data.iloc[:, 0:40]
y = data.iloc[:, 40]

In [5]:
#Voting Ensemble
t1 = time.process_time()

models = list()
mlp = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
gbr = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
rf = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)

models.append(('mlp', mlp))
models.append(('gbr', gbr))
models.append(('rf', rf))

n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.VotingRegressor(estimators=models, n_jobs=-1)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
print('Rmse per split: ', rmse)
print('Mae per split: ', mae)
print('R2 per split: ', r2)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  42.428301417  secs 

Rmse per split:  [6.794494209064911, 6.499110769760559, 7.442118756338179, 7.082034931104707, 6.851495847265875, 6.794494209064911, 6.498873167489782, 7.442118756338179, 7.082034931104707, 6.851495847265875, 6.794494209064911, 6.499086805002821, 7.442118756338179, 7.082034931104707, 6.851495847265875, 6.794494209064911, 6.498877887042421, 7.442118756338179, 7.082034931104707, 6.851495847265875, 6.794494209064911, 6.498944643117199, 7.442118756338179, 7.082034931104706, 6.851495847265875]
Mae per split:  [4.974440545981652, 4.570018862472951, 5.180930693815576, 5.056138296274574, 4.9351044381577625, 4.974440545981652, 4.57001886247295, 5.180930693815576, 5.0561382962745745, 4.9351044381577625, 4.974440545981652, 4.570177540122493, 5.180930693815576, 5.0561382962745745, 4.9351044381577625, 4.974440545981652, 4.5698601848234075, 5.180930693815576, 5.056138296274574, 4.9351044381577625, 4.974440545981652, 4.570018862472951, 5.180930693815576, 5.056138296274574

In [None]:
#Gradient Boosting
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  188.97630078000003  secs 

Mean RMSE:  8.569793296118661
Mean MAE:  6.305398999262755
Mean R2:  0.8973870060566072


In [None]:
#Multilayer Perceptron
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  454.805064272  secs 

Mean RMSE:  8.0592597780115
Mean MAE:  6.044817383954251
Mean R2:  0.9091095783510944


In [None]:
#Random Forest
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)
        
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  494.34590613599994  secs 

Mean RMSE:  7.360844273622841
Mean MAE:  4.903944893059235
Mean R2:  0.924341237387049


In [6]:
data = pd.read_csv ('cuckoo3040.csv',sep=',',dtype= np.float64)
x = data.iloc[:, 0:32]
y = data.iloc[:, 32]

In [7]:
#Voting Ensemble
t1 = time.process_time()

models = list()
mlp = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
gbr = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
rf = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)

models.append(('mlp', mlp))
models.append(('gbr', gbr))
models.append(('rf', rf))

n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.VotingRegressor(estimators=models, n_jobs=-1)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
print('Rmse per split: ', rmse)
print('Mae per split: ', mae)
print('R2 per split: ', r2)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))



Time is  35.939944909000005  secs 

Rmse per split:  [7.681510495393649, 7.3060396945550155, 8.215421592317895, 7.529913850823039, 7.551096910176424, 7.681510495393649, 7.306039694555015, 8.215421592317895, 7.529913850823039, 7.5510969101764225, 7.681510495393649, 7.306039694555015, 8.215421592317895, 7.52991385082304, 7.551096910176423, 7.681510495393649, 7.3060396945550155, 8.215421592317895, 7.529913850823039, 7.551096910176424, 7.681510495393649, 7.306039694555015, 8.215421592317895, 7.529913850823039, 7.551096910176423]
Mae per split:  [5.583112913051763, 5.386419754646426, 5.9423502685949945, 5.488017166979362, 5.418494236311869, 5.583112913051761, 5.386419754646425, 5.942350268594994, 5.488017166979361, 5.418494236311869, 5.583112913051761, 5.386419754646425, 5.9423502685949945, 5.488017166979361, 5.418494236311869, 5.583112913051761, 5.386419754646425, 5.9423502685949945, 5.488017166979362, 5.418494236311869, 5.583112913051761, 5.386419754646425, 5.9423502685949945, 5.488017166

In [None]:
#Gradient Boosting
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  156.57441470899994  secs 

Mean RMSE:  8.849074890151604
Mean MAE:  6.5099877774719355
Mean R2:  0.8904668042523329


In [None]:
#Multilayer Perceptron
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  521.9237461299999  secs 

Mean RMSE:  9.478520109948676
Mean MAE:  7.199811791941863
Mean R2:  0.8745905932208315


In [None]:
#Random Forest
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)
        
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  439.9819102240001  secs 

Mean RMSE:  7.936730751805148
Mean MAE:  5.382929742745915
Mean R2:  0.9121411720802534


In [8]:
data = pd.read_csv ('cuckoo4080.csv',sep=',',dtype= np.float64)
x = data.iloc[:, 0:34]
y = data.iloc[:, 34]

In [9]:
#Voting Ensemble
t1 = time.process_time()

models = list()
mlp = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
gbr = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
rf = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)

models.append(('mlp', mlp))
models.append(('gbr', gbr))
models.append(('rf', rf))

n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.VotingRegressor(estimators=models, n_jobs=-1)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
print('Rmse per split: ', rmse)
print('Mae per split: ', mae)
print('R2 per split: ', r2)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  34.279094519  secs 

Rmse per split:  [7.652332619704622, 7.312650650849224, 8.291684605468642, 7.745721536108293, 7.868058748390244, 7.652332619704622, 7.312650650849224, 8.29168460546864, 7.745721536108293, 7.867906839356532, 7.652332619704622, 7.312650650849224, 8.29168460546864, 7.745721536108293, 7.867733682420009, 7.652332619704622, 7.312650650849224, 8.29168460546864, 7.745721536108293, 7.868058748390244, 7.652332619704622, 7.312650650849224, 8.29168460546864, 7.7457215361082925, 7.868058748390244]
Mae per split:  [5.79297298102121, 5.302834819028147, 5.884231023514615, 5.692472601368296, 5.555508791571697, 5.79297298102121, 5.302834819028148, 5.884231023514614, 5.692472601368295, 5.5551946587814935, 5.79297298102121, 5.302834819028148, 5.884231023514614, 5.692472601368295, 5.554810391082767, 5.79297298102121, 5.302834819028148, 5.884231023514615, 5.692472601368295, 5.555508791571697, 5.792972981021209, 5.302834819028147, 5.884231023514614, 5.692472601368295, 5.55550879

In [None]:
#Gradient Boosting
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  154.57221371799994  secs 

Mean RMSE:  8.674071841131916
Mean MAE:  6.321762000699608
Mean R2:  0.8948646820896209


In [None]:
#Multilayer Perceptron
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  489.436917301  secs 

Mean RMSE:  10.347579414758862
Mean MAE:  7.7864754653008195
Mean R2:  0.850549241452696


In [None]:
#Random Forest
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)
        
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  442.00662586  secs 

Mean RMSE:  7.703928258224689
Mean MAE:  5.230583938721984
Mean R2:  0.9171468175574551


In [10]:
data = pd.read_csv ('cuckoo50160.csv',sep=',',dtype= np.float64)
x = data.iloc[:, 0:31]
y = data.iloc[:, 31]

In [11]:
#Voting Ensemble
t1 = time.process_time()

models = list()
mlp = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
gbr = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
rf = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)

models.append(('mlp', mlp))
models.append(('gbr', gbr))
models.append(('rf', rf))

n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.VotingRegressor(estimators=models, n_jobs=-1)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
print('Rmse per split: ', rmse)
print('Mae per split: ', mae)
print('R2 per split: ', r2)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  32.427176839  secs 

Rmse per split:  [7.094312429072221, 6.577038021873645, 7.4410810185334535, 7.178454324253339, 6.985477134587008, 7.094312429072221, 6.577038021873646, 7.441081018533452, 7.182099912887546, 6.985477134587008, 7.094312429072219, 6.577038021873645, 7.4410810185334535, 7.181435581839144, 6.985477134587008, 7.094312429072221, 6.577038021873645, 7.441081018533452, 7.1825481096045465, 6.985477134587008, 7.094312429072219, 6.577038021873645, 7.4410810185334535, 7.183404603544403, 6.985477134587008]
Mae per split:  [5.193174645089942, 4.773768395611034, 5.3505790395084105, 5.134526552400102, 4.946266248709228, 5.193174645089942, 4.773768395611035, 5.3505790395084105, 5.136813372395286, 4.946266248709228, 5.193174645089942, 4.773768395611034, 5.3505790395084105, 5.136335067969824, 4.946266248709228, 5.193174645089942, 4.773768395611034, 5.3505790395084105, 5.137073103955157, 4.946266248709228, 5.193174645089942, 4.773768395611033, 5.3505790395084105, 5.137482902451

In [None]:
#Gradient Boosting
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.GradientBoostingRegressor(n_estimators= 1000, learning_rate= 0.1, loss= 'ls')
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  171.058971424  secs 

Mean RMSE:  8.391810406356987
Mean MAE:  6.221633776938588
Mean R2:  0.9016854719580624


In [None]:
#Multilayer Perceptron
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = neural_network.MLPRegressor(hidden_layer_sizes=500, activation='relu', max_iter=500, solver='adam', alpha=0.1, batch_size='auto', 
                                            learning_rate='adaptive', learning_rate_init=0.01, shuffle=True, random_state=7)
    
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  443.274075582  secs 

Mean RMSE:  8.319792835015615
Mean MAE:  6.242268148448509
Mean R2:  0.9034174933291915


In [None]:
#Random Forest
t1 = time.process_time()
n_repeat = 5
outer_cv = KFold(n_splits=5, shuffle=True, random_state=7)

rmse = []
mae = []
r2 = []
for i in range(n_repeat):
    for train_idx, test_idx in outer_cv.split(x, y):
        train_data, test_data = x.iloc[train_idx], x.iloc[test_idx]
        train_target = y.iloc[train_idx]
    
        model = ensemble.RandomForestRegressor(n_estimators= 1000, random_state=7)
        
        classifier = model.fit(train_data, train_target)
        y_pred = classifier.predict(test_data)
        rmsee = np.sqrt(metrics.mean_squared_error(y.iloc[test_idx], y_pred))
        maee = metrics.mean_absolute_error(y.iloc[test_idx], y_pred)
        r22 = metrics.r2_score(y.iloc[test_idx], y_pred)
        rmse.append(rmsee)
        mae.append(maee)
        r2.append(r22)
        #print(rmse)
t2 = time.process_time()
print('Time is ', str(t2-t1), ' secs \n')
#print('Rmse per split: ', rmse)
print('Mean RMSE: ', mean(rmse))
print('Mean MAE: ', mean(mae))
print('Mean R2: ', mean(r2))

Time is  466.4573130059998  secs 

Mean RMSE:  7.607161011974955
Mean MAE:  5.139050110649218
Mean R2:  0.9192092064625758
