# Rathmines - Rank the Algorithms
Only PM10 algorithms were ranked since NO2 was primarily for benchmarking

# Import all the various packages

In [1]:
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion
from sklearn.compose import ColumnTransformer
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import StandardScaler, RobustScaler, FunctionTransformer
from sklearn.model_selection import train_test_split, GridSearchCV
import pickle
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# imports necessary for dimensionality reduction
from sklearn.decomposition import PCA
from sklearn.feature_selection import RFE
from sklearn import svm
from sklearn.decomposition import FastICA
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import FactorAnalysis
# regression algorithms
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor

from sklearn.svm import SVR

# metrics for evaluating regression models
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, median_absolute_error, explained_variance_score, max_error

from time import process_time

from sklearn.model_selection import train_test_split
from sklearn.linear_model import Lasso
from yellowbrick.regressor import PredictionError

from sklearn.neural_network import MLPRegressor

# Import Results from previous notebooks

In [2]:
RathminesFinal = pd.read_csv('Rathmines PM10 final iterations results.csv', thousands=',', low_memory=False)

In [3]:
RathminesFinal.tail()

Unnamed: 0,Algorithm,Feature Added,MAE,ME,MEAE,MSE,Max Depth,N Features,R2,Run Time,Test-Train Split,Year,Unnamed: 12
10,Linear Regression,NO2 and SO2,6.141059,142.417964,4.333517,95.494681,,9,0.258094,4m 2s,0.3,2017-2019,
11,Linear Regression,NO WIND,6.340251,132.535127,4.5763,98.335372,,7,0.252679,3m 3s,0.3,2017-2019,
12,SVM,,5.097907,135.409361,2.929126,84.782734,,7,0.304188,53s,0.3,2017-2019,
13,SVM,NO2 and SO2,4.583048,130.581039,2.683639,73.12636,,9,0.419195,58s,0.3,2017-2019,
14,SVM,NO WIND,5.048022,148.362647,2.856737,96.70872,,7,0.337406,46s,0.3,2017-2019,


# Select Results for a certain combination of input parameters

In [4]:
def SelectAlgorithms(Data, Condition):
    Selecteddf = Data
    Selecteddf = Selecteddf[(Data['Feature Added'] == Condition)]
    
    return Selecteddf
RankBase = SelectAlgorithms(RathminesFinal, 'None')
RankPollutant = SelectAlgorithms(RathminesFinal, 'NO2 and SO2')
RankNoWind = SelectAlgorithms(RathminesFinal, 'NO WIND')

In [5]:
display(RankBase)
display(RankPollutant)
display(RankNoWind)

Unnamed: 0,Algorithm,Feature Added,MAE,ME,MEAE,MSE,Max Depth,N Features,R2,Run Time,Test-Train Split,Year,Unnamed: 12
0,Random Forest,,5.061335,142.517,2.9965,84.115567,12.0,7,0.436529,9m 36s,0.3,2017-2019,
3,Gradient Boosting,,4.934718,151.821084,2.973308,85.366033,12.0,7,0.36799,2m 16s,0.3,2017-2019,
6,ANN,,5.426977,104.808756,3.560898,77.242005,,7,0.427781,1m 22s,0.3,2017-2019,
9,Linear Regression,,6.192442,140.953068,4.530404,92.575934,,7,0.213066,2m 33s,0.3,2017-2019,
12,SVM,,5.097907,135.409361,2.929126,84.782734,,7,0.304188,53s,0.3,2017-2019,


Unnamed: 0,Algorithm,Feature Added,MAE,ME,MEAE,MSE,Max Depth,N Features,R2,Run Time,Test-Train Split,Year,Unnamed: 12
1,Random Forest,NO2 and SO2,4.615564,134.613563,2.99544,59.428183,12.0,9,0.53907,17m 1s,0.3,2017-2019,
4,Gradient Boosting,NO2 and SO2,4.535282,83.792687,2.749209,61.996428,12.0,9,0.516236,3m 10s,0.3,2017-2019,
7,ANN,NO2 and SO2,4.758106,99.815977,3.181172,58.384324,,9,0.511157,1m 22s,0.3,2017-2019,
10,Linear Regression,NO2 and SO2,6.141059,142.417964,4.333517,95.494681,,9,0.258094,4m 2s,0.3,2017-2019,
13,SVM,NO2 and SO2,4.583048,130.581039,2.683639,73.12636,,9,0.419195,58s,0.3,2017-2019,


Unnamed: 0,Algorithm,Feature Added,MAE,ME,MEAE,MSE,Max Depth,N Features,R2,Run Time,Test-Train Split,Year,Unnamed: 12
2,Random Forest,NO WIND,4.699566,75.221,3.022,55.833486,12.0,7,0.535983,10m 53s,0.3,2017-2019,
5,Gradient Boosting,NO WIND,5.233167,86.569208,3.609033,69.714864,12.0,7,0.474806,1m 59s,0.3,2017-2019,
8,ANN,NO WIND,5.196109,127.776182,3.358999,75.200244,,7,0.453496,1m 21s,0.3,2017-2019,
11,Linear Regression,NO WIND,6.340251,132.535127,4.5763,98.335372,,7,0.252679,3m 3s,0.3,2017-2019,
14,SVM,NO WIND,5.048022,148.362647,2.856737,96.70872,,7,0.337406,46s,0.3,2017-2019,


# Rank each individual Statistical Arbiter 
I assign rank 1 to the lowest value for MAE, ME, MEAE, MSE because the lower these statistics, the better the algorithm combination with rank 5 being assigned to the highest value. <br>
For R2 I assign rank 1 to the highest value because the higher the correlation coefficient, the better the algorithm combination. Likewise I assign rank 5 to the lowest R2 algorithm combination.

In [6]:
def Rankingdf(Data):
    DataR = Data
    DataR['MAE Rank'] = DataR['MAE'].rank(method='max')
    DataR['ME Rank'] = DataR['ME'].rank(method='max')
    DataR['MEAE Rank'] = DataR['MEAE'].rank(method='max')
    DataR['MSE Rank'] = DataR['MSE'].rank(method='max')
    DataR['R2 Rank'] = DataR['R2'].rank(ascending=False)


    return DataR
    
Rank_Base = Rankingdf(RankBase)
Rank_Pollutant = Rankingdf(RankPollutant)
Rank_NoWind = Rankingdf(RankNoWind)



display(Rank_Base)
display(Rank_Pollutant)
display(Rank_NoWind)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  after removing the cwd from sys.path.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the ca

Unnamed: 0,Algorithm,Feature Added,MAE,ME,MEAE,MSE,Max Depth,N Features,R2,Run Time,Test-Train Split,Year,Unnamed: 12,MAE Rank,ME Rank,MEAE Rank,MSE Rank,R2 Rank
0,Random Forest,,5.061335,142.517,2.9965,84.115567,12.0,7,0.436529,9m 36s,0.3,2017-2019,,2.0,4.0,3.0,2.0,1.0
3,Gradient Boosting,,4.934718,151.821084,2.973308,85.366033,12.0,7,0.36799,2m 16s,0.3,2017-2019,,1.0,5.0,2.0,4.0,3.0
6,ANN,,5.426977,104.808756,3.560898,77.242005,,7,0.427781,1m 22s,0.3,2017-2019,,4.0,1.0,4.0,1.0,2.0
9,Linear Regression,,6.192442,140.953068,4.530404,92.575934,,7,0.213066,2m 33s,0.3,2017-2019,,5.0,3.0,5.0,5.0,5.0
12,SVM,,5.097907,135.409361,2.929126,84.782734,,7,0.304188,53s,0.3,2017-2019,,3.0,2.0,1.0,3.0,4.0


Unnamed: 0,Algorithm,Feature Added,MAE,ME,MEAE,MSE,Max Depth,N Features,R2,Run Time,Test-Train Split,Year,Unnamed: 12,MAE Rank,ME Rank,MEAE Rank,MSE Rank,R2 Rank
1,Random Forest,NO2 and SO2,4.615564,134.613563,2.99544,59.428183,12.0,9,0.53907,17m 1s,0.3,2017-2019,,3.0,4.0,3.0,2.0,1.0
4,Gradient Boosting,NO2 and SO2,4.535282,83.792687,2.749209,61.996428,12.0,9,0.516236,3m 10s,0.3,2017-2019,,1.0,1.0,2.0,3.0,2.0
7,ANN,NO2 and SO2,4.758106,99.815977,3.181172,58.384324,,9,0.511157,1m 22s,0.3,2017-2019,,4.0,2.0,4.0,1.0,3.0
10,Linear Regression,NO2 and SO2,6.141059,142.417964,4.333517,95.494681,,9,0.258094,4m 2s,0.3,2017-2019,,5.0,5.0,5.0,5.0,5.0
13,SVM,NO2 and SO2,4.583048,130.581039,2.683639,73.12636,,9,0.419195,58s,0.3,2017-2019,,2.0,3.0,1.0,4.0,4.0


Unnamed: 0,Algorithm,Feature Added,MAE,ME,MEAE,MSE,Max Depth,N Features,R2,Run Time,Test-Train Split,Year,Unnamed: 12,MAE Rank,ME Rank,MEAE Rank,MSE Rank,R2 Rank
2,Random Forest,NO WIND,4.699566,75.221,3.022,55.833486,12.0,7,0.535983,10m 53s,0.3,2017-2019,,1.0,1.0,2.0,1.0,1.0
5,Gradient Boosting,NO WIND,5.233167,86.569208,3.609033,69.714864,12.0,7,0.474806,1m 59s,0.3,2017-2019,,4.0,2.0,4.0,2.0,2.0
8,ANN,NO WIND,5.196109,127.776182,3.358999,75.200244,,7,0.453496,1m 21s,0.3,2017-2019,,3.0,3.0,3.0,3.0,3.0
11,Linear Regression,NO WIND,6.340251,132.535127,4.5763,98.335372,,7,0.252679,3m 3s,0.3,2017-2019,,5.0,4.0,5.0,5.0,5.0
14,SVM,NO WIND,5.048022,148.362647,2.856737,96.70872,,7,0.337406,46s,0.3,2017-2019,,2.0,5.0,1.0,4.0,4.0


# Final Ranking
Having ranked each individual statistical arbiter, I now sum these ranks and assign a rank number of 1 to 5 based on the final tally. Rank 1 was awarded to the lowest total sum since this algorithm combination has the best combination of statistical arbiters.

In [7]:
def finalrank(Data):
    Rank = Data
    column_list = list(Rank)
    
    column_list.remove("Algorithm") 
    column_list.remove('MAE') 
    column_list.remove('ME') 
    column_list.remove('MEAE')
    column_list.remove('MSE') 
    column_list.remove('Max Depth') 
    column_list.remove('N Features') 
    column_list.remove('R2') 
    column_list.remove('Run Time') 
    column_list.remove('Test-Train Split') 
    column_list.remove('Year')
    #Rank.drop('Sum', axis=0, inplace=True)
    #Rank.drop('Final Rank', axis=0, inplace=True)
    Rank["Sum"] = Rank[column_list].sum(axis=1)
    Rank.drop('Sum', axis=1, inplace=True)
    #Rank.drop('Final Rank', axis=1, inplace=True)
    Rank["Sum"] = Rank[column_list].sum(axis=1)
    Rank['Final Rank'] = Rank['Sum'].rank(ascending=True)
    display(Rank)


In [8]:
finalrank(Rank_Base)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  errors=errors)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Algorithm,Feature Added,MAE,ME,MEAE,MSE,Max Depth,N Features,R2,Run Time,Test-Train Split,Year,Unnamed: 12,MAE Rank,ME Rank,MEAE Rank,MSE Rank,R2 Rank,Sum,Final Rank
0,Random Forest,,5.061335,142.517,2.9965,84.115567,12.0,7,0.436529,9m 36s,0.3,2017-2019,,2.0,4.0,3.0,2.0,1.0,12.0,1.5
3,Gradient Boosting,,4.934718,151.821084,2.973308,85.366033,12.0,7,0.36799,2m 16s,0.3,2017-2019,,1.0,5.0,2.0,4.0,3.0,15.0,4.0
6,ANN,,5.426977,104.808756,3.560898,77.242005,,7,0.427781,1m 22s,0.3,2017-2019,,4.0,1.0,4.0,1.0,2.0,12.0,1.5
9,Linear Regression,,6.192442,140.953068,4.530404,92.575934,,7,0.213066,2m 33s,0.3,2017-2019,,5.0,3.0,5.0,5.0,5.0,23.0,5.0
12,SVM,,5.097907,135.409361,2.929126,84.782734,,7,0.304188,53s,0.3,2017-2019,,3.0,2.0,1.0,3.0,4.0,13.0,3.0


In [9]:
finalrank(Rank_Pollutant)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Algorithm,Feature Added,MAE,ME,MEAE,MSE,Max Depth,N Features,R2,Run Time,Test-Train Split,Year,Unnamed: 12,MAE Rank,ME Rank,MEAE Rank,MSE Rank,R2 Rank,Sum,Final Rank
1,Random Forest,NO2 and SO2,4.615564,134.613563,2.99544,59.428183,12.0,9,0.53907,17m 1s,0.3,2017-2019,,3.0,4.0,3.0,2.0,1.0,13.0,2.0
4,Gradient Boosting,NO2 and SO2,4.535282,83.792687,2.749209,61.996428,12.0,9,0.516236,3m 10s,0.3,2017-2019,,1.0,1.0,2.0,3.0,2.0,9.0,1.0
7,ANN,NO2 and SO2,4.758106,99.815977,3.181172,58.384324,,9,0.511157,1m 22s,0.3,2017-2019,,4.0,2.0,4.0,1.0,3.0,14.0,3.5
10,Linear Regression,NO2 and SO2,6.141059,142.417964,4.333517,95.494681,,9,0.258094,4m 2s,0.3,2017-2019,,5.0,5.0,5.0,5.0,5.0,25.0,5.0
13,SVM,NO2 and SO2,4.583048,130.581039,2.683639,73.12636,,9,0.419195,58s,0.3,2017-2019,,2.0,3.0,1.0,4.0,4.0,14.0,3.5


In [10]:
finalrank(Rank_NoWind)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Algorithm,Feature Added,MAE,ME,MEAE,MSE,Max Depth,N Features,R2,Run Time,Test-Train Split,Year,Unnamed: 12,MAE Rank,ME Rank,MEAE Rank,MSE Rank,R2 Rank,Sum,Final Rank
2,Random Forest,NO WIND,4.699566,75.221,3.022,55.833486,12.0,7,0.535983,10m 53s,0.3,2017-2019,,1.0,1.0,2.0,1.0,1.0,6.0,1.0
5,Gradient Boosting,NO WIND,5.233167,86.569208,3.609033,69.714864,12.0,7,0.474806,1m 59s,0.3,2017-2019,,4.0,2.0,4.0,2.0,2.0,14.0,2.0
8,ANN,NO WIND,5.196109,127.776182,3.358999,75.200244,,7,0.453496,1m 21s,0.3,2017-2019,,3.0,3.0,3.0,3.0,3.0,15.0,3.0
11,Linear Regression,NO WIND,6.340251,132.535127,4.5763,98.335372,,7,0.252679,3m 3s,0.3,2017-2019,,5.0,4.0,5.0,5.0,5.0,24.0,5.0
14,SVM,NO WIND,5.048022,148.362647,2.856737,96.70872,,7,0.337406,46s,0.3,2017-2019,,2.0,5.0,1.0,4.0,4.0,16.0,4.0
