# Importing libraries and classes

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.ensemble import RandomForestRegressor as SKLearnRandomForestRegressor
from sklearn.tree import DecisionTreeRegressor as SKLearnDecisionTreeRegressor
from RandomForestRegressor import RandomForestRegressor
from DecisionTreeRegressor import DecisionTreeRegressor

# Add your required imports here

# Reading and splitting the data

In [2]:
# Load the cleaned NSO population dataset (path is relative to the notebook's working directory).
df = pd.read_csv('./NSO_Population_Sex_dataset/NSO_POPULATION_DATA_CLEANED.csv')

# Predictor columns; 'District' and 'Sex' are one-hot encoded by get_dummies below,
# the remaining columns are passed through unchanged.
feature_cols = ['District', 'Sex', 'Year', 'Population_Growth_Rate','Average_Population']
X = pd.get_dummies(df[feature_cols], columns=['District', 'Sex'])
y = df["Population"]

# Hold out 20% of the rows for evaluation.
# random_state is fixed so that every model in this notebook is scored on the same
# split on every Restart & Run All; without it the reported MAE/MSE change per run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# SKLearn Decision Trees

In [3]:
# Baseline: scikit-learn's decision tree regressor, fitted on the training split
# and scored on the held-out split.
# random_state is fixed because scikit-learn permutes features at each split, so
# equally good splits can otherwise be resolved differently between runs.
# NOTE(review): min_samples_leaf=5 is set here, while the custom tree in the next
# section is constructed without a min_samples_leaf argument - confirm that this
# difference is intended for the comparison.
SKLearn_Decision_Tree_Regressor = SKLearnDecisionTreeRegressor(
    max_depth=100,
    min_samples_split=2,
    min_samples_leaf=5,
    random_state=42,
)

SKLearn_Decision_Tree_Regressor.fit(X_train, y_train)

SKLearn_Decision_Tree_Regressor_Prediction = SKLearn_Decision_Tree_Regressor.predict(X_test)

# Error metrics of the predictions against the held-out targets.
SKLearn_Decision_Tree_Regressor_Prediction_MAE = mean_absolute_error(y_test, SKLearn_Decision_Tree_Regressor_Prediction)
SKLearn_Decision_Tree_Regressor_Prediction_MSE = mean_squared_error(y_test, SKLearn_Decision_Tree_Regressor_Prediction)

print(f"SKLearn Decision Tree Regressor Mean Absolute Error: {SKLearn_Decision_Tree_Regressor_Prediction_MAE}")
print(f"SKLearn Decision Tree Regressor Mean Squared Error: {SKLearn_Decision_Tree_Regressor_Prediction_MSE}")

# Side-by-side table of actual vs. predicted values, indexed like y_test.
SKLearn_Decision_Tree_Regressor_Results_DF = pd.DataFrame({'Actual': y_test, 'Predicted': SKLearn_Decision_Tree_Regressor_Prediction})
print(f"\nSKLearn Decision Tree Predictions:\n{SKLearn_Decision_Tree_Regressor_Results_DF}")

SKLearn Decision Tree Regressor Mean Absolute Error: 0.019414911211287852
SKLearn Decision Tree Regressor Mean Squared Error: 0.001960325834723594

SKLearn Decision Tree Predictions:
       Actual  Predicted
154  0.274949   0.293029
136  0.240905   0.223126
137  0.254603   0.293029
173  0.018144   0.020760
64   0.192490   0.202359
18   0.334591   0.336222
51   0.608144   0.612045
170  0.009978   0.009146
54   0.614832   0.612045
76   0.285545   0.258887
15   0.378399   0.354900
40   0.642014   0.674058
63   0.894065   0.862499
82   0.201327   0.202359
143  0.364016   0.197714
114  0.182445   0.176871
31   0.351231   0.354900
159  0.397456   0.197714
147  0.199997   0.195295
150  0.221378   0.223126
58   0.720149   0.674058
180  0.003344   0.002375
191  0.029411   0.020760
133  0.216946   0.223126
126  0.211856   0.199165
90   0.241455   0.258887
112  0.182256   0.176871
10   0.333920   0.333571
94   0.267079   0.258887
131  0.201193   0.195295
29   0.345026   0.354900
161  0.005076   0

# Custom Decision Trees

In [4]:
# Project-local decision tree: fit on the training split and predict the held-out
# split. The DataFrame/Series are handed over as raw arrays via `.values`.
Custom_Decision_Tree_Regressor = DecisionTreeRegressor(
    max_depth=100,
    min_samples_split=2,
)
Custom_Decision_Tree_Regressor.fit(X_train.values, y_train.values)
Custom_Decision_Tree_Regressor_Prediction = Custom_Decision_Tree_Regressor.predict(X_test.values)

# Score the predictions against the held-out targets.
Custom_Decision_Tree_Regressor_Prediction_MAE = mean_absolute_error(
    y_true=y_test,
    y_pred=Custom_Decision_Tree_Regressor_Prediction,
)
Custom_Decision_Tree_Regressor_Prediction_MSE = mean_squared_error(
    y_true=y_test,
    y_pred=Custom_Decision_Tree_Regressor_Prediction,
)
print(f"Custom Decision Tree Regressor Mean Absolute Error: {Custom_Decision_Tree_Regressor_Prediction_MAE}")
print(f"Custom Decision Tree Regressor Mean Squared Error: {Custom_Decision_Tree_Regressor_Prediction_MSE}")

# Actual vs. predicted table for eyeballing individual rows.
Custom_Decision_Tree_Regressor_Results_DF = pd.DataFrame(
    data={
        'Actual': y_test,
        'Predicted': Custom_Decision_Tree_Regressor_Prediction,
    }
)
print(f"\nCustom Decision Tree Predictions:\n{Custom_Decision_Tree_Regressor_Results_DF}")

Custom Decision Tree Regressor Mean Absolute Error: 0.012880602786728148
Custom Decision Tree Regressor Mean Squared Error: 0.0013511731482142384

Custom Decision Tree Predictions:
       Actual  Predicted
154  0.274949   0.273432
136  0.240905   0.237668
137  0.254603   0.254724
173  0.018144   0.019675
64   0.192490   0.194303
18   0.334591   0.333571
51   0.608144   0.611689
170  0.009978   0.008192
54   0.614832   0.631230
76   0.285545   0.273888
15   0.378399   0.352574
40   0.642014   0.652731
63   0.894065   0.890520
82   0.201327   0.205128
143  0.364016   0.222412
114  0.182445   0.183989
31   0.351231   0.352574
159  0.397456   0.222412
147  0.199997   0.209815
150  0.221378   0.225057
58   0.720149   0.731819
180  0.003344   0.003022
191  0.029411   0.024187
133  0.216946   0.213749
126  0.211856   0.207612
90   0.241455   0.244705
112  0.182256   0.179987
10   0.333920   0.333557
94   0.267079   0.268852
131  0.201193   0.209815
29   0.345026   0.357865
161  0.005076   0.0

# Random Forest Regressor using SKLearn
https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestRegressor.html

In [5]:
# Baseline: scikit-learn's random forest regressor with its default hyperparameters.
# random_state is fixed so the randomness used while building the forest is
# repeatable and the reported metrics do not change between runs.
SKLearn_Random_Forest_Regressor = SKLearnRandomForestRegressor(random_state=42)

SKLearn_Random_Forest_Regressor.fit(X_train, y_train)

SKLearn_Random_Forest_Regressor_Prediction = SKLearn_Random_Forest_Regressor.predict(X_test)

# Error metrics of the predictions against the held-out targets.
SKLearn_Random_Forest_Regressor_Prediction_MAE = mean_absolute_error(y_test, SKLearn_Random_Forest_Regressor_Prediction)
SKLearn_Random_Forest_Regressor_Prediction_MSE = mean_squared_error(y_test, SKLearn_Random_Forest_Regressor_Prediction)

print(f"SKLearn Random Forest Regressor Mean Absolute Error: {SKLearn_Random_Forest_Regressor_Prediction_MAE}")
print(f"SKLearn Random Forest Regressor Mean Squared Error: {SKLearn_Random_Forest_Regressor_Prediction_MSE}")

# Side-by-side table of actual vs. predicted values, indexed like y_test.
SKLearn_Random_Forest_Regressor_Results_DF = pd.DataFrame({'Actual': y_test, 'Predicted': SKLearn_Random_Forest_Regressor_Prediction})
print(f"\nSKLearn Random Forest Predictions:\n{SKLearn_Random_Forest_Regressor_Results_DF}")


SKLearn Random Forest Regressor Mean Absolute Error: 0.011904015146115167
SKLearn Random Forest Regressor Mean Squared Error: 0.0007689667154152758

SKLearn Random Forest Predictions:
       Actual  Predicted
154  0.274949   0.276633
136  0.240905   0.233046
137  0.254603   0.270337
173  0.018144   0.018779
64   0.192490   0.200028
18   0.334591   0.331705
51   0.608144   0.607050
170  0.009978   0.008299
54   0.614832   0.611928
76   0.285545   0.265249
15   0.378399   0.352429
40   0.642014   0.680541
63   0.894065   0.913723
82   0.201327   0.205412
143  0.364016   0.264113
114  0.182445   0.181766
31   0.351231   0.349136
159  0.397456   0.270736
147  0.199997   0.203322
150  0.221378   0.217261
58   0.720149   0.724871
180  0.003344   0.004406
191  0.029411   0.023371
133  0.216946   0.211266
126  0.211856   0.212342
90   0.241455   0.250233
112  0.182256   0.179723
10   0.333920   0.332694
94   0.267079   0.257963
131  0.201193   0.199627
29   0.345026   0.345939
161  0.005076   

# Custom Random Forest Regressor using SKLearn Decision Trees

In [6]:
# Project-local random forest, constructed with custom=False (per the section
# title, the variant built on scikit-learn decision trees).
SKLearn_Decision_Trees_Random_Forest_Regressor = RandomForestRegressor(
    n_estimators=100,
    max_depth=None,
    custom=False,
)
SKLearn_Decision_Trees_Random_Forest_Regressor.fit(X_train, y_train)
SKLearn_Decision_Trees_Random_Forest_Regressor_Prediction = SKLearn_Decision_Trees_Random_Forest_Regressor.predict(X_test)

# Score the predictions against the held-out targets.
SKLearn_Decision_Trees_Random_Forest_Regressor_MAE = mean_absolute_error(
    y_true=y_test,
    y_pred=SKLearn_Decision_Trees_Random_Forest_Regressor_Prediction,
)
SKLearn_Decision_Trees_Random_Forest_Regressor_MSE = mean_squared_error(
    y_true=y_test,
    y_pred=SKLearn_Decision_Trees_Random_Forest_Regressor_Prediction,
)
print(f"Custom Random Forest Regressor using SKLearn Decision Trees Mean Absolute Error: {SKLearn_Decision_Trees_Random_Forest_Regressor_MAE}")
print(f"Custom Random Forest Regressor using SKLearn Decision Trees Mean Squared Error: {SKLearn_Decision_Trees_Random_Forest_Regressor_MSE}")

# Actual vs. predicted table for eyeballing individual rows.
SKLearn_Decision_Trees_Random_Forest_Regressor_Results_DF = pd.DataFrame(
    data={
        'Actual': y_test,
        'Predicted': SKLearn_Decision_Trees_Random_Forest_Regressor_Prediction,
    }
)
print(f"\nCustom Random Forest Regressor using SKLearn Decision Trees Predictions:\n{SKLearn_Decision_Trees_Random_Forest_Regressor_Results_DF}")

Custom Random Forest Regressor using SKLearn Decision Trees Mean Absolute Error: 0.011632225990614945
Custom Random Forest Regressor using SKLearn Decision Trees Mean Squared Error: 0.0008405946323864392

Custom Random Forest Regressor using SKLearn Decision Trees Predictions:
       Actual  Predicted
154  0.274949   0.270688
136  0.240905   0.231068
137  0.254603   0.268476
173  0.018144   0.018636
64   0.192490   0.198342
18   0.334591   0.333213
51   0.608144   0.606215
170  0.009978   0.008345
54   0.614832   0.610003
76   0.285545   0.264696
15   0.378399   0.351789
40   0.642014   0.680066
63   0.894065   0.894728
82   0.201327   0.204951
143  0.364016   0.257889
114  0.182445   0.181991
31   0.351231   0.349882
159  0.397456   0.263425
147  0.199997   0.204956
150  0.221378   0.219749
58   0.720149   0.724677
180  0.003344   0.004216
191  0.029411   0.024845
133  0.216946   0.212638
126  0.211856   0.211936
90   0.241455   0.249272
112  0.182256   0.179470
10   0.333920   0.3335

# Custom Random Forest Regressor

In [7]:
# Project-local random forest with the `custom` argument left at its default:
# fit on the training split, then predict the held-out split.
Custom_Random_Forest_Regressor = RandomForestRegressor(
    n_estimators=100,
    max_depth=100,
)
Custom_Random_Forest_Regressor.fit(X_train, y_train)
Custom_Random_Forest_Regressor_Prediction = Custom_Random_Forest_Regressor.predict(X_test)

# Score the predictions against the held-out targets.
Custom_Random_Forest_Regressor_MAE = mean_absolute_error(
    y_true=y_test,
    y_pred=Custom_Random_Forest_Regressor_Prediction,
)
Custom_Random_Forest_Regressor_MSE = mean_squared_error(
    y_true=y_test,
    y_pred=Custom_Random_Forest_Regressor_Prediction,
)
print(f"Custom Random Forest Regressor Mean Absolute Error: {Custom_Random_Forest_Regressor_MAE}")
print(f"Custom Random Forest Regressor Mean Squared Error: {Custom_Random_Forest_Regressor_MSE}")

# Actual vs. predicted table for eyeballing individual rows.
Custom_Random_Forest_Regressor_Results_DF = pd.DataFrame(
    data={
        'Actual': y_test,
        'Predicted': Custom_Random_Forest_Regressor_Prediction,
    }
)
print(f"\nCustom Random Forest Regressor Predictions:\n{Custom_Random_Forest_Regressor_Results_DF}")

Custom Random Forest Regressor Mean Absolute Error: 0.01363101110579657
Custom Random Forest Regressor Mean Squared Error: 0.000784614494030972

Custom Random Forest Regressor Predictions:
       Actual  Predicted
154  0.274949   0.290437
136  0.240905   0.231655
137  0.254603   0.263411
173  0.018144   0.021050
64   0.192490   0.193372
18   0.334591   0.333042
51   0.608144   0.609713
170  0.009978   0.009741
54   0.614832   0.622588
76   0.285545   0.263066
15   0.378399   0.359308
40   0.642014   0.663760
63   0.894065   0.937508
82   0.201327   0.205553
143  0.364016   0.269496
114  0.182445   0.181983
31   0.351231   0.357987
159  0.397456   0.271558
147  0.199997   0.213323
150  0.221378   0.222663
58   0.720149   0.742965
180  0.003344   0.002910
191  0.029411   0.024197
133  0.216946   0.216538
126  0.211856   0.221352
90   0.241455   0.263883
112  0.182256   0.180214
10   0.333920   0.333369
94   0.267079   0.279239
131  0.201193   0.209388
29   0.345026   0.356112
161  0.0050