In [28]:
import pandas as pd
import warnings
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
# configs
warnings.filterwarnings('ignore')

In [29]:
data= pd.read_csv('global-data-on-sustainable-energy.csv')

#### Data Quality Report

##### Continuous features report 

In [30]:
def build_continuous_features_report(data_df):

    """Build tabular report for continuous features"""

    stats = {
        "Count": len,
        "Miss %": lambda df: df.isna().sum() / len(df) * 100,
        "Card.": lambda df: df.nunique(),
        "Min": lambda df: df.min(),
        "1st Qrt.": lambda df: df.quantile(0.25),
        "Mean": lambda df: df.mean(),
        "Median": lambda df: df.median(),
        "3rd Qrt": lambda df: df.quantile(0.75),
        "Max": lambda df: df.max(),
        "Std. Dev.": lambda df: df.std(),
    }

    contin_feat_names = data_df.select_dtypes("number").columns
    continuous_data_df = data_df[contin_feat_names]

    report_df = pd.DataFrame(index=contin_feat_names, columns=stats.keys())

    for stat_name, fn in stats.items():
        # NOTE: ignore warnings for empty features
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            report_df[stat_name] = fn(continuous_data_df)

    return report_df


In [31]:
# build continuous features report
con_report_df = build_continuous_features_report(data)
con_report_df

Unnamed: 0,Count,Miss %,Card.,Min,1st Qrt.,Mean,Median,3rd Qrt,Max,Std. Dev.
Year,3649,0.0,21,2000.0,2005.0,2010.038,2010.0,2015.0,2020.0,6.054228
Access to electricity (% of population),3649,0.274048,2040,1.252269,59.80089,78.9337,98.36157,100.0,100.0,30.27554
Access to clean fuels for cooking,3649,4.631406,896,0.0,23.175,63.25529,83.15,100.0,100.0,39.04366
Renewable-electricity-generating-capacity-per-capita,3649,25.513839,2110,0.0,3.54,113.1375,32.91,112.21,3060.19,244.1673
Financial flows to developing countries (US $),3649,57.248561,1017,0.0,260000.0,94224000.0,5665000.0,55347500.0,5202310000.0,298154400.0
Renewable energy share in the total final energy consumption (%),3649,5.316525,2587,0.0,6.515,32.63816,23.3,55.245,96.04,29.8949
Electricity from fossil fuels (TWh),3649,0.5755,1859,0.0,0.29,70.365,2.97,26.8375,5184.13,348.0519
Electricity from nuclear (TWh),3649,3.453001,547,0.0,0.0,13.45019,0.0,0.0,809.41,73.00662
Electricity from renewables (TWh),3649,0.5755,1533,0.0,0.04,23.96801,1.47,9.6,2184.94,104.4311
Low-carbon electricity (% electricity),3649,1.151,2647,0.0,2.877847,36.80118,27.86507,64.40379,100.0,34.31488


In [32]:
print("categorical features are :")
data.select_dtypes(exclude="number").columns

categorical features are :


Index(['Entity', 'Density\n(P/Km2)'], dtype='object')

##### Categorical features Report

In [33]:
def build_categorical_features_report(data_df):

    """Build tabular report for categorical features"""

    def _mode(df):
        return df.apply(lambda ft: ",".join(ft.mode().to_list()))

    def _mode_freq(df):
        return df.apply(lambda ft: ft.value_counts()[ft.mode()].sum())

    def _second_mode(df):
        return df.apply(lambda ft: ",".join(ft[~ft.isin(ft.mode())].mode().to_list()))

    def _second_mode_freq(df):
        return df.apply(
            lambda ft: ft[~ft.isin(ft.mode())]
            .value_counts()[ft[~ft.isin(ft.mode())].mode()]
            .sum()
        )

    stats = {
        "Count": len,
        "Miss %": lambda df: df.isna().sum() / len(df) * 100,
        "Card.": lambda df: df.nunique(),
        "Mode": _mode,
        "Mode Freq": _mode_freq,
        "Mode %": lambda df: _mode_freq(df) / len(df) * 100,
        "2nd Mode": _second_mode,
        "2nd Mode Freq": _second_mode_freq,
        "2nd Mode %": lambda df: _second_mode_freq(df) / len(df) * 100,
    }

    cat_feat_names = data_df.select_dtypes(exclude="number").columns
    cat_data_df = data_df[cat_feat_names]

    report_df = pd.DataFrame(index=cat_feat_names, columns=stats.keys())

    for stat_name, fn in stats.items():
        # NOTE: ignore warnings for empty features
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=RuntimeWarning)
            report_df[stat_name] = fn(cat_data_df)

    return report_df


In [34]:
# build categorical feature report
cat_report_df = build_categorical_features_report(data)
cat_report_df

Unnamed: 0,Count,Miss %,Card.,Mode,Mode Freq,Mode %,2nd Mode,2nd Mode Freq,2nd Mode %
Entity,3649,0.0,176,"Afghanistan,Albania,Algeria,Angola,Antigua and...",3612,98.986024,"Montenegro,Serbia",28,0.767334
Density\n(P/Km2),3649,0.027405,124,25,147,4.028501,18,113,3.096739


##### Checking duplicates

In [35]:
# Check for duplicate rows
num_duplicates = data.duplicated().sum()
print("Number of Duplicate Rows:", num_duplicates)

Number of Duplicate Rows: 0


##### Checking outliers

In [36]:
def detect_outliers(col):
    """Detect outliers in a dataframe column

    Args:
        col (pd.Series): A dataframe column

    Returns:
        pd.Series: Outliers
    """
    Q1 = np.percentile(col, 25)
    Q3 = np.percentile(col, 75)
    IQR = Q3 - Q1
    outlier_step = IQR * 1.5

    return col[(col < Q1 - outlier_step) | (col > Q3 + outlier_step)]


In [37]:
outliers_df = (
    data.select_dtypes("number")
    .apply(lambda col: detect_outliers(col).size)
    .rename("Num outliers")
    .to_frame()
).query("`Num outliers` > 0")

outliers_df["Percent outliers"] = outliers_df["Num outliers"] / len(data) * 100
outliers_df.sort_values(by="Percent outliers", ascending=False)


Unnamed: 0,Num outliers,Percent outliers
Primary energy consumption per capita (kWh/person),237,6.49493


#### Data Quality Plan

#### Data Preparation

Convert the types to the desired ones

In [38]:
def remove_functuation(density):
    return density.replace(',','')
data['Entity']=data['Entity'].astype(str)
data['Density\\n(P/Km2)']=data['Density\\n(P/Km2)'].astype(str)
data['Density\\n(P/Km2)']=data['Density\\n(P/Km2)'].apply(remove_functuation)
data['Density\\n(P/Km2)']=data['Density\\n(P/Km2)'].astype(float)

##### Handling missing values

In [39]:
#Handling missing values in continuous features
missing_con_cols = con_report_df.query("`Miss %` > 0.0").index
#print(missing_con_cols)
feature_with_null=[column for column in missing_con_cols if column not in ['Year','Primary energy consumption per capita (kWh/person)']]
#print(feature_with_null)
# Replacing values with mean
data.fillna(data[feature_with_null].mean(),inplace=True)

In [40]:
# Handling missing values in categorical features
cat_missing = cat_report_df.query("`Miss %` > 0").index


cat_fillna_vals = data[cat_missing].mode().squeeze()
data = data.fillna(cat_fillna_vals)

In [41]:
data.isna().sum()

Entity                                                              0
Year                                                                0
Access to electricity (% of population)                             0
Access to clean fuels for cooking                                   0
Renewable-electricity-generating-capacity-per-capita                0
Financial flows to developing countries (US $)                      0
Renewable energy share in the total final energy consumption (%)    0
Electricity from fossil fuels (TWh)                                 0
Electricity from nuclear (TWh)                                      0
Electricity from renewables (TWh)                                   0
Low-carbon electricity (% electricity)                              0
Primary energy consumption per capita (kWh/person)                  0
Energy intensity level of primary energy (MJ/$2017 PPP GDP)         0
Value_co2_emissions_kt_by_country                                   0
Renewables (% equiva

In [42]:
data.isnull().sum()

Entity                                                              0
Year                                                                0
Access to electricity (% of population)                             0
Access to clean fuels for cooking                                   0
Renewable-electricity-generating-capacity-per-capita                0
Financial flows to developing countries (US $)                      0
Renewable energy share in the total final energy consumption (%)    0
Electricity from fossil fuels (TWh)                                 0
Electricity from nuclear (TWh)                                      0
Electricity from renewables (TWh)                                   0
Low-carbon electricity (% electricity)                              0
Primary energy consumption per capita (kWh/person)                  0
Energy intensity level of primary energy (MJ/$2017 PPP GDP)         0
Value_co2_emissions_kt_by_country                                   0
Renewables (% equiva

#### Handling Outliers

##### Plot heatmap

In [44]:
#https://stackoverflow.com/questions/18689823/pandas-dataframe-replace-nan-values-with-average-of-columns
#https://www.geeksforgeeks.org/remove-multiple-elements-from-a-list-in-python/
#https://scikit-learn.org/stable/modules/generated/sklearn.impute.KNNImputer.html
#https://medium.com/@bhanupsingh484/handling-missing-data-with-knn-imputer-927d49b09015
#https://pandas.pydata.org/docs/user_guide/merging.html
'''
from sklearn.impute import KNNImputer
imputer = KNNImputer(n_neighbors=2)
columns=data.columns.tolist()
feature_with_null=[column for column in columns if column not in ['Entity','Year','Primary energy consumption per capita (kWh/person)']]
data_to_impute = data[feature_with_null]
data_remaining = data[['Entity', 'Year', 'Primary energy consumption per capita (kWh/person)']]
data_imputed = pd.DataFrame(imputer.fit_transform(data_to_impute), columns=feature_with_null)
data_combined = pd.concat([data_imputed, data_remaining], axis=1)
data=data_combined
missing_values = data.isnull().sum()
print(missing_values)
'''

"\nfrom sklearn.impute import KNNImputer\nimputer = KNNImputer(n_neighbors=2)\ncolumns=data.columns.tolist()\nfeature_with_null=[column for column in columns if column not in ['Entity','Year','Primary energy consumption per capita (kWh/person)']]\ndata_to_impute = data[feature_with_null]\ndata_remaining = data[['Entity', 'Year', 'Primary energy consumption per capita (kWh/person)']]\ndata_imputed = pd.DataFrame(imputer.fit_transform(data_to_impute), columns=feature_with_null)\ndata_combined = pd.concat([data_imputed, data_remaining], axis=1)\ndata=data_combined\nmissing_values = data.isnull().sum()\nprint(missing_values)\n"

In [45]:
df = data
df.head()

Unnamed: 0,Entity,Year,Access to electricity (% of population),Access to clean fuels for cooking,Renewable-electricity-generating-capacity-per-capita,Financial flows to developing countries (US $),Renewable energy share in the total final energy consumption (%),Electricity from fossil fuels (TWh),Electricity from nuclear (TWh),Electricity from renewables (TWh),...,Primary energy consumption per capita (kWh/person),Energy intensity level of primary energy (MJ/$2017 PPP GDP),Value_co2_emissions_kt_by_country,Renewables (% equivalent primary energy),gdp_growth,gdp_per_capita,Density\n(P/Km2),Land Area(Km2),Latitude,Longitude
0,Afghanistan,2000,1.613591,6.2,9.22,20000.0,44.99,0.16,0.0,0.31,...,302.59482,1.64,760.0,11.986707,3.44161,13283.774348,60.0,652230.0,33.93911,67.709953
1,Afghanistan,2001,4.074574,7.2,8.86,130000.0,45.6,0.09,0.0,0.5,...,236.89185,1.74,730.0,11.986707,3.44161,13283.774348,60.0,652230.0,33.93911,67.709953
2,Afghanistan,2002,9.409158,8.2,8.47,3950000.0,37.83,0.13,0.0,0.56,...,210.86215,1.4,1029.999971,11.986707,3.44161,179.426579,60.0,652230.0,33.93911,67.709953
3,Afghanistan,2003,14.738506,9.5,8.09,25970000.0,36.66,0.31,0.0,0.63,...,229.96822,1.4,1220.000029,11.986707,8.832278,190.683814,60.0,652230.0,33.93911,67.709953
4,Afghanistan,2004,20.064968,10.9,7.75,94224000.0,44.24,0.33,0.0,0.56,...,204.23125,1.2,1029.999971,11.986707,1.414118,211.382074,60.0,652230.0,33.93911,67.709953


In [46]:
def rmOutliers(df, columns):
    for column in columns:
        Q1 = df[column].quantile(0.25)
        Q3 = df[column].quantile(0.75)
        IQR = Q3 - Q1
        lower_bound = Q1 - 1.5 * IQR
        upper_bound = Q3 + 1.5 * IQR
        df = df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]
    return df

# List of columns to check for outliers
columns_to_check = [
    'Access to electricity (% of population)',
    'Access to clean fuels for cooking',
    'Renewable-electricity-generating-capacity-per-capita',
    'Financial flows to developing countries (US $)',
    'Renewable energy share in the total final energy consumption (%)',
    'Electricity from fossil fuels (TWh)',
    'Electricity from nuclear (TWh)',
    'Electricity from renewables (TWh)',
    'Primary energy consumption per capita (kWh/person)',
    'Energy intensity level of primary energy (MJ/$2017 PPP GDP)',
    'Value_co2_emissions_kt_by_country',
    'Renewables (% equivalent primary energy)',
    'gdp_growth',
    'gdp_per_capita',
]
print (columns_to_check)

# Apply the function to each group of entities
df_cleaned = df.groupby('Entity').apply(lambda x: rmOutliers(x, columns_to_check)).reset_index(drop=True)

['Access to electricity (% of population)', 'Access to clean fuels for cooking', 'Renewable-electricity-generating-capacity-per-capita', 'Financial flows to developing countries (US $)', 'Renewable energy share in the total final energy consumption (%)', 'Electricity from fossil fuels (TWh)', 'Electricity from nuclear (TWh)', 'Electricity from renewables (TWh)', 'Primary energy consumption per capita (kWh/person)', 'Energy intensity level of primary energy (MJ/$2017 PPP GDP)', 'Value_co2_emissions_kt_by_country', 'Renewables (% equivalent primary energy)', 'gdp_growth', 'gdp_per_capita']


In [47]:
print(f"Total Outliers Removed considering for each country are:{df.count()-df_cleaned.count()}")

Total Outliers Removed considering for each country are:Entity                                                              1167
Year                                                                1167
Access to electricity (% of population)                             1167
Access to clean fuels for cooking                                   1167
Renewable-electricity-generating-capacity-per-capita                1167
Financial flows to developing countries (US $)                      1167
Renewable energy share in the total final energy consumption (%)    1167
Electricity from fossil fuels (TWh)                                 1167
Electricity from nuclear (TWh)                                      1167
Electricity from renewables (TWh)                                   1167
Low-carbon electricity (% electricity)                              1167
Primary energy consumption per capita (kWh/person)                  1167
Energy intensity level of primary energy (MJ/$2017 PPP GDP)         

In [48]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [49]:
scaler= StandardScaler()
columns=data.select_dtypes(include=['float','integer']).columns
data[columns]=scaler.fit_transform(data[columns])

In [50]:
label_encoder=LabelEncoder()
data['Entity']=label_encoder.fit_transform(data['Entity'])

In [51]:
target_variables = ['Value_co2_emissions_kt_by_country', 'Renewable energy share in the total final energy consumption (%)']


In [52]:
# importing all the necessary models that are required to perform the regression
#https://stackoverflow.com/questions/59489830/select-best-parameters-for-regression-model-using-gridsearch
#https://www.kdnuggets.com/hyperparameter-tuning-gridsearchcv-and-randomizedsearchcv-explained
#https://www.analyticsvidhya.com/blog/2022/11/hyperparameter-tuning-using-randomized-search/
#https://dev.to/newbie_coder/decision-tree-regression-a-comprehensive-guide-with-python-code-examples-and-hyperparameter-tuning-1f0f
#https://stats.stackexchange.com/questions/269053/how-to-select-hyperparameters-for-svm-regression-after-grid-search
#https://www.geeksforgeeks.org/random-forest-hyperparameter-tuning-in-python/
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from scipy.stats import randint,uniform

models={
    'Linear Regression':LinearRegression(),
    'Decision Tree Regression': DecisionTreeRegressor(),
    'Support Vector Regressor':SVR(),
    'Random Forest Regressor':RandomForestRegressor()
}
param_grids = {
    'Linear Regression': {
              "fit_intercept": [True, False],
             },
    'Decision Tree Regression': {
        'max_depth': [None, 5, 10],
        'min_samples_split': randint(2, 10),
        'min_samples_leaf': randint(1, 4)
    },
    'Support Vector Regressor': {
        'kernel': ('linear', 'rbf','poly'), 
        'C':uniform(1.5, 10),
        'gamma': uniform(1e-7, 1e-4),
        'epsilon':uniform(0.1,0.5)
    },
    'Random Forest Regressor': {
         'n_estimators':  randint(25, 150), 
         'max_features': ['sqrt', 'log2', None], 
         'max_depth':  randint(3, 9), 
         'max_leaf_nodes': randint(3, 9),
    }
}

In [53]:
data.head()

Unnamed: 0,Entity,Year,Access to electricity (% of population),Access to clean fuels for cooking,Renewable-electricity-generating-capacity-per-capita,Financial flows to developing countries (US $),Renewable energy share in the total final energy consumption (%),Electricity from fossil fuels (TWh),Electricity from nuclear (TWh),Electricity from renewables (TWh),...,Primary energy consumption per capita (kWh/person),Energy intensity level of primary energy (MJ/$2017 PPP GDP),Value_co2_emissions_kt_by_country,Renewables (% equivalent primary energy),gdp_growth,gdp_per_capita,Density\n(P/Km2),Land Area(Km2),Latitude,Longitude
0,0,-1.658303,-2.557738,-1.496598,-0.493223,-0.483384,0.424678,-0.202319,-0.187525,-0.227228,...,-0.731738,-1.069235,-0.218925,0.0,0.0,9.608959000000001e-17,-0.239313,0.011997,0.649732,0.797336
1,0,-1.493106,-2.476329,-1.470367,-0.494932,-0.482819,0.445651,-0.202521,-0.187525,-0.225403,...,-0.733627,-1.04008,-0.218967,0.0,0.0,9.608959000000001e-17,-0.239313,0.011997,0.649732,0.797336
2,0,-1.32791,-2.299862,-1.444137,-0.496783,-0.463218,0.178505,-0.202406,-0.187525,-0.224827,...,-0.734376,-1.139208,-0.218554,0.0,0.0,-0.6922478,-0.239313,0.011997,0.649732,0.797336
3,0,-1.162713,-2.123568,-1.410037,-0.498586,-0.350228,0.138278,-0.201887,-0.187525,-0.224154,...,-0.733826,-1.139208,-0.218292,0.0,0.992157,-0.6916532,-0.239313,0.011997,0.649732,0.797336
4,0,-0.997517,-1.947369,-1.373314,-0.5002,0.0,0.398892,-0.201829,-0.187525,-0.224827,...,-0.734567,-1.19752,-0.218554,0.0,-0.373162,-0.6905597,-0.239313,0.011997,0.649732,0.797336


In [54]:
#https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html
from sklearn.feature_selection import RFE
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pandas as pd

def evaluate_model_with_base_parameters(models_dict):
  for i in range(len(target_variables)):
    print(f"Below are the metrics for the target variable: {target_variables[i]}")
    for name, model in models_dict.items():
      if name == 'Support Vector Regressor':
            # Skipping RFE for SVR as it does not have the feature_importances_ or coef_ attribute
            correlation_matrix =  data.select_dtypes("number").corr()
            target_features = correlation_matrix[target_variables[i]].drop(target_variables[i])
            target_features = target_features[abs(target_features)>0.3].index.tolist()
            X = data.drop(columns=target_features)
            y = data[target_variables[i]]
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
            model.fit(X_train, y_train)
            #y_pred = model.predict(X_test)

      else:
            # Applying RFE for other models
            rfe = RFE(estimator=model, n_features_to_select=10)
            X = data.drop(columns=target_variables)
            y = data[target_variables[i]]
            fit = rfe.fit(X, y)
            feature_ranking = pd.DataFrame({'Feature': X.columns, 'Ranking': fit.ranking_}).sort_values(by='Ranking')
            final_features = feature_ranking[feature_ranking['Ranking'] == 1]['Feature'].to_list()
            if 'Entity' not in final_features:
                final_features.append('Entity')
            X = X[final_features]
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
            model.fit(X_train, y_train)
            #y_pred = model.predict(X_test)
      grid_search = RandomizedSearchCV(estimator=model,  param_distributions=param_grids[name], cv=3, scoring='r2',n_iter=4 )
      grid_search.fit(X_train, y_train)
            
      best_model = grid_search.best_estimator_
      y_pred = best_model.predict(X_test)
            
      

      mse = mean_squared_error(y_test, y_pred)
      mae = mean_absolute_error(y_test, y_pred)
      r2 = r2_score(y_test, y_pred)

      print(f"Model: {name}")
      print(f"Best Parameters: {grid_search.best_params_}")
      print(f"Mean squared error is: {mse}")
      print(f"Mean absolute error is: {mae}")
      print(f"R2 score error is: {r2}")
      print("\n")


# now calling the function in order to get the outputs
evaluate_model_with_base_parameters(models)


Below are the metrics for the target variable: Value_co2_emissions_kt_by_country
Model: Linear Regression
Best Parameters: {'fit_intercept': False}
Mean squared error is: 0.08590952550718223
Mean absolute error is: 0.0938037978892197
R2 score error is: 0.913426397905013


Model: Decision Tree Regression
Best Parameters: {'max_depth': None, 'min_samples_leaf': 3, 'min_samples_split': 5}
Mean squared error is: 0.07626052020283375
Mean absolute error is: 0.03427800546745344
R2 score error is: 0.9303028256236923


Model: Support Vector Regressor
Best Parameters: {'C': 6.633516446743177, 'epsilon': 0.3020515996478965, 'gamma': 9.340619190307586e-05, 'kernel': 'rbf'}
Mean squared error is: 0.3489563042035448
Mean absolute error is: 0.26129630281959676
R2 score error is: 0.5025228506016086


Model: Random Forest Regressor
Best Parameters: {'max_depth': 3, 'max_features': None, 'max_leaf_nodes': 6, 'n_estimators': 42}
Mean squared error is: 0.2699573928919001
Mean absolute error is: 0.11027155

# Neural Network Training

In [55]:
%pip install tensorflow

Note: you may need to restart the kernel to use updated packages.


In [56]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [57]:
correlation_matrix =  data.select_dtypes("number").corr()
target_features = correlation_matrix[target_variables[0]].drop(target_variables[0])
target_features = target_features[abs(target_features)>0.3].index.tolist()

In [58]:
X=data[target_features]
y=data[target_variables[0]]

X_train, X_test, y_train, y_test= train_test_split(X,y, random_state=42, test_size=0.2)

In [59]:
neural_network=keras.Sequential([
    layers.Input(shape=(4,)),
    layers.Dense(32, activation='relu'),
    layers.Dense(1)
])

In [60]:
neural_network.compile(optimizer='adam', loss='mean_squared_error')

In [61]:
#https://www.tensorflow.org/tutorials/keras/keras_tuner
'''
import keras_tuner as kt
tuner = kt.Hyperband(neural_network,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='intro_to_kt')'''

"\nimport keras_tuner as kt\ntuner = kt.Hyperband(neural_network,\n                     objective='val_accuracy',\n                     max_epochs=10,\n                     factor=3,\n                     directory='my_dir',\n                     project_name='intro_to_kt')"

In [62]:
neural_network.fit(X_train, y_train, epochs=40, batch_size=32)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.src.callbacks.History at 0x317822220>

In [63]:
y_pred=neural_network.predict(X_test)

# calculating the scores based on the performance of the neural network

mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)


print(f"Mean squared error (same as neural_network.evaluate()) is: {mse}")
print(f"Mean absolute error is: {mae}")
print(f"R2 score error is: {r2}")

Mean squared error (same as neural_network.evaluate()) is: 0.012056832867077562
Mean absolute error is: 0.056933143876490205
R2 score error is: 0.977576267909473


In [64]:
test_loss = neural_network.evaluate(X_test, y_test)
print(f"Test Loss: {test_loss:.4f}")

Test Loss: 0.0121
