In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import re
import plotly.express as px
from scipy.stats import pearsonr


from functions_h1 import *

# Hypotesis 1

# H1 : Initial data cheking EDA 1st Dataset : Global_electricity_production

In [2]:
#load dataset 1        
df1 = pd.read_csv('../data/global_electricity_production_data.csv')     
df1.head()                       


Unnamed: 0,country_name,date,parameter,product,value,unit
0,Australia,12/1/2023,Net Electricity Production,Electricity,22646.1901,GWh
1,Australia,12/1/2023,Net Electricity Production,Total Combustible Fuels,13397.9356,GWh
2,Australia,12/1/2023,Net Electricity Production,"Coal, Peat and Manufactured Gases",9768.5223,GWh
3,Australia,12/1/2023,Net Electricity Production,Oil and Petroleum Products,289.5415,GWh
4,Australia,12/1/2023,Net Electricity Production,Natural Gas,3091.9272,GWh


In [3]:
# H1: Perform initial data cheking:
initial_data_checking(df1)


Shape of the DataFrame:

(121074, 6)

Duplicate Rows Number:

0

Summary Statistics:

              value
count  121060.00000
mean     6925.08149
std     34224.45546
min         0.00000
25%        41.19900
50%       470.41900
75%      2629.71275
max    865976.48280


In [4]:
# H1: check unique and missing values:
unique_and_missing_values_dtype(df1)

Unnamed: 0,non-Null_count,dtype,unique_values,%_unique,missing_values,%_missing
country_name,121074,object,48,0.04%,0,0.0%
date,121074,object,168,0.14%,0,0.0%
parameter,121074,object,7,0.01%,0,0.0%
product,121074,object,16,0.01%,0,0.0%
value,121060,float64,101983,84.23%,14,0.01%
unit,121074,object,1,0.0%,0,0.0%


In [5]:
# H1: separate categorical and numerical columns for 1st Dataset : Global_electriciy_production:
separate_columns_h1(df1)


Categorical Columns:

  country_name       date                   parameter  \
0    Australia  12/1/2023  Net Electricity Production   
1    Australia  12/1/2023  Net Electricity Production   
2    Australia  12/1/2023  Net Electricity Production   
3    Australia  12/1/2023  Net Electricity Production   
4    Australia  12/1/2023  Net Electricity Production   

                             product unit  
0                        Electricity  GWh  
1            Total Combustible Fuels  GWh  
2  Coal, Peat and Manufactured Gases  GWh  
3         Oil and Petroleum Products  GWh  
4                        Natural Gas  GWh  

Numerical Columns:

        value
0  22646.1901
1  13397.9356
2   9768.5223
3    289.5415
4   3091.9272


In [6]:
# H1: analyze_categorical cols:
analyze_categorical_h1(df1)

Unnamed: 0,country_name,date,parameter,product,unit
count,121074,121074,121074,121074,121074
unique,48,168,7,16,1
top,France,3/1/2021,Net Electricity Production,Electricity,GWh
freq,3348,893,93318,34930,121074


In [7]:
# H1: analyze_numerical cols:
analyze_numerical_h1(df1)


Numerical Columns Analysis:


Unnamed: 0,value
count,121060.0
mean,6925.08149
std,34224.45546
min,0.0
25%,41.199
50%,470.419
75%,2629.71275
max,865976.4828


In [8]:
# H1: Count missing values in the 'value' column of df1
missing_values_count = df1['value'].isna().sum()

# Print the number of missing values
print("Number of missing values in 'value' column:", missing_values_count)

Number of missing values in 'value' column: 14


In [9]:
# H1: display missing data
missing_value_rows = df1[df1['value'].isna()]

# H1: Print the rows with missing values in the 'value' column
missing_value_rows

Unnamed: 0,country_name,date,parameter,product,value,unit
67,Chile,12/1/2023,Remarks,Data is estimated for this month,,GWh
94,Costa Rica,12/1/2023,Remarks,Data is estimated for this month,,GWh
285,Japan,12/1/2023,Remarks,Data is estimated for this month,,GWh
804,Costa Rica,11/1/2023,Remarks,Data is estimated for this month,,GWh
1517,Costa Rica,10/1/2023,Remarks,Data is estimated for this month,,GWh
2230,Costa Rica,9/1/2023,Remarks,Data is estimated for this month,,GWh
2944,Costa Rica,8/1/2023,Remarks,Data is estimated for this month,,GWh
3659,Costa Rica,7/1/2023,Remarks,Data is estimated for this month,,GWh
4375,Costa Rica,6/1/2023,Remarks,Data is estimated for this month,,GWh
5090,Costa Rica,5/1/2023,Remarks,Data is estimated for this month,,GWh


In [10]:
# H1: display 'parameter' col data
parameter_counts = df1['parameter'].value_counts()
parameter_counts

parameter
Net Electricity Production        93318
Final Consumption (Calculated)     6036
Distribution Losses                6033
Total Exports                      5416
Total Imports                      5411
Used for pumped storage            4846
Remarks                              14
Name: count, dtype: int64

In [11]:
# H1: display 'product' col data
product_count = df1['product'].value_counts()
product_count

product
Electricity                                          34930
Total Combustible Fuels                               7188
Total Renewables (Hydro, Geo, Solar, Wind, Other)     7188
Hydro                                                 7070
Oil and Petroleum Products                            7066
Combustible Renewables                                7049
Wind                                                  7040
Natural Gas                                           6923
Solar                                                 6898
Coal, Peat and Manufactured Gases                     6525
Other Combustible Non-Renewables                      6147
Not Specified                                         5098
Nuclear                                               4814
Geothermal                                            4267
Other Renewables                                      2857
Data is estimated for this month                        14
Name: count, dtype: int64

# H1 : Data cleaning

In [12]:
# H1: format titles names 
format_column_titles_h1(df1)

Index(['country_name', 'date', 'parameter', 'product', 'value', 'unit'], dtype='object')

In [13]:
# H1:  filter EU countries 
eu_countries = ['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovak Republic', 'Slovenia', 'Spain', 'Sweden']

df_eu = df1.loc[df1['country_name'].isin(eu_countries)]

In [14]:
countries  = df_eu['country_name'].value_counts()
countries

country_name
France             3348
Germany            3230
Portugal           3134
Belgium            3120
Italy              3120
Austria            3120
Spain              3120
Czech Republic     3120
Slovak Republic    3097
Poland             3072
Netherlands        3065
Slovenia           3019
Finland            3000
Sweden             2997
Hungary            2982
Greece             2891
Luxembourg         2874
Ireland            2856
Lithuania          2831
Denmark            2760
Latvia             2679
Estonia            2652
Bulgaria           1438
Croatia            1331
Cyprus             1140
Malta              1101
Name: count, dtype: int64

In [15]:
#  H1: drop rows with missing data 
df_cleaned = df_eu.dropna(axis=1)

In [16]:
#  H1: check cleaned date
unique_and_missing_values_dtype(df_cleaned)

Unnamed: 0,non-Null_count,dtype,unique_values,%_unique,missing_values,%_missing
country_name,71097,object,26,0.04%,0,0.0%
date,71097,object,168,0.24%,0,0.0%
parameter,71097,object,6,0.01%,0,0.0%
product,71097,object,15,0.02%,0,0.0%
value,71097,float64,61218,86.1%,0,0.0%
unit,71097,object,1,0.0%,0,0.0%


In [17]:
# H1: make a copy (before year formatting)
df_main = df_cleaned.copy()

In [18]:
# H1: format year
df_main["year"] = df_main["date"].apply(lambda x: int(x.split("/")[2]))
df_main.head()

Unnamed: 0,country_name,date,parameter,product,value,unit,year
13,Austria,12/1/2023,Net Electricity Production,Electricity,6783.273,GWh,2023
14,Austria,12/1/2023,Net Electricity Production,Total Combustible Fuels,1746.9034,GWh,2023
15,Austria,12/1/2023,Net Electricity Production,"Coal, Peat and Manufactured Gases",167.3548,GWh,2023
16,Austria,12/1/2023,Net Electricity Production,Oil and Petroleum Products,73.8682,GWh,2023
17,Austria,12/1/2023,Net Electricity Production,Natural Gas,1035.0865,GWh,2023


In [19]:
# H1: Reset index
df_main = df_main.reset_index(drop=True)
df_main

Unnamed: 0,country_name,date,parameter,product,value,unit,year
0,Austria,12/1/2023,Net Electricity Production,Electricity,6783.2730,GWh,2023
1,Austria,12/1/2023,Net Electricity Production,Total Combustible Fuels,1746.9034,GWh,2023
2,Austria,12/1/2023,Net Electricity Production,"Coal, Peat and Manufactured Gases",167.3548,GWh,2023
3,Austria,12/1/2023,Net Electricity Production,Oil and Petroleum Products,73.8682,GWh,2023
4,Austria,12/1/2023,Net Electricity Production,Natural Gas,1035.0865,GWh,2023
...,...,...,...,...,...,...,...
71092,Sweden,1/1/2010,Total Imports,Electricity,1806.0000,GWh,2010
71093,Sweden,1/1/2010,Total Exports,Electricity,266.6560,GWh,2010
71094,Sweden,1/1/2010,Used for pumped storage,Electricity,16.1090,GWh,2010
71095,Sweden,1/1/2010,Distribution Losses,Electricity,1091.0110,GWh,2010


In [20]:
# H1: Drop unrelevant column 'index'
#df_main = df_main.drop(columns=['index'])
#df_main.head()

In [21]:
# H1: convert col 'value' to integer 
df_main['value'] = df_main['value'].astype(int)

# H1 : EDA 2st Dataset GDP per country per capita 

In [22]:
# H1: load dataset-2 GDP      
df_gdp = pd.read_csv('../data/gdp_per_capita.csv')     
df_gdp.head()                       

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Aruba,ABW,GDP per capita (current US$),NY.GDP.PCAP.CD,,,,,,,...,26940.26411,28419.26453,28449.71295,29329.08175,30918.48358,31902.80982,24008.12782,29127.75938,33300.83882,
1,Africa Eastern and Southern,AFE,GDP per capita (current US$),NY.GDP.PCAP.CD,162.342518,166.263682,171.961916,182.01848,192.639989,202.847824,...,1678.55361,1498.805084,1346.301281,1485.753579,1558.612079,1508.486886,1356.088871,1545.956697,1642.432039,1672.505957
2,Afghanistan,AFG,GDP per capita (current US$),NY.GDP.PCAP.CD,,,,,,,...,626.512929,566.88113,523.053012,526.140801,492.090631,497.741431,512.055098,355.777826,352.603733,
3,Africa Western and Central,AFW,GDP per capita (current US$),NY.GDP.PCAP.CD,122.193931,127.732834,134.13007,139.291222,148.821355,156.314136,...,2248.518426,1882.518808,1648.920269,1590.555785,1735.445833,1813.609146,1688.470871,1769.171853,1788.875347,1584.333285
4,Angola,AGO,GDP per capita (current US$),NY.GDP.PCAP.CD,,,,,,,...,5011.984427,3217.33924,1809.709377,2439.374439,2540.508879,2191.347764,1450.905111,1927.474078,2933.484644,2309.52162


In [23]:
# H1: Drop cols "Indicator Name", "Indicator Code"
df_gdp_cleaned = df_gdp.drop(columns=["Indicator Name", "Indicator Code"])
df_gdp_cleaned

Unnamed: 0,Country Name,Country Code,1960,1961,1962,1963,1964,1965,1966,1967,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Aruba,ABW,,,,,,,,,...,26940.264110,28419.264530,28449.712950,29329.081750,30918.483580,31902.809820,24008.127820,29127.759380,33300.838820,
1,Africa Eastern and Southern,AFE,162.342518,166.263682,171.961916,182.018480,192.639989,202.847824,214.666914,226.978859,...,1678.553610,1498.805084,1346.301281,1485.753579,1558.612079,1508.486886,1356.088871,1545.956697,1642.432039,1672.505957
2,Afghanistan,AFG,,,,,,,,,...,626.512929,566.881130,523.053012,526.140801,492.090631,497.741431,512.055098,355.777826,352.603733,
3,Africa Western and Central,AFW,122.193931,127.732834,134.130070,139.291222,148.821355,156.314136,162.890667,145.568085,...,2248.518426,1882.518808,1648.920269,1590.555785,1735.445833,1813.609146,1688.470871,1769.171853,1788.875347,1584.333285
4,Angola,AGO,,,,,,,,,...,5011.984427,3217.339240,1809.709377,2439.374439,2540.508879,2191.347764,1450.905111,1927.474078,2933.484644,2309.521620
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,Kosovo,XKX,,,,,,,,,...,3902.530841,3520.782075,3759.472855,4009.353811,4384.188680,4416.029253,4310.934002,5270.550640,5290.977397,5943.125714
262,"Yemen, Rep.",YEM,,,,,,,,,...,1557.601406,1488.416289,1069.817009,893.716501,701.714865,693.816508,626.309169,583.849041,698.850350,533.367124
263,South Africa,ZAF,529.561923,543.042224,560.699395,601.599951,642.688431,681.131111,718.118179,775.152812,...,6965.137897,6204.929901,5735.066787,6734.475153,7067.724165,6702.526617,5753.066494,7073.612754,6766.481254,6253.161613
264,Zambia,ZMB,228.567398,216.274674,208.562685,209.453362,236.941713,296.022427,334.672528,350.653425,...,1724.576220,1307.909649,1249.923143,1495.752138,1475.199883,1268.120941,958.264900,1134.713454,1456.901570,1369.129365


In [24]:
# H1: transform data from wide to long format with "stack() function"
df_stacked = df_gdp_cleaned.set_index(["Country Name", "Country Code"]).stack().reset_index()
df_stacked.columns = ["Country", "Country Code", "Year", "GDP"]
df_stacked.head()

Unnamed: 0,Country,Country Code,Year,GDP
0,Aruba,ABW,1986,6283.001443
1,Aruba,ABW,1987,7567.253642
2,Aruba,ABW,1988,9274.514156
3,Aruba,ABW,1989,10767.39622
4,Aruba,ABW,1990,11638.73371


In [25]:
# H1: format_column_titles
format_column_titles_h1(df_stacked)

Index(['country', 'country_code', 'year', 'gdp'], dtype='object')

In [26]:
#  H1: rename column country
df_stacked.rename(columns={'country': 'country_name'}, inplace=True)
df_stacked.head()

Unnamed: 0,country_name,country_code,year,gdp
0,Aruba,ABW,1986,6283.001443
1,Aruba,ABW,1987,7567.253642
2,Aruba,ABW,1988,9274.514156
3,Aruba,ABW,1989,10767.39622
4,Aruba,ABW,1990,11638.73371


In [27]:
# H1: rename column 'Czechia' to 'Czech Republic'
df_stacked['country_name'] = df_stacked['country_name'].replace(['Czechia'], 'Czech Republic')

In [28]:
# H1: filter EU countries 
eu_countries = ['Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czech Republic', 'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece', 'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg', 'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovak Republic', 'Slovenia', 'Spain', 'Sweden']

df_stacked_eu = df_stacked.loc[df_stacked['country_name'].isin(eu_countries)]

In [29]:
# H1: reset index
df_stacked_eu = df_stacked_eu.reset_index()
df_stacked_eu

Unnamed: 0,index,country_name,country_code,year,gdp
0,633,Austria,AUT,1960,943.610800
1,634,Austria,AUT,1961,1040.804885
2,635,Austria,AUT,1962,1097.312202
3,636,Austria,AUT,1963,1177.168242
4,637,Austria,AUT,1964,1280.472577
...,...,...,...,...,...
1390,11861,Sweden,SWE,2019,51939.429740
1391,11862,Sweden,SWE,2020,52837.903980
1392,11863,Sweden,SWE,2021,61417.680880
1393,11864,Sweden,SWE,2022,56299.505730


In [30]:
# H1: GDP col round and convert in int type
df_stacked_eu['gdp'] = df_stacked_eu['gdp'].astype(int)

In [31]:
# H1: check missing values 
unique_and_missing_values_dtype(df_stacked_eu)

Unnamed: 0,non-Null_count,dtype,unique_values,%_unique,missing_values,%_missing
index,1395,int64,1395,100.0%,0,0.0%
country_name,1395,object,27,1.94%,0,0.0%
country_code,1395,object,27,1.94%,0,0.0%
year,1395,object,64,4.59%,0,0.0%
gdp,1395,int64,1366,97.92%,0,0.0%


In [32]:
# H1: drop col index: 
df_stacked_eu = df_stacked_eu.drop(columns=['index'])

In [33]:
# H1: perform unique_and_missing_values_dtype analysis
unique_and_missing_values_dtype(df_stacked_eu)

Unnamed: 0,non-Null_count,dtype,unique_values,%_unique,missing_values,%_missing
country_name,1395,object,27,1.94%,0,0.0%
country_code,1395,object,27,1.94%,0,0.0%
year,1395,object,64,4.59%,0,0.0%
gdp,1395,int64,1366,97.92%,0,0.0%


In [34]:
#  H1: remove 'Romania' data from the GDP dataset:
df_stacked_eu = df_stacked_eu.drop(df_stacked_eu[df_stacked_eu['country_name'] == 'Romania'].index)
df_stacked_eu.head()


Unnamed: 0,country_name,country_code,year,gdp
0,Austria,AUT,1960,943
1,Austria,AUT,1961,1040
2,Austria,AUT,1962,1097
3,Austria,AUT,1963,1177
4,Austria,AUT,1964,1280


In [35]:
#  H1: compare countries in both datasets:
unique_countries_eu = df_stacked_eu['country_name'].unique()
unique_countries_main = df_main['country_name'].unique()

# Find countries in df_main that are not in df_stacked_eu
countries_not_in_eu = set(unique_countries_main) - set(unique_countries_eu)
# Find countries in df_stacked_eu that are not in df_main
countries_not_in_main = set(unique_countries_eu) - set(unique_countries_main)

print("Countries in df_main not in df_stacked_eu:", countries_not_in_eu)
print("Countries in df_stacked_eu not in df_main:", countries_not_in_main)

Countries in df_main not in df_stacked_eu: set()
Countries in df_stacked_eu not in df_main: set()


In [36]:
#  H1: make a copy of GDP dataset
df_gdp_final = df_stacked_eu.copy()
df_gdp_final.head()

Unnamed: 0,country_name,country_code,year,gdp
0,Austria,AUT,1960,943
1,Austria,AUT,1961,1040
2,Austria,AUT,1962,1097
3,Austria,AUT,1963,1177
4,Austria,AUT,1964,1280


In [37]:
# H1 : convert df_gdp_final['year'] to int
df_gdp_final['year'] = df_gdp_final['year'].astype(int)

# H1: cleaned databases : 

In [38]:
df_main  # global electricity production
df_gdp_final #gdp per capita

Unnamed: 0,country_name,country_code,year,gdp
0,Austria,AUT,1960,943
1,Austria,AUT,1961,1040
2,Austria,AUT,1962,1097
3,Austria,AUT,1963,1177
4,Austria,AUT,1964,1280
...,...,...,...,...
1390,Sweden,SWE,2019,51939
1391,Sweden,SWE,2020,52837
1392,Sweden,SWE,2021,61417
1393,Sweden,SWE,2022,56299


# H1: Correlation research 
How does GDP rate influence renewable energy adoption in EU countries from 2010 to 2023


# H1: Correlation idea: 
1. in df_main find the % of renevavable energy
2. make a pivot df with cols : country, year, % of renevable energy
3. merge df energy production  with gdp_df (cols: country, year, gdp) by cols country and year
4. perform a scatterplot visualisation by 'GDP' and '% of renuvable'


## H1.1. In df_main find the % of renevavable energy

In [39]:
df_main.columns

Index(['country_name', 'date', 'parameter', 'product', 'value', 'unit',
       'year'],
      dtype='object')

In [40]:
# H1 : make a copy of df
df_energy_prod = df_main.copy()

In [41]:
# Filter the DataFrame to keep only the specified products: 'Total Combustible Fuels', 'Total Renevables (Hydro, Geo, Solar, Wind, Other)': 
filtered_df_energy_prod = df_energy_prod[df_energy_prod['product'].isin(['Total Combustible Fuels', 'Total Renewables (Hydro, Geo, Solar, Wind, Other)'])]

## H1.2. make a pivot df_main (energy prod) with cols : country, year, % of renevable energy

In [42]:
# H1.2. make a pivot table
pivot_df_energy = filtered_df_energy_prod.pivot_table(index=['year', 'country_name'], columns='product', values='value', aggfunc='sum').reset_index()

# Calculate the share of renewables
pivot_df_energy['%_of_renewable'] = (pivot_df_energy['Total Renewables (Hydro, Geo, Solar, Wind, Other)'] / 
                             (pivot_df_energy['Total Combustible Fuels'] + pivot_df_energy['Total Renewables (Hydro, Geo, Solar, Wind, Other)'])) * 100

In [43]:
# H1.2.ensure that in the both dataframes the date range is between 2010 and 2023
df_gdp_final_filtered = df_gdp_final[df_gdp_final['year'] >= 2010]

In [44]:
df_gdp_final_filtered['year'].unique()

array([2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020,
       2021, 2022, 2023])

## H1.3. merge df energy production  with gdp_df (cols: country, year, gdp) by cols country and year


In [45]:
# H1.3.merge both dataframes
df_merged = pd.merge(pivot_df_energy, df_gdp_final_filtered, on=['year', 'country_name'], how='left')
df_merged 

Unnamed: 0,year,country_name,Total Combustible Fuels,"Total Renewables (Hydro, Geo, Solar, Wind, Other)",%_of_renewable,country_code,gdp
0,2010,Austria,26038,45804,63.756577,AUT,46903
1,2010,Belgium,42054,7674,15.431950,BEL,44184
2,2010,Czech Republic,47559,6229,11.580650,CZE,19960
3,2010,Denmark,29030,12128,29.466932,DNK,58041
4,2010,Estonia,11422,965,7.790425,EST,14663
...,...,...,...,...,...,...,...
339,2023,Portugal,15468,36667,70.330872,PRT,27275
340,2023,Slovak Republic,6279,7471,54.334545,SVK,24470
341,2023,Slovenia,3552,6379,64.233209,SVN,32163
342,2023,Spain,81697,141350,63.372294,ESP,32676


In [46]:
# H1.3.convert df_main['%_of_renewable'] to int
df_merged['%_of_renewable'] = df_merged['%_of_renewable'].astype(int)

In [47]:
# H1.3.remove unrelevant cols 
df_viz = df_merged.drop(columns=['Total Combustible Fuels', 'Total Renewables (Hydro, Geo, Solar, Wind, Other)', 'country_code'])
df_viz

Unnamed: 0,year,country_name,%_of_renewable,gdp
0,2010,Austria,63,46903
1,2010,Belgium,15,44184
2,2010,Czech Republic,11,19960
3,2010,Denmark,29,58041
4,2010,Estonia,7,14663
...,...,...,...,...
339,2023,Portugal,70,27275
340,2023,Slovak Republic,54,24470
341,2023,Slovenia,64,32163
342,2023,Spain,63,32676


## H1.4. perform a scatterplot visualisation

In [48]:
# H1.4. correlation index visualizaton 
create_scatter_plot_with_trendline(df_viz)

The scatterplot shows the relationship between GDP and the percentage of renewable energy production for EU countries in 2010-2023. 
## Positive Correlation
The trendline indicates a positive correlation between GDP and the percentage of renewable energy. 
This means that, on average, countries with higher GDP tend to have a higher percentage of renewable energy production. 

The positive trend suggests that economically developed countries are more likely to invest in renewable energy sources. This could be due to higher availability of funds for infrastructure, more advanced technology, and stronger policy frameworks supporting renewable energy.

Despite the positive trend, there is considerable variability in the data. Many countries with lower GDP have a wide range of renewable energy percentages, from very low to very high.
Similarly, even at higher GDP levels, there is a spread in the percentage of renewable energy, however it tends to be somewhat more concentrated.





In [49]:
# H1.4. bar chart: the average % of renewable energy adoption for each country in 2010-2023
create_bar_chart_with_target(df_viz)