In [2]:
import pandas as pd
import matplotlib.pyplot as plt

renewables_csv = 'renewable-share-energy.csv'
co2_csv = 'consumption-co2-per-capita.csv'
gdp_file = "world_bank_GDP_per_country.csv"

In [3]:
# functions to turn the CSV files into Pandas dataframes

def vertical_to_horizontal(csv_file):
    # Read the CSV file into a DataFrame
    dataframe = pd.read_csv(csv_file)

    # Pivot the DataFrame based on 'Entity' column
    pivoted_df = dataframe.pivot_table(index='Year', columns='Code', values=dataframe.columns[3]).reset_index()
    
    return pivoted_df

# flipping the data and removing unused years.

renewables_flipped = vertical_to_horizontal(renewables_csv)
renewables_flipped = renewables_flipped[(renewables_flipped['Year'] >= 1990) & (renewables_flipped['Year'] <= 2020)]

co2_flipped = vertical_to_horizontal(co2_csv)
co2_flipped = co2_flipped[(co2_flipped['Year'] >= 1990) & (co2_flipped['Year'] <= 2020)]

non_country_entities = [
    'High-income countries', 'South America', 'Europe', 'Oceania', 'Africa',
    'Lower-middle-income countries', 'Upper-middle-income countries', 'World',
    'North America', 'European Union (27)', 'Asia', 'OWID_WRL'
]

def filter_countries_1990_2020(df):
    # Filter the DataFrame to keep only rows for 1990 and 2020
    df_1990 = df[df['Year'] == 1990]
    df_2020 = df[df['Year'] == 2020]

    # Extract the countries that have data in both 1990 and 2020
    countries_1990 = set(df_1990.columns[df_1990.notnull().any()])
    countries_2020 = set(df_2020.columns[df_2020.notnull().any()])

    countries_both_years = list(countries_1990.intersection(countries_2020))

    # Filter the original DataFrame to include only columns (countries) present in both 1990 and 2020
    filtered_df = df[df.columns[df.columns.isin(countries_both_years)]]

    return filtered_df

renewables_flipped = filter_countries_1990_2020(renewables_flipped)
co2_flipped = filter_countries_1990_2020(co2_flipped)

def filter_common_countries(df1, df2):
    countries_df1 = set(df1.columns)
    countries_df2 = set(df2.columns)
    common_countries = list(countries_df1.intersection(countries_df2))

    # Exclude 'Year' from removal if present in common columns
    if 'Year' in common_countries:
        common_countries.remove('Year')

    # Filter DataFrames to include only common countries (excluding 'Year')
    filtered_df1 = df1[['Year'] + common_countries]
    filtered_df2 = df2[['Year'] + common_countries]

    return filtered_df1, filtered_df2

renewables_filtered, co2_filtered = filter_common_countries(renewables_flipped, co2_flipped)

# Remove non-country entities from the DataFrames
renewables_filtered = renewables_filtered.drop(columns=non_country_entities, errors='ignore')
renewables_filtered.set_index('Year', inplace=True)

co2_filtered = co2_filtered.drop(columns=non_country_entities, errors='ignore')
co2_filtered.set_index('Year', inplace=True)


display(renewables_filtered)
display(co2_filtered)


Renewables per capita Dataframe


Entity,Year,Egypt,New Zealand,Kazakhstan,Latvia,Spain,Bulgaria,Malaysia,Morocco,Luxembourg,...,Switzerland,Indonesia,Bangladesh,Chile,Vietnam,United Kingdom,United States,India,Canada,Lithuania
25,1990,7.520866,41.426918,2.534389,15.375505,7.198869,1.707941,4.940909,4.359246,0.664601,...,26.992222,3.731863,3.539271,18.957916,20.584055,0.680118,4.724676,8.54283,29.480997,0.606307
26,1991,7.396339,40.412006,2.479837,12.153858,7.452908,2.74335,4.358415,4.3259,0.733811,...,28.26373,3.81247,3.477609,24.08466,23.790989,0.610434,4.724286,9.029866,30.499428,0.468239
27,1992,7.380687,37.46697,2.395667,13.070239,5.013249,2.464117,3.721478,3.039623,0.610749,...,28.394741,4.408543,3.028885,28.355387,24.912485,0.742499,4.261231,8.140624,30.51211,0.703257
28,1993,7.595429,40.20715,3.015272,17.300793,6.625087,2.174688,4.18968,1.382297,0.604114,...,31.083033,3.847653,2.104329,27.529266,24.039852,0.66765,4.57141,8.035936,30.450893,1.136311
29,1994,7.676783,41.244785,4.072831,20.521252,7.313819,1.708674,5.270333,2.377947,0.775281,...,32.318325,3.521223,2.76168,25.954521,24.972696,0.825375,4.254817,8.667226,29.927301,1.514459
30,1995,7.558946,42.02623,4.114585,19.48626,5.938282,2.505726,4.620305,1.763483,0.950182,...,30.77135,3.539269,1.026995,26.028479,24.608881,0.789619,4.762141,7.686768,29.888117,1.118191
31,1996,7.361753,39.501213,4.159171,13.271263,9.653097,3.173351,3.450362,5.402516,0.649677,...,26.179207,3.508815,1.947564,22.5519,24.35814,0.644379,5.018571,6.789634,30.879307,0.897941
32,1997,7.243849,35.65875,4.181545,20.91794,8.211403,3.166136,2.393924,5.531386,0.902584,...,28.932562,2.477279,1.803428,21.909548,20.527615,0.825345,5.115221,6.5709,30.284904,0.881222
33,1998,7.154178,38.80477,4.193706,28.085598,7.81521,3.702421,2.660114,4.600728,1.294618,...,28.326654,4.014905,2.037124,18.059258,18.064957,0.987954,4.695756,7.381819,29.307375,1.161542
34,1999,7.409151,35.58187,4.455529,21.138712,5.629497,3.745924,4.217979,2.058802,1.046319,...,32.331287,3.852458,1.939729,15.275814,21.659023,1.107733,4.583452,7.124597,29.588795,1.394003


CO2 per capita dataframe


Entity,Year,Egypt,New Zealand,Kazakhstan,Latvia,Spain,Bulgaria,Malaysia,Morocco,Luxembourg,...,Switzerland,Indonesia,Bangladesh,Chile,Vietnam,United Kingdom,United States,India,Canada,Lithuania
0,1990,1.431379,7.909789,17.148327,8.031676,6.823517,6.998754,3.741053,1.240285,31.01004,...,12.773527,0.766681,0.170461,2.402846,0.348098,11.696713,20.347502,0.660851,17.463978,11.455255
1,1991,1.336157,7.759744,12.50239,7.425428,6.953576,5.129974,4.015785,1.231995,32.20236,...,12.969457,0.869577,0.177141,2.28157,0.339298,11.793274,19.72527,0.671166,17.216078,12.141571
2,1992,1.456119,8.019308,7.600424,6.927283,7.483764,4.871405,4.368112,1.409892,31.18512,...,13.929676,0.984626,0.185828,2.412116,0.333304,12.046045,20.175217,0.712204,17.475994,13.901977
3,1993,1.533426,7.932907,10.002325,5.560165,6.925846,5.284869,4.542447,1.35915,31.143454,...,12.585456,1.018359,0.185,2.532601,0.353906,11.407504,20.126003,0.70352,16.834455,8.606816
4,1994,1.421384,8.037168,8.810326,5.287941,6.972272,4.868482,4.489757,1.359539,28.709126,...,12.370958,1.025336,0.200583,2.665795,0.393447,11.245523,20.2478,0.724505,16.825378,7.65297
5,1995,1.538559,8.254908,8.170543,4.950342,7.232432,5.495648,4.94538,1.368468,22.450573,...,12.833714,1.037032,0.223746,2.846301,0.422977,11.14485,20.21765,0.746073,16.919556,7.510003
6,1996,1.517354,8.594522,7.382071,5.633445,6.807071,5.821172,4.893714,1.23544,22.269045,...,15.23,1.139294,0.226845,3.27217,0.501402,11.606233,20.590729,0.781769,17.071121,6.154727
7,1997,1.665535,8.958088,7.060441,5.477687,6.907004,5.606638,5.306779,1.171168,20.443844,...,13.52747,1.204452,0.238152,3.680092,0.598093,11.551663,20.717155,0.802062,17.972843,5.894873
8,1998,1.878637,8.258272,7.486028,5.464158,7.22575,5.580946,3.643408,1.252733,18.121647,...,16.40141,0.667905,0.232896,3.711149,0.610385,11.925702,21.054504,0.803076,17.939676,6.070131
9,1999,1.895436,8.883102,6.59805,5.081194,7.7432,5.097709,4.234014,1.345823,20.157166,...,14.77959,0.986858,0.237875,3.890885,0.556379,11.647475,21.440773,0.868013,17.817673,5.478316


In [4]:
# Renewables GDP per capita function

def renewables_gdp_percapita(directory, country_code):
    dataframe = pd.read_csv(directory, skiprows=range(4))
    
    # Filter the DataFrame based on the regional entity (Country Code)
    filtered_data = dataframe[dataframe['Country Code'] == country_code]
    
    # Extract the years and corresponding GDP values into a Pandas DataFrame
    gdp_data_df = filtered_data.iloc[:, 4:]  # Assuming GDP values start from the 5th column
    gdp_data_df = gdp_data_df.T.reset_index()
    gdp_data_df.columns = ['Year', 'GDP (current US$)']
    
    # Convert 'Year' column to numeric and exclude non-numeric values
    gdp_data_df['Year'] = pd.to_numeric(gdp_data_df['Year'], errors='coerce')
    gdp_data_df = gdp_data_df.dropna(subset=['Year'])
    
    return gdp_data_df

In [5]:
def renewables_gdp_percapita(directory):
    dataframe = pd.read_csv(directory, skiprows=range(4))
    
    # Extract the years and corresponding GDP values into a Pandas DataFrame
    gdp_data_df = dataframe.iloc[:, 4:]  # Assuming GDP values start from the 5th column
    gdp_data_df = gdp_data_df.T.reset_index()
    gdp_data_df.columns = ['Year', 'GDP (current US$)']
    
    # Convert 'Year' column to numeric and exclude non-numeric values
    gdp_data_df['Year'] = pd.to_numeric(gdp_data_df['Year'], errors='coerce')
    gdp_data_df = gdp_data_df.dropna(subset=['Year'])
    
    return gdp_data_df


In [6]:
display(renewables_gdp_percapita(gdp_file))

ValueError: Length mismatch: Expected axis has 267 elements, new values have 2 elements

In [5]:
renewables_gdp_percapita(gdp_file, "RUS")

Unnamed: 0,Year,GDP (current US$)
0,1960.0,
1,1961.0,
2,1962.0,
3,1963.0,
4,1964.0,
...,...,...
58,2018.0,1.657329e+12
59,2019.0,1.693115e+12
60,2020.0,1.493076e+12
61,2021.0,1.836892e+12
