In [3]:
import pandas as pd
import matplotlib.pyplot as plt

renewables_csv = 'renewable-share-energy.csv'
co2_csv = 'consumption-co2-per-capita.csv'
gdp_file = "world_bank_GDP_per_country.csv"

In [None]:
# functions to turn the CSV files into Pandas dataframes

def vertical_to_horizontal(csv_file):
    # Read the CSV file into a DataFrame
    dataframe = pd.read_csv(csv_file)

    # Pivot the DataFrame based on 'Entity' column
    pivoted_df = dataframe.pivot_table(index='Year', columns='Entity', values=dataframe.columns[3]).reset_index()

    return pivoted_df

# flipping the data and removing unused years.

renewables_flipped = vertical_to_horizontal(renewables_csv)
renewables_flipped = renewables_flipped[(renewables_flipped['Year'] >= 1990) & (renewables_flipped['Year'] <= 2020)]

co2_flipped = vertical_to_horizontal(co2_csv)
co2_flipped = co2_flipped[(co2_flipped['Year'] >= 1990) & (co2_flipped['Year'] <= 2020)]

non_country_entities = [
    'High-income countries', 'South America', 'Europe', 'Oceania', 'Africa',
    'Lower-middle-income countries', 'Upper-middle-income countries', 'World',
    'North America', 'European Union (27)', 'Asia'
]

def filter_countries_1990_2020(df):
    # Filter the DataFrame to keep only rows for 1990 and 2020
    df_1990 = df[df['Year'] == 1990]
    df_2020 = df[df['Year'] == 2020]

    # Extract the countries that have data in both 1990 and 2020
    countries_1990 = set(df_1990.columns[df_1990.notnull().any()])
    countries_2020 = set(df_2020.columns[df_2020.notnull().any()])

    countries_both_years = list(countries_1990.intersection(countries_2020))

    # Filter the original DataFrame to include only columns (countries) present in both 1990 and 2020
    filtered_df = df[df.columns[df.columns.isin(countries_both_years)]]

    return filtered_df

renewables_flipped = filter_countries_1990_2020(renewables_flipped)
co2_flipped = filter_countries_1990_2020(co2_flipped)

def filter_common_countries(df1, df2):
    countries_df1 = set(df1.columns)
    countries_df2 = set(df2.columns)
    common_countries = list(countries_df1.intersection(countries_df2))

    # Exclude 'Year' from removal if present in common columns
    if 'Year' in common_countries:
        common_countries.remove('Year')

    # Filter DataFrames to include only common countries (excluding 'Year')
    filtered_df1 = df1[['Year'] + common_countries]
    filtered_df2 = df2[['Year'] + common_countries]

    return filtered_df1, filtered_df2

renewables_filtered, co2_filtered = filter_common_countries(renewables_flipped, co2_flipped)

# Remove non-country entities from the DataFrames
renewables_filtered = renewables_filtered.drop(columns=non_country_entities, errors='ignore')
co2_filtered = co2_filtered.drop(columns=non_country_entities, errors='ignore')


display(renewables_filtered)
display(co2_filtered)

In [4]:
# Renewables GDP per capita function

def renewables_gdp_percapita(directory, country_code):
    dataframe = pd.read_csv(directory, skiprows=range(4))
    
    # Filter the DataFrame based on the regional entity (Country Code)
    filtered_data = dataframe[dataframe['Country Code'] == country_code]
    
    # Extract the years and corresponding GDP values into a Pandas DataFrame
    gdp_data_df = filtered_data.iloc[:, 4:]  # Assuming GDP values start from the 5th column
    gdp_data_df = gdp_data_df.T.reset_index()
    gdp_data_df.columns = ['Year', 'GDP (current US$)']
    
    # Convert 'Year' column to numeric and exclude non-numeric values
    gdp_data_df['Year'] = pd.to_numeric(gdp_data_df['Year'], errors='coerce')
    gdp_data_df = gdp_data_df.dropna(subset=['Year'])
    
    return gdp_data_df

In [5]:
renewables_gdp_percapita(gdp_file, "RUS")

Unnamed: 0,Year,GDP (current US$)
0,1960.0,
1,1961.0,
2,1962.0,
3,1963.0,
4,1964.0,
...,...,...
58,2018.0,1.657329e+12
59,2019.0,1.693115e+12
60,2020.0,1.493076e+12
61,2021.0,1.836892e+12
