In [1]:
import pandas as pd
import geopandas as gpd
import os
from osgeo import ogr


In [2]:
# Work from the project folder so the relative "Scripts" paths used below resolve.
# NOTE: machine-specific absolute path - adjust before running on another computer.
# The original literal was a raw string that *also* escaped its backslashes, so
# the path contained doubled separators; a normal string yields single ones.
os.chdir("c:\\Users\\flv.eco\\OneDrive - CBS - Copenhagen Business School\\Documents\\01_PtX_Markets\\")

In [3]:
# Load the 'data' sheet of the GDP/population scenario workbook.
# os.path.join avoids the invalid "\S" escape sequence of the original literal
# (a SyntaxWarning on recent Python versions) and works on any OS.
gdp_pop = pd.read_excel(os.path.join("Scripts", "SSP2_GDP_POP.xlsx"), sheet_name="data")

In [4]:
# Split the workbook by model. Explicit copies are taken because the resulting
# frames are modified further down (index reset, column overwrite); working on
# the filtered slices directly can raise SettingWithCopyWarning.
iiasa_pop = gdp_pop.query('Model == "IIASA-WiC POP 2023"').copy()
iiasa_gdp = gdp_pop.query('Model == "IIASA GDP 2023"').copy()
# Despite its name, this frame holds the 2020 column of the
# "OECD ENV-Growth 2023" model rows (region + 2020 value only).
iiasa_gdp_2020 = gdp_pop.query('Model == "OECD ENV-Growth 2023"')[["Region", "2020"]].copy()

In [5]:
# Renumber the rows 0..n-1. The previous row labels are kept in an "index"
# column (a later cell drops it). Rebinding to the returned frames replaces the
# original inplace=True calls, which mutated the filtered slices in place.
iiasa_gdp_2020 = iiasa_gdp_2020.reset_index()
iiasa_gdp = iiasa_gdp.reset_index()
iiasa_pop = iiasa_pop.reset_index()

In [6]:
# Replace the 2020 GDP column with the values from iiasa_gdp_2020, matched by
# region name. The original assignment aligned the two frames by row position,
# which silently assigns values to the wrong region whenever the two model
# extracts do not list the same regions in the same order. Regions without a
# match get NaN; if a region occurs more than once its first value is used.
iiasa_gdp = iiasa_gdp.assign(
    **{
        "2020": iiasa_gdp["Region"].map(
            iiasa_gdp_2020.drop_duplicates("Region").set_index("Region")["2020"]
        )
    }
)

In [7]:
# Remove the bookkeeping columns and the year columns 2055-2100 from both
# frames; the same column list applies to GDP and population.
iiasa_gdp, iiasa_pop = (
    frame.drop(
        columns=["index", "Scenario", "Variable"]
        + ["2055", "2060", "2065", "2070", "2075", "2080", "2085", "2090", "2095", "2100"]
    )
    for frame in (iiasa_gdp, iiasa_pop)
)

In [8]:
def calculate_growth(df, regions, years, base_year="2020"):
    """
    Compute, per region, the value of each requested year relative to a base year.

    Each result is the ratio ``value[year] / value[base_year]``, i.e. a growth
    factor where 1.0 means "unchanged versus the base year".

    Parameters:
    - df: DataFrame with a 'Region' column and one column per year; year
      columns are looked up by their string label (e.g. "2020", "2025").
    - regions: Iterable of region names to process. If a region occurs in
      several rows of df, only its first row is used.
    - years: Iterable of years to compute. Each entry is looked up as
      ``str(year)`` and used unchanged as the column label of the result.
    - base_year: Year whose value is the denominator (default "2020").

    Returns:
    - DataFrame with one row per region: a 'Region' column followed by one
      column per entry of `years`.

    Raises:
    - IndexError: if a region has no row in df.
    - KeyError: if the base-year column or a requested year column is missing.
    """
    results = []

    for region in regions:
        # Select the region's rows once instead of re-evaluating the mask per year.
        region_rows = df.loc[df["Region"] == region]
        if region_rows.empty:
            raise IndexError(f"Region {region!r} not found in the 'Region' column")

        historic_value = region_rows[str(base_year)].values[0]

        new_row = {"Region": region}
        for year in years:
            new_row[year] = region_rows[str(year)].values[0] / historic_value

        results.append(new_row)

    return pd.DataFrame(results)

In [9]:

# Distinct region labels of each data set (NumPy arrays, in order of appearance).
regions_gdp = iiasa_gdp["Region"].unique()
regions_pop = iiasa_pop["Region"].unique()

# Projection years as strings: "2025", "2030", ..., "2050".
years = [str(year) for year in range(2025, 2051, 5)]

# Growth factors relative to 2020, one row per region.
growthrate_gdp = calculate_growth(df=iiasa_gdp, regions=regions_gdp, years=years)
growthrate_pop = calculate_growth(df=iiasa_pop, regions=regions_pop, years=years)

In [10]:
# Reshape from one row per region to one row per year (regions become columns).
# Moving 'Region' into the index before transposing keeps the values numeric;
# the original transposed the label column together with the numbers, which
# left every column as object dtype and the columns axis named 0.
growthrate_gdp = growthrate_gdp.set_index("Region").T
growthrate_gdp.columns.name = None
# Year labels ("2025", ...) -> timestamps at 1 January of each year.
growthrate_gdp.index = pd.to_datetime(growthrate_gdp.index.astype(int), format="%Y").rename("Year")


In [11]:
# Reshape from one row per region to one row per year (regions become columns).
# Moving 'Region' into the index before transposing keeps the values numeric;
# the original transposed the label column together with the numbers, which
# left every column as object dtype and the columns axis named 0.
growthrate_pop = growthrate_pop.set_index("Region").T
growthrate_pop.columns.name = None
# Year labels ("2025", ...) -> timestamps at 1 January of each year.
growthrate_pop.index = pd.to_datetime(growthrate_pop.index.astype(int), format="%Y").rename("Year")


In [12]:
def calculate_absolute_growth(df_historic, df_growthrate, regions, years, base_date="2020-01-01"):
    """
    Project absolute values by scaling each region's base-date value with its growth factors.

    For every region and year the result is
    ``df_growthrate[year, region] * df_historic[base_date, region]``, i.e. a
    projected level rather than a difference.

    Parameters:
    - df_historic: DataFrame of historic values with a datetime index and one
      column per region. It must contain a row for `base_date`.
    - df_growthrate: DataFrame of growth factors relative to the base date,
      with a datetime index and one column per region.
    - regions: Iterable of region names; each must be a column of both frames.
    - years: Iterable of years to project. Each entry is looked up as
      ``str(year)`` and used unchanged as the column label of the result.
    - base_date: Index label of the reference row in df_historic
      (default "2020-01-01").

    Returns:
    - DataFrame with one row per region: a 'Region' column followed by one
      column per entry of `years`.

    Raises:
    - KeyError: if the base date, a year or a region cannot be found.
    """
    results = []

    for region in regions:
        # Reference level of this region at the base date.
        historic_value = df_historic.loc[base_date, region]

        new_row = {"Region": region}
        for year in years:
            factor = df_growthrate.loc[str(year), region]
            # On a datetime index a year-only label selects a slice (a Series);
            # on other index types the lookup returns the scalar directly.
            if isinstance(factor, pd.Series):
                factor = factor.iloc[0]
            new_row[year] = factor * historic_value

        results.append(new_row)

    return pd.DataFrame(results)


In [56]:
# Load the historic GDP workbook. os.path.join replaces the original
# "Scripts\GDP_EU.xlsx" literal, which relied on the invalid escape sequence "\G".
gdp = pd.read_excel(os.path.join("Scripts", "GDP_EU.xlsx"), sheet_name='Sheet 1')
# Keep rows 6..42 of the sheet only - presumably the data table without the
# surrounding export header/footer rows; TODO confirm against the workbook.
gdp = gdp.iloc[6:43].reset_index(drop=True)
# Transpose so that each row is one time step and each column one region,
# then promote the first row to column headers.
gdp = gdp.T
gdp = gdp.reset_index(drop=True)
gdp.columns = gdp.iloc[0]
gdp = gdp.drop(gdp.index[0])
# The "TIME" column carries the year; rows without a year are discarded.
gdp = gdp.rename(columns={"TIME": "Year"})
gdp = gdp.dropna(subset=['Year'])
gdp = gdp.drop(columns=['GEO (Labels)'])
# Index by year as timestamps (1 January of each year).
gdp = gdp.set_index("Year")
gdp.index = gdp.index.astype(int)
gdp.index = pd.to_datetime(gdp.index, format='%Y')
# Use the same spelling as the region labels of the scenario data.
gdp = gdp.rename(columns={"Türkiye": "Turkey"})

In [57]:
# Align the EU aggregate's label with the column name used in the historic GDP
# table: first in the growth-rate columns, then (in place) in the region array.
growthrate_gdp = growthrate_gdp.rename(
    columns={"European Union (R9)": "European Union - 27 countries (from 2020)"}
)
regions_gdp[regions_gdp == "European Union (R9)"] = "European Union - 27 countries (from 2020)"

In [93]:
# Show the GDP region labels (the EU entry was renamed in the previous cell).
regions_gdp

array(['European Union - 27 countries (from 2020)', 'Albania', 'Austria',
       'Belgium', 'Bulgaria', 'Switzerland', 'Cyprus', 'Czechia',
       'Germany', 'Denmark', 'Spain', 'Estonia', 'Finland', 'France',
       'Greece', 'Croatia', 'Hungary', 'Ireland', 'Italy', 'Lithuania',
       'Luxembourg', 'Latvia', 'North Macedonia', 'Malta', 'Montenegro',
       'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania', 'Serbia',
       'Slovakia', 'Slovenia', 'Sweden', 'Turkey'], dtype=object)

In [73]:
# Scale each region's 2020 GDP by its growth factors to get projected levels.
gdp_projection = calculate_absolute_growth(
    df_historic=gdp,
    df_growthrate=growthrate_gdp,
    regions=regions_gdp,
    years=years,
)

In [74]:
# Reshape from one row per region to one row per year (regions become columns).
# Moving 'Region' into the index before transposing avoids mixing the label
# column with the numbers, which in the original turned every column into
# object dtype and left the columns axis named 0.
gdp_projection = gdp_projection.set_index("Region").T
gdp_projection.columns.name = None
# Year labels ("2025", ...) -> timestamps at 1 January of each year.
gdp_projection.index = pd.to_datetime(gdp_projection.index.astype(int), format="%Y").rename("Year")

In [75]:
# Append the projected years to the historic series, coerce everything to
# numbers, and fill the years in between by linear interpolation.
gdp_projection = (
    pd.concat([gdp, gdp_projection])
    .apply(pd.to_numeric, errors='coerce')  # non-numeric cells become NaN
    .resample('YS')                         # one row per year start
    .interpolate(method='linear')
)
# Optional filter, currently disabled: keep only rows after 2021-01-01 via
#   gdp_projection[gdp_projection.index > '2021-01-01']

In [92]:
# Write the combined historic + projected GDP table to Excel.
# os.path.join replaces the original literal, whose "\P" was an invalid
# escape sequence.
gdp_projection.to_excel(os.path.join("Scripts", "Projected_GDP.xlsx"))

In [80]:
# Align the EU aggregate's label with the column name used in the historic
# population table: first in the growth-rate columns, then (in place) in the
# region array.
growthrate_pop = growthrate_pop.rename(
    columns={"European Union (R9)": "European Union - 27 countries (from 2020)"}
)
regions_pop[regions_pop == "European Union (R9)"] = "European Union - 27 countries (from 2020)"

In [81]:
# Load the historic population workbook. os.path.join replaces the original
# "Scripts\POP_EU.xlsx" literal, which relied on the invalid escape sequence "\P".
pop = pd.read_excel(os.path.join("Scripts", "POP_EU.xlsx"), sheet_name='Sheet 1')
# Transpose so that each row is one time step and each column one region,
# then promote the first row to column headers.
pop = pop.T
pop = pop.reset_index(drop=True)
pop.columns = pop.iloc[0]
pop = pop.drop(pop.index[0])
# The "TIME" column carries the year; rows without a year are discarded.
pop = pop.rename(columns={"TIME": "Year"})
pop = pop.dropna(subset=['Year'])
pop = pop.drop(columns=['GEO (Labels)'])
# Index by year as timestamps (1 January of each year).
pop = pop.set_index("Year")
pop.index = pop.index.astype(int)
pop.index = pd.to_datetime(pop.index, format='%Y')
# Discard every column that has at least one missing value.
pop = pop.dropna(axis='columns')
# Use the same spelling as the region labels of the scenario data.
pop = pop.rename(columns={"Türkiye": "Turkey"})
# Keep only the regions present in the population scenario data, in that
# order. Requires that the EU label in regions_pop was already renamed.
pop = pop[regions_pop]

In [82]:
# Scale each region's 2020 population by its growth factors to get projected levels.
pop_projection = calculate_absolute_growth(
    df_historic=pop,
    df_growthrate=growthrate_pop,
    regions=regions_pop,
    years=years,
)

In [83]:
# Reshape from one row per region to one row per year (regions become columns).
# Moving 'Region' into the index before transposing avoids mixing the label
# column with the numbers, which in the original turned every column into
# object dtype and left the columns axis named 0.
pop_projection = pop_projection.set_index("Region").T
pop_projection.columns.name = None
# Year labels ("2025", ...) -> timestamps at 1 January of each year.
pop_projection.index = pd.to_datetime(pop_projection.index.astype(int), format="%Y").rename("Year")

In [84]:
# Append the projected years to the historic series, coerce everything to
# numbers, and fill the years in between by linear interpolation.
pop_projection = (
    pd.concat([pop, pop_projection])
    .apply(pd.to_numeric, errors='coerce')  # non-numeric cells become NaN
    .resample('YS')                         # one row per year start
    .interpolate(method='linear')
)
# Optional filter, currently disabled: keep only rows after 2021-01-01 via
#   pop_projection[pop_projection.index > '2021-01-01']


In [91]:
# Write the combined historic + projected population table to Excel.
# os.path.join replaces the original literal, whose "\P" was an invalid
# escape sequence.
pop_projection.to_excel(os.path.join("Scripts", "Projected_POP.xlsx"))