# Laboratory work 1 – Modeling of demographic processes

Problem Statement
- Determine the «survival» rates independently for men and women for all age groups (“0-4” -> “5-9” -> “10-14” ...) using the 2000–2005 data (for Russia or any other country)
- Determine the fertility rate for women in the age category “20- ... -39”
- Calculate boys/girls ratio for newborn children
- Predict the change in the country's population and demographic profile for 100 years and compare with existing prediction!

**For that:**
- Write out equations for modeling
- Define input/output data and parameters

In [None]:
import math
from typing import Optional

import numpy as np
import pandas as pd
!pip install openpyxl
import openpyxl


import scipy
from scipy.sparse import diags
from scipy.optimize import root
from tqdm.notebook import tqdm

from sklearn.metrics import (
    mean_absolute_error as mae,
    mean_squared_error as mse,
    r2_score as r2,
)

import plotly.express as px
import plotly.graph_objects as go

Collecting openpyxl
  Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/250.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.0/250.0 kB[0m [31m24.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2


# Importing Data
> The predictions and analysis were made for the "Republic of Korea" region!

![img](https://ichef.bbci.co.uk/news/976/cpsprodpb/3984/production/_129042741_bbcm_south-korea_country_profile_200323.png)

In [None]:
# The three 1950-2005 estimate sheets (both sexes, males, females) are read
# from the same workbook, so the path is named once.
_ESTIMATES_XLSX = "/data/notebook_files/age_data_2005.xlsx"

# Both sexes combined.
both_1950_2005_df = pd.read_excel(_ESTIMATES_XLSX, sheet_name="both; 1950-2005, estimates")

# Males only.
m_1950_2005_df = pd.read_excel(_ESTIMATES_XLSX, sheet_name="m; 1950-2005, estimates")

# Females only.
f_1950_2005_df = pd.read_excel(_ESTIMATES_XLSX, sheet_name="f; 1950-2005, estimates")

## Constants

In [None]:
# Column labels of the 5-year age bands: "0 - 4", "5 - 9", ..., "95 - 99",
# followed by the open-ended "100+" band (21 labels in total).
AGE_GROUPS = [f"{k*5} - {(k+1)*5-1}" for k in range(20)] + ["100+"]

# One survival-rate column per destination band: "SR: 5 - 9", ..., "SR: 100+".
# Every label carries the "SR: " prefix, so no survival-rate column can share a
# name with a population column. A bare "100+" here would make the survival
# ratio overwrite the "100+" population values.
SURVIVAL_RATES = [f"SR: {group}" for group in AGE_GROUPS[1:]]

# Sorted distinct years present in the 1950-2005 estimates.
YEARS = np.sort(both_1950_2005_df["Year"].unique())

# Value of the "Region" column that the whole analysis is restricted to.
REGION = "Republic of Korea"

In [None]:
def dfPreprocessing(df : pd.DataFrame,
                    region: str,
                    years : np.ndarray,
                    ) -> pd.DataFrame:
    """Select one region and attach survival-rate columns.

    Rows whose "Region" equals *region* are kept, every "-" cell is replaced
    by NaN and the index is reset. For each year in ``years[1:]`` the
    SURVIVAL_RATES columns are then filled with the element-wise ratio

        population of AGE_GROUPS[1:] in that year
        / population of AGE_GROUPS[:-1] in the rows of ``years[:-1]``

    Args:
        df: Raw sheet with at least "Region", "Year" and the AGE_GROUPS columns.
        region: Region name to keep.
        years: Years to process, in ascending order.

    Returns:
        A new DataFrame; the input frame is not modified.

    NOTE(review): the ratio pairs rows purely by position, so this presumably
    relies on one row per year, already ordered by year -- confirm against the
    workbook. Any name in SURVIVAL_RATES that already exists as a column is
    overwritten for the affected rows.
    """
    in_region = df["Region"] == region
    df = df[in_region].replace("-", np.nan).reset_index(drop=True)

    # Rows of the "current" period and of the period one step earlier.
    current_rows = df["Year"].isin(years[1:])
    previous_rows = df["Year"].isin(years[:-1])

    # Each cohort is one age band older in the current period.
    cohort_now = df[current_rows][AGE_GROUPS[1:]].values
    cohort_before = df[previous_rows][AGE_GROUPS[:-1]].values

    df.loc[current_rows, SURVIVAL_RATES] = cohort_now / cohort_before
    return df


In [None]:
# Apply the same preprocessing (region filter + survival rates) to the
# both-sexes, male and female estimate sheets, in that order.
both_cleaned_1950_2005_df, m_cleaned_1950_2005_df, f_cleaned_1950_2005_df = (
    dfPreprocessing(raw_df, REGION, YEARS)
    for raw_df in (both_1950_2005_df, m_1950_2005_df, f_1950_2005_df)
)

In [None]:
def _add_profile_traces(fig, df, years, features, remove, prefixes,
                        name_suffix, dash) -> None:
    """Add one line trace per entry of *years* to *fig*, using the rows of *df*."""
    for j, year in enumerate(years):
        # Columns that are NaN for this year are left out of the line.
        subdata = df[df["Year"] == year][features].dropna(axis=1)
        x_labels = subdata.columns
        if remove is not None:
            x_labels = [label.replace(remove, "") for label in x_labels]
        y_values = subdata.values.reshape(-1)

        if prefixes is not None:
            lbl = f"{year}-{prefixes[j]}"
        else:
            lbl = f"{year}"

        # The trace is added for every year, whether or not prefixes are given.
        fig.add_trace(go.Scatter(x=x_labels, y=y_values, mode="lines",
                                 name=lbl + name_suffix, line={"dash": dash}))


def raft(df_true: pd.DataFrame,
         years, features,
         df_pred: Optional[pd.DataFrame] = None,
         remove=None, prefixes=None,
         pltTitle : str = "",
         yTitle: str = "") -> None:
    """Plot per-year profiles of *features* as lines and show the figure.

    Args:
        df_true: Frame with a "Year" column and the *features* columns; drawn
            with solid lines.
        years: Years to draw, one trace per year.
        features: Column names used as the x axis (in column order).
        df_pred: Optional frame with the same layout; drawn with dotted lines
            and "-pred" appended to the legend name.
        remove: Optional substring stripped from the column names before they
            are used as x-axis labels (e.g. a common prefix).
        prefixes: Optional sequence indexed like *years*; when given, the
            legend name becomes "<year>-<prefix>".
        pltTitle: Figure title.
        yTitle: Y-axis title.
    """
    fig = go.Figure()

    _add_profile_traces(fig, df_true, years, features, remove, prefixes,
                        name_suffix="", dash="solid")
    if df_pred is not None:
        _add_profile_traces(fig, df_pred, years, features, remove, prefixes,
                            name_suffix="-pred", dash="dot")

    # Show each legend name only once: later traces that repeat a name are
    # hidden from the legend (they are still drawn).
    seen_names = set()

    def _hide_repeated(trace):
        if trace.name in seen_names:
            trace.update(showlegend=False)
        else:
            seen_names.add(trace.name)

    fig.for_each_trace(_hide_repeated)

    fig.update_layout(title=pltTitle, legend_title="year")
    fig.update_xaxes(title_text="Age Groups")
    fig.update_yaxes(title_text=yTitle)
    fig.show()

# Data Overview

In [None]:
raft(both_cleaned_1950_2005_df, features=AGE_GROUPS, years=both_cleaned_1950_2005_df["Year"].unique()[-5:], pltTitle="Both", yTitle="Population")

In [None]:
raft(m_cleaned_1950_2005_df, features=AGE_GROUPS, years=m_cleaned_1950_2005_df["Year"].unique()[-5:], pltTitle="Sigma Males", yTitle="Population")

In [None]:
raft(f_cleaned_1950_2005_df, features=AGE_GROUPS, years=f_cleaned_1950_2005_df["Year"].unique()[-5:], pltTitle="Women", yTitle="Population")

### «Survival» rates according to 2000-2005 years

In [None]:
raft(both_cleaned_1950_2005_df, features=SURVIVAL_RATES, years=both_cleaned_1950_2005_df["Year"].unique(),  pltTitle="Both", yTitle="Survival Rate", remove='SR: ')

In [None]:
raft(m_cleaned_1950_2005_df, features=SURVIVAL_RATES, years=m_cleaned_1950_2005_df["Year"].unique(), pltTitle="Males", yTitle="Survival Rate", remove='SR: ')

In [None]:
raft(f_cleaned_1950_2005_df, features=SURVIVAL_RATES, years=f_cleaned_1950_2005_df["Year"].unique(), pltTitle="Women", yTitle="Survival Rate", remove='SR: ')

# Forecast of the country's population and demographic profile for 100 years, compared with the existing prediction

## Equations for modeling

$ n - number\ of\ age\ groups\\ $
$ K = [0, 1, 2, ..., k-1, k, k+1, ...] - time\ periods\ ids\ to\ consider\ (one\ period = 5\ years)\\ $
$ Suppose\ we\ know\ all\ information\ at\ K[0]\ and\ K[1] \\ $
$ \\ $
$ P_{k,i}^F - females\ population\ number\ of\ age\ group\ i\ at\ period\ k\\ $
$ P_{k,i}^M - males\ population\ number\ of\ age\ group\ i\ at\ period\ k\\ $
$ P_k^F = [P_{k,0}^F, P_{k,1}^F, ..., P_{k,n-1}^F] - females\ demographic\ profile\ at\ period\ k\\ $
$ P_k^M = [P_{k,0}^M, P_{k,1}^M, ..., P_{k,n-1}^M] - males\ demographic\ profile\ at\ period\ k\\ $
$ \\ $
$ SR^F = \frac{P_{1,1:n}^F}{P_{0,0:n-1}^F} - females\ survival\ rate\ profile\ (element-wise\ ratio,\ n-1\ values)\\ $
$ SR^M = \frac{P_{1,1:n}^M}{P_{0,0:n-1}^M} - males\ survival\ rate\ profile\ (element-wise\ ratio,\ n-1\ values)\\ $
$ \\ $
$ FR = \frac{P_{1,0}^F}{\sum_{i={n_1}}^{n_2} P_{1,i}^F} - fertility\ rate\\ $
$ \ [n_1,...,n_2] - reproductive\ ages\ range\ \\ $
$ \gamma_1,\gamma_2 - tuning\ parameters\\ $
$ \\ $
$ BG = \frac{P_{1,0}^M}{P_{1,0}^F} - boys/girls\ ratio\\ $
$ \\ $
$ Formulas\ to\ calculate\ new\ profiles:\\ $
$ for\ k=1,2,..: \\ $
$ \ \ \ \color{purple}{P_{k+1,1:n}^F = \gamma_2 SR^F \cdot P_{k,0:n-1}^F}\\ $
$ \ \ \ \color{purple}{P_{k+1,1:n}^M = \gamma_2 SR^M \cdot P_{k,0:n-1}^M}\\ $
$ \ \ \ \color{purple}{P_{k+1,0}^F = \gamma^{k-1}_1 FR \sum_{i={n_1}}^{n_2} P_{k+1,i}^F}\\ $
$ \ \ \ \color{purple}{P_{k+1,0}^M = P_{k+1,0}^F BG} \\ $
$ \ \ \ \color{purple}{P_{k+1} = P_{k+1}^F + P_{k+1}^M - total\ demographic\ profile} \\ $

## myModel() returns three predicted dataframes (both sexes, males, females) with the population for 2010–2050

In [None]:
def myModel(man_df, wman_df, n1=4, n2=8, k_max=9, gamma1=1, gamma2=1):
    """Project male, female and total age profiles forward from 2005.

    Starting from the 2005 rows, each step shifts every cohort one age band up
    using the 2005 survival rates, then fills the youngest band from the
    fertility rate and the boys/girls ratio.

    Args:
        man_df: Preprocessed male frame ("Year", AGE_GROUPS, SURVIVAL_RATES).
        wman_df: Preprocessed female frame; must also have an "FR" column.
        n1, n2: Slice bounds into the age-band vector (``n1:n2``, end
            exclusive) selecting the female bands that produce births.
        k_max: Number of 5-year steps to simulate.
        gamma1: Per-step multiplier on fertility, applied as ``gamma1**k``
            with ``k`` starting at 0.
        gamma2: Multiplier on the survival rates.

    Returns:
        ``(pred, pred_m, pred_f)`` -- total, male and female frames, each with
        a "Year" column (2010, 2015, ...) and the AGE_GROUPS columns.
    """
    base_m = man_df[man_df["Year"] == 2005]
    base_f = wman_df[wman_df["Year"] == 2005]

    # Survival rates observed for the step ending in 2005.
    surv_m = base_m[SURVIVAL_RATES].mean(axis=0).values
    surv_f = base_f[SURVIVAL_RATES].mean(axis=0).values

    # Profile histories; element 0 is the 2005 starting point.
    history_m = [base_m[AGE_GROUPS].values.reshape(-1)]
    history_f = [base_f[AGE_GROUPS].values.reshape(-1)]

    # Fertility: mean of "FR" over the last three rows of the female frame.
    fertility = wman_df[-3:]["FR"].mean()
    # Newborn boys per newborn girl, from the youngest band in 2005.
    boys_per_girl = (base_m[AGE_GROUPS[0]] / base_f[AGE_GROUPS[0]]).values[0]

    n_groups = len(AGE_GROUPS)
    years = [2005]

    for step in range(k_max):
        next_f = np.zeros(n_groups)
        next_m = np.zeros(n_groups)

        # Ageing: band i of the previous profile becomes band i + 1.
        next_f[1:] = gamma2 * surv_f * history_f[-1][:-1]
        next_m[1:] = gamma2 * surv_m * history_m[-1][:-1]

        # Births come from the freshly aged female bands n1:n2.
        birth_factor = gamma1**step * fertility
        next_f[0] = birth_factor * next_f[n1:n2].sum()
        next_m[0] = birth_factor * next_f[n1:n2].sum() * boys_per_girl

        history_m.append(next_m)
        history_f.append(next_f)
        years.append(years[-1] + 5)

    history_total = [m + f for m, f in zip(history_m, history_f)]

    def _as_frame(history):
        # The 2005 starting profile is input data, not a prediction: drop it.
        frame = pd.DataFrame()
        frame["Year"] = years[1:]
        frame[AGE_GROUPS] = np.array(history[1:])
        return frame

    pred = _as_frame(history_total)
    pred_m = _as_frame(history_m)
    pred_f = _as_frame(history_f)

    return pred, pred_m, pred_f

## Evaluate(true, pred) returns metrics

In [None]:
def myEvaluate(df_tr, df_pred):
    """Compare a reference and a predicted age-profile frame, year by year.

    Both frames must contain the AGE_GROUPS columns with rows in the same
    year order; *df_tr* must also have a "Year" column.

    Args:
        df_tr: Reference ("true") frame.
        df_pred: Predicted frame.

    Returns:
        DataFrame with one row per year: "Year", total reference population
        ("Orig_Est"), total predicted population ("Pred"), and the MAE, RMSE
        and R2 computed across the age bands of that year.
    """
    # After the transpose, rows are age bands (samples) and columns are years
    # (outputs), so multioutput="raw_values" yields one score per year.
    y_true = df_tr[AGE_GROUPS].values.T
    y_pred = df_pred[AGE_GROUPS].values.T

    MAE = mae(y_true, y_pred, multioutput="raw_values")
    # The `squared=False` flag is deprecated in scikit-learn 1.4 and removed
    # in 1.6. The square root of the per-output MSE gives the same values on
    # every version.
    RMSE = np.sqrt(mse(y_true, y_pred, multioutput="raw_values"))
    R2 = r2(y_true, y_pred, multioutput="raw_values")

    df = pd.DataFrame()
    df["Year"] = df_tr["Year"]
    df["Orig_Est"] = df_tr[AGE_GROUPS].sum(axis=1).apply(int)
    df["Pred"] = df_pred[AGE_GROUPS].sum(axis=1).apply(int)
    df["MAE"] = np.round(MAE, 0).astype("int")
    df["RMSE"] = np.round(RMSE, 0).astype("int")
    df["R2"] = np.round(R2, 3)
    return df

## Getting three predicted df

In [None]:
# Slice bounds into AGE_GROUPS (n1:n2) for the bands used in the fertility
# rate: indices 4..7, i.e. "20 - 24" through "35 - 39".
n1 = 4
n2 = 8
# Tuning multipliers passed to myModel; 1 means "no adjustment".
gamma1 = 1  # .04
gamma2 = 1  # .0085
# Number of 5-year steps to simulate (2010 ... 2050).
kmax = 9

# "FR": youngest band divided by the summed n1:n2 bands, row by row.
m_cleaned_1950_2005_df["FR"] = m_cleaned_1950_2005_df[AGE_GROUPS[0]].div(
    m_cleaned_1950_2005_df[AGE_GROUPS[n1:n2]].sum(axis=1)
)
f_cleaned_1950_2005_df["FR"] = f_cleaned_1950_2005_df[AGE_GROUPS[0]].div(
    f_cleaned_1950_2005_df[AGE_GROUPS[n1:n2]].sum(axis=1)
)

both_pred_2010_2050_df, m_pred_2010_2050_df, f_pred_2010_2050_df = myModel(
    man_df=m_cleaned_1950_2005_df,
    wman_df=f_cleaned_1950_2005_df,
    n1=n1,
    n2=n2,
    k_max=kmax,
    gamma1=gamma1,
    gamma2=gamma2,
)

# Compare original data & predicted

## Both

In [None]:
both_2010_2050_df = pd.read_excel(
    "/data/notebook_files/age_data_2005.xlsx",
    sheet_name='both; 2010-50, medium-fertility'
)

In [None]:
YEARS_2010_2050 = np.sort(both_2010_2050_df["Year"].unique())

In [None]:
both_cleaned_2010_2050_df = dfPreprocessing(both_2010_2050_df, REGION, YEARS_2010_2050)
both_cleaned_2010_2050_df["FR"] = both_cleaned_2010_2050_df[AGE_GROUPS[0]]/both_cleaned_2010_2050_df[AGE_GROUPS[n1:n2]].sum(axis=1)

In [None]:
myEvaluate(both_cleaned_2010_2050_df, both_pred_2010_2050_df)

Unnamed: 0,Year,Orig_Est,Pred,MAE,RMSE,R2
0,2010,48565,48777,36,88,0.996
1,2015,49089,49239,66,108,0.994
2,2020,49387,49228,89,121,0.992
3,2025,49445,48660,109,136,0.989
4,2030,49140,47672,130,153,0.984
5,2035,48416,46189,158,180,0.975
6,2040,47353,44240,191,225,0.954
7,2045,46042,41953,229,284,0.913
8,2050,44540,39412,275,344,0.843


In [None]:
raft(both_cleaned_2010_2050_df, features=AGE_GROUPS, years=both_cleaned_2010_2050_df["Year"].unique(), pltTitle="Both", yTitle="Population")

In [None]:
raft(both_pred_2010_2050_df, features=AGE_GROUPS, years=both_pred_2010_2050_df["Year"].unique(), pltTitle="Prediction Both", yTitle="Population")

In [None]:
raft(
    df_true=both_cleaned_2010_2050_df,
    df_pred=both_pred_2010_2050_df,
    features=AGE_GROUPS,
    years=both_pred_2010_2050_df["Year"].unique()[:3],
    pltTitle="Both | True & Pred",
    yTitle="Population"
)

In [None]:
raft(
    df_true=both_cleaned_2010_2050_df,
    df_pred=both_pred_2010_2050_df,
    features=AGE_GROUPS,
    years=both_pred_2010_2050_df["Year"].unique()[3:6],
    pltTitle="Both | True & Pred",
    yTitle="Population"
)

In [None]:
raft(
    df_true=both_cleaned_2010_2050_df,
    df_pred=both_pred_2010_2050_df,
    features=AGE_GROUPS,
    years=both_pred_2010_2050_df["Year"].unique()[6:9],
    pltTitle="Both | True & Pred",
    yTitle="Population"
)

## Males

In [None]:
m_2010_2050_df = pd.read_excel(
    "/data/notebook_files/age_data_2005.xlsx",
    sheet_name="m; 2010-50, medium-fertility"
)

In [None]:
m_cleaned_2010_2050_df = dfPreprocessing(m_2010_2050_df, REGION, YEARS_2010_2050)
m_cleaned_2010_2050_df["FR"] = m_cleaned_2010_2050_df[AGE_GROUPS[0]]/m_cleaned_2010_2050_df[AGE_GROUPS[n1:n2]].sum(axis=1)

In [None]:
raft(m_cleaned_2010_2050_df, features=AGE_GROUPS, years=m_cleaned_2010_2050_df["Year"].unique(), pltTitle="Men", yTitle="Population")

In [None]:
raft(m_pred_2010_2050_df, features=AGE_GROUPS, years=m_pred_2010_2050_df["Year"].unique(), pltTitle="Prediction Men", yTitle="Population")

In [None]:
myEvaluate(m_cleaned_2010_2050_df, m_pred_2010_2050_df)

Unnamed: 0,Year,Orig_Est,Pred,MAE,RMSE,R2
0,2010,24260,24374,19,48,0.996
1,2015,24425,24510,33,59,0.994
2,2020,24471,24393,44,65,0.991
3,2025,24390,23984,56,71,0.989
4,2030,24127,23361,70,80,0.984
5,2035,23658,22493,84,94,0.975
6,2040,23023,21412,100,116,0.956
7,2045,22274,20205,117,141,0.923
8,2050,21459,18920,136,166,0.87


In [None]:
raft(
    df_true=m_cleaned_2010_2050_df,
    df_pred=m_pred_2010_2050_df,
    features=AGE_GROUPS,
    years=both_pred_2010_2050_df["Year"].unique()[:3],
    pltTitle="Men | True & Pred",
    yTitle="Population"
)

In [None]:
raft(
    df_true=m_cleaned_2010_2050_df,
    df_pred=m_pred_2010_2050_df,
    features=AGE_GROUPS,
    years=both_pred_2010_2050_df["Year"].unique()[3:6],
    pltTitle="Men | True & Pred",
    yTitle="Population"
)

In [None]:
raft(
    df_true=m_cleaned_2010_2050_df,
    df_pred=m_pred_2010_2050_df,
    features=AGE_GROUPS,
    years=both_pred_2010_2050_df["Year"].unique()[6:9],
    pltTitle="Men | True & Pred",
    yTitle="Population"
)

## Females

In [None]:
# Female medium-fertility projection. This must read the "f; ..." sheet:
# reading the "m; ..." sheet here would compare the female prediction against
# the male reference data.
f_2010_2050_df = pd.read_excel(
    "/data/notebook_files/age_data_2005.xlsx",
    sheet_name="f; 2010-50, medium-fertility"
)

In [None]:
f_cleaned_2010_2050_df = dfPreprocessing(f_2010_2050_df, REGION, YEARS_2010_2050)
f_cleaned_2010_2050_df["FR"] = f_cleaned_2010_2050_df[AGE_GROUPS[0]]/f_cleaned_2010_2050_df[AGE_GROUPS[n1:n2]].sum(axis=1)

In [None]:
# Plot the female reference profiles; the years come from the female frame
# itself rather than from the male one.
raft(
    f_cleaned_2010_2050_df,
    features=AGE_GROUPS,
    years=f_cleaned_2010_2050_df["Year"].unique(),
    pltTitle="Women",
    yTitle="Population"
)

In [None]:
raft(
    f_pred_2010_2050_df,
    features=AGE_GROUPS,
    years=f_pred_2010_2050_df["Year"].unique(),
    pltTitle="Prediction Women",
    yTitle="Population"
)

In [None]:
myEvaluate(f_cleaned_2010_2050_df, f_pred_2010_2050_df)

Unnamed: 0,Year,Orig_Est,Pred,MAE,RMSE,R2
0,2010,24260,24402,105,125,0.973
1,2015,24425,24729,115,133,0.967
2,2020,24471,24835,119,140,0.961
3,2025,24390,24675,124,147,0.952
4,2030,24127,24311,125,150,0.945
5,2035,23658,23696,122,148,0.939
6,2040,23023,22828,116,140,0.936
7,2045,22274,21747,108,128,0.936
8,2050,21459,20492,102,120,0.932


In [None]:
raft(
    df_true=f_cleaned_2010_2050_df,
    df_pred=f_pred_2010_2050_df,
    features=AGE_GROUPS,
    years=both_pred_2010_2050_df["Year"].unique()[0:3],
    pltTitle="Woman | True & Pred",
    yTitle="Population"
)

In [None]:
raft(
    df_true=f_cleaned_2010_2050_df,
    df_pred=f_pred_2010_2050_df,
    features=AGE_GROUPS,
    years=both_pred_2010_2050_df["Year"].unique()[3:6],
    pltTitle="Woman | True & Pred",
    yTitle="Population"
)

In [None]:
raft(
    df_true=f_cleaned_2010_2050_df,
    df_pred=f_pred_2010_2050_df,
    features=AGE_GROUPS,
    years=both_pred_2010_2050_df["Year"].unique()[6:9],
    pltTitle="Woman | True & Pred",
    yTitle="Population"
)

## Summary

As a result of the first lab we have:
- Determined the «survival» rates for all age groups from the 2000–2005 data for South Korea
- Determined the fertility rate for women using the formula
```
# Pseudo
women_df["0 - 4"] / women_df[["20 - 24", "25 - 29", "30 - 34", "35 - 39"]].sum(axis=1)
```
- Calculated the boys/girls ratio for newborn children
- Predicted the change in the country's population!

In [None]:
both_cleaned_2010_2050_df.head(100)

Unnamed: 0,Index,Variant,Region,Notes,Country code,Year,0 - 4,5 - 9,10 - 14,15 - 19,...,SR: 55 - 59,SR: 60 - 64,SR: 65 - 69,SR: 70 - 74,SR: 75 - 79,SR: 80 - 84,SR: 85 - 89,SR: 90 - 94,SR: 95 - 99,FR
0,77,Medium,Republic of Korea,,410,2010,2223.961,2409.136,3062.623,3406.574,...,,,,,,,,,,0.149204
1,77,Medium,Republic of Korea,,410,2015,2175.622,2221.257,2407.181,3057.939,...,0.975775,0.96036,0.939552,0.9015,0.83787,0.747527,0.629097,0.486259,0.339103,0.15485
2,77,Medium,Republic of Korea,,410,2020,2139.442,2173.203,2219.615,2403.462,...,0.978335,0.964539,0.944952,0.90998,0.851154,0.765558,0.655924,0.524606,0.375392,0.160792
3,77,Medium,Republic of Korea,,410,2025,2106.516,2137.241,2171.738,2216.314,...,0.980419,0.967901,0.949823,0.917149,0.862665,0.783053,0.676976,0.551995,0.407866,0.175824
4,77,Medium,Republic of Korea,,410,2030,1985.182,2104.446,2135.886,2168.626,...,0.981996,0.970266,0.953255,0.922834,0.871243,0.796287,0.694935,0.570722,0.429273,0.180728
5,77,Medium,Republic of Korea,,410,2035,1844.97,1983.253,2103.172,2132.907,...,0.983148,0.972142,0.95573,0.926865,0.878121,0.805673,0.707854,0.585348,0.441382,0.189085
6,77,Medium,Republic of Korea,,410,2040,1779.273,1843.177,1982.08,2100.311,...,0.984218,0.973834,0.958173,0.930459,0.884131,0.815215,0.719721,0.599391,0.454714,0.201252
7,77,Medium,Republic of Korea,,410,2045,1779.313,1777.566,1842.097,1979.359,...,0.985058,0.975426,0.960438,0.934061,0.889542,0.823657,0.731984,0.612756,0.468532,0.20831
8,77,Medium,Republic of Korea,,410,2050,1799.76,1777.651,1776.54,1839.492,...,0.985961,0.976642,0.962508,0.93735,0.894952,0.831397,0.743333,0.627483,0.482757,0.21665


In [None]:
both_pred_2010_2050_df.head(10)

Unnamed: 0,Year,0 - 4,5 - 9,10 - 14,15 - 19,20 - 24,25 - 29,30 - 34,35 - 39,40 - 44,...,55 - 59,60 - 64,65 - 69,70 - 74,75 - 79,80 - 84,85 - 89,90 - 94,95 - 99,100+
0,2010,2610.143974,2404.0155,3055.890146,3383.28392,3172.339461,3653.1,3726.47078,4189.35835,4047.181089,...,2827.197064,2295.480788,1933.285383,1609.722135,1040.791078,541.641069,245.501564,70.670046,12.565971,1.272695
1,2015,2410.488648,2600.973007,2396.532458,3029.899907,3331.038916,3106.577993,3594.361149,3690.25198,4146.980495,...,3821.239205,2747.735283,2164.74857,1727.378427,1308.437347,730.903103,305.472629,100.223342,18.828949,1.926914
2,2020,2248.835233,2402.019187,2592.87689,2376.206624,2983.224973,3261.929083,3056.468208,3559.414947,3652.901056,...,3949.566948,3714.779068,2591.180812,1934.174546,1403.315788,914.67338,410.160861,124.904523,26.708578,2.88349
3,2025,2007.007616,2240.933755,2394.542359,2570.88579,2339.711653,2921.444978,3209.271752,3026.610962,3523.367494,...,3869.364676,3839.673019,3504.281292,2315.104398,1571.297977,980.255535,510.444967,166.834862,33.333314,4.090737
4,2030,1908.215092,1999.955821,2233.958342,2374.233404,2531.400838,2291.368785,2874.364988,3177.884832,2995.70315,...,3964.855427,3761.226368,3622.27177,3132.361802,1880.659733,1097.57792,546.536898,206.406491,44.31454,5.110043
5,2035,1753.147284,1901.510412,1993.730506,2215.011357,2337.768738,2479.097309,2254.521337,2846.325284,3145.364812,...,3492.309309,3854.101463,3547.682447,3238.046489,2546.32917,1313.573344,611.937231,220.7821,54.533289,6.77305
6,2040,1622.768088,1746.987449,1895.591531,1976.820977,2180.992103,2289.466014,2439.231003,2232.598083,2817.328958,...,3368.312791,3394.64012,3635.349692,3170.6393,2632.508137,1780.265275,732.294534,247.196309,58.278643,8.306151
7,2045,1553.516998,1617.066351,1741.549555,1879.514353,1946.459971,2135.928681,2252.649165,2415.511609,2209.981119,...,2861.92759,3274.001545,3201.824981,3249.070754,2576.808897,1840.779368,993.658939,295.786318,65.249784,8.871405
8,2050,1436.08325,1548.058581,1612.032866,1726.778861,1850.647831,1906.242426,2101.580862,2230.744116,2391.04167,...,3004.39517,2780.441203,3087.903776,2861.433965,2640.651007,1800.949939,1027.613751,401.870529,78.068536,9.932454


In [None]:
# SAVE DATA 4 LAB 2
# Plain string literals: the paths contain no placeholders, so no f-string is needed.
both_cleaned_2010_2050_df.to_csv("./both_cleaned_2010_2050_df.csv", index=False)
both_pred_2010_2050_df.to_csv("./both_pred_2010_2050_df.csv", index=False)