# Notebook Setup

In [1]:
import json
from io import StringIO

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import requests
import seaborn as sns
from mpl_toolkits.mplot3d import Axes3D
from sklearn.feature_selection import SelectKBest, mutual_info_regression
from sklearn.model_selection import train_test_split as tts
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler
sns.set_style("white")

# Dela - Predicting the amount of deaths per year

### 1. Intro
<b>Who is our client?</b><br>
Our client is Dela. Dela is a funeral insurer and caretaker for funeral services. In this semester, they will give us some inside problems that we can investigate on.<br><br>
<b>Project explanation</b><br>
Dela faced an unprecedented challenge from the fluctuating demand during the first year of Covid-19, which is why they are looking to improve their ability to react to unexpected surges or drops in demand.
We cannot predict when Dela needs to upscale or downscale. However, we can forecast the amount of deaths in the upcoming years. Based on that knowledge and Dela experience, they can conclude themselves when to upscale or downscale.<br><br>
<b>Project goal</b><br>
In our project, we are going to forecast the amount of deaths per year. This makes it easier for them to decide what to do when demand is higher or lower.<br><br><br>
<b>Document explanation</b><br>
In this document there will be the testing and implementation of the project delivered proposal hypothesis. In this document, we will do that by understanding and experimenting with the collected data. After the understanding, we will see if our hypothesis can be validated and we will do that by applying machine learning onto our dataset.<br><br>
<b>Document setup:</b><br>
<table style="font-size: 14px !important; margin: 0 !important">
    <tr>
        <th style="text-align: left;">Data requirements</th>
        <td style="text-align: left;">In this chapter, we are going to setup the requirements for the data that is needed for the prediction. We will answer questions like ‘Which references are trustworthy?’, ‘Do we need some specific features?’, etc…</td>
    </tr>
    <tr>
        <th style="text-align: left;">Data collection</th>
        <td style="text-align: left;">In this chapter, we are going to explain where we found our data, where we are going to store our data and references to the subchapters of each dataset</td>
    </tr>
    <tr>
        <th style="text-align: left;">Data understanding</th>
        <td style="text-align: left;">In this chapter, we are going to understand each specific dataset that we downloaded to really understand the value of each dataset and how it is going to bring a value to Dela.</td>
    </tr>
    <tr>
        <th style="text-align: left;">Data preparation</th>
        <td style="text-align: left;">In this chapter, we are going to prepare our data so it is clean to work with, think about removing all the data that has invalid records, data that has wrong values or data that has similar features with different names.</td>
    </tr>
</table>

### 2. Provisioning
<b>========== TODO: We need to write an intro here ==========</b> 

### 2.1 Data Requirements
In this chapter, we set up the expectations/requirements for the data we are going to collect in the provisioning phase.<br>
<b>========== TODO: Fill in this table ==========</b> 

<table style="font-size: 14px !important; margin: 0 !important">
    <tr>
        <th style="text-align: left !important">Data Domain</th>
        <td style="text-align: left !important"></td>
    </tr>
    <tr>
        <th style="text-align: left !important">Data type</th>
        <td style="text-align: left !important"></td>
    </tr>
    <tr>
        <th style="text-align: left !important">Target Variable</th>
        <td style="text-align: left !important"></td>
    </tr>
    <tr>
        <th style="text-align: left !important">Expected Features</th>
        <td style="text-align: left !important"></td>
    </tr>
</table>

### 2.2 Data Collection
Because we want to search for data that contains the amount of deaths in the Netherlands, we began searching for an open data bank that is governmental. In that case the data comes from a trustworthy source, which increases the chance of a good prediction. That's how we landed on CBS (which stands for `Centraal Bureau voor de Statistiek`, translated to English: `Central Bureau of Statistics`).

<table style="font-size: 14px !important; margin: 0 !important">
    <tr>
        <th style="text-align: left !important">Data Source</th>
        <td style="text-align: left !important">We got our data from the official   <a href="https://opendata.cbs.nl/statline/portal.html?_la=nl&_catalog=CBS" target="_blank">CBS</a> Website</td>
    </tr>
    <tr>
        <th style="text-align: left !important">Data Storage</th>
        <td style="text-align: left !important">We stored all of our datasets on <a href="https://github.com/Dipsaus2002/DELA_Upsacling_Prediction" target="_blank">Github</a>, so it is globally accessible</td>
    </tr>
</table>

Load in the datasets from Github

In [2]:
def fetchDatasets(timeout=30):
    """Download the dataset catalogue from the project repository.

    Parameters
    ----------
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    list
        The entries of the parsed ``datasets.conf.json`` document, in file order.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    """
    response = requests.get(
        "https://raw.githubusercontent.com/Dipsaus2002/DELA_Upsacling_Prediction/main/classes/datasets.conf.json",
        timeout=timeout,
    )
    # Surface a missing/moved file as an HTTP error instead of a JSON decode error
    response.raise_for_status()
    return list(response.json())
    
def loadDatasetsIntoDataFrames(datasets, timeout=60):
    """Download every configured CSV and group the resulting frames by tag.

    Parameters
    ----------
    datasets : iterable of dict
        Each entry is read for the keys ``'tag'``, ``'name'``, ``'url'``,
        ``'seperator'`` and ``'skip'``.
    timeout : float, optional
        Seconds to wait for each download before giving up.

    Returns
    -------
    dict
        Nested mapping ``dataframes[tag][name] -> pandas.DataFrame``.

    Raises
    ------
    requests.HTTPError
        If a download answers with an error status; previously the error page
        would have been parsed as if it were CSV content.
    """
    dataframes = {}
    for dataset in datasets:
        response = requests.get(dataset['url'], timeout=timeout)
        response.raise_for_status()
        dataframe = pd.read_csv(
            StringIO(response.text),
            # Replaces `error_bad_lines=False`, which pandas 2.0 removed; 'warn' keeps
            # the old effect of skipping malformed rows while reporting them.
            # Requires pandas >= 1.3.
            on_bad_lines='warn',
            sep=dataset['seperator'],
            low_memory=False,
            skiprows=dataset['skip'],
        )
        # Create the tag bucket on first use, then register the frame under its name
        dataframes.setdefault(dataset['tag'], {})[dataset['name']] = dataframe
    return dataframes

# Download the dataset catalogue, then load every listed CSV into a nested dict: dataframes[tag][name]
datasets = fetchDatasets()
dataframes = loadDatasetsIntoDataFrames(datasets)

### 2.3 Data Understanding

<b>========== TODO: ForEach dataset show and document the understanding ==========</b>

In [3]:
# ex = len(dataframes['lifeExpectency']['lifeExpectencyPerRegion']['Gemeente'].unique()
# print(ex, pop)
# sns.pointplot(x="Gemeente", y="Bij geboorte", data=dataframes['lifeExpectency']['lifeExpectencyPerRegion'])

### 2.4 Data Preparation

<b>========== TODO: clean a bit more of the datasets and document a bit ==========</b>

To have an overview of what will be done in this chapter, there will be subjects listed below.
- <b>We have multiple datasets that need to be aligned before we can merge them together.</b>
    - It contains renaming, cleaning and formatting features in the right type
- <b>There are also datasets that need to be concatenated because there are too many rows for one file.</b>

1. The next chapters cover each dataset separately, with its cleaning/aligning steps.
2. The chapter after that merges the different datasets.
3. We are going to reduce the dimensionality of the datasets by doing some feature cutting. 

### LifeExpectency
This dataset contains the life expectancy of people in the Netherlands per municipality.   

**To align this dataset with the other datasets, we will perform the following actions:**
1. rename columns to English and making the renaming consistent across the datasets
2. transform the type of columns where needed

In [4]:
# Inspect column names, non-null counts and dtypes before cleaning
dataframes['lifeExpectency']['lifeExpectencyPerRegion'].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 355 entries, 0 to 354
Data columns (total 8 columns):
 #   Column                           Non-Null Count  Dtype 
---  ------                           --------------  ----- 
 0   id                               355 non-null    int64 
 1   Gemeente                         355 non-null    object
 2   Groep_rij                        355 non-null    object
 3   Geslacht                         355 non-null    object
 4   Bij geboorte                     355 non-null    object
 5   Bij geboorte (afwijking tov NL)  355 non-null    object
 6   Bij 65 jaar                      355 non-null    object
 7   Bij 65 jaar (afwijking tov NL)   355 non-null    object
dtypes: int64(1), object(7)
memory usage: 22.3+ KB


1. The info output shows columns registered as Dtype `object` that need to be `float`. For example:
- **'Bij geboorte'** stands for **life expectancy registered at birth** within that municipality. Because life expectancy in this dataset is a number, it should be registered as Dtype `float`.

2. To be consistent in the naming across the datasets, we are going to use the English translation of `Gemeente`, which is `Municipality`.
3. In the following cell, you will see the cleaning of this dataset and the approach we took in this cleaning.

In [5]:
def fetchJsonMap(name, mappingType, timeout=30):
    """Download a JSON mapping file from the project repository and return it parsed.

    Parameters
    ----------
    name : str
        File stem of the mapping, e.g. ``'populationChange.pop2002_2020'``.
    mappingType : str
        Used both as the folder name and as the infix of the file name,
        e.g. ``'renaming'`` or ``'mapping'``.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    object
        The parsed JSON document (the notebook passes it to ``rename``/``replace``).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status, e.g. for an unknown mapping name.
    """
    response = requests.get(
        f"https://raw.githubusercontent.com/Dipsaus2002/DELA_Upsacling_Prediction/main/classes/{mappingType}/{name}.{mappingType}.json",
        timeout=timeout,
    )
    # Report a wrong name/type as an HTTP error rather than a JSON decode error
    response.raise_for_status()
    return response.json()
    

# Rename the columns first, then convert the decimal-comma text columns to floats
lifeExpectencyRenamed = dataframes['lifeExpectency']['lifeExpectencyPerRegion'].rename(
    columns=fetchJsonMap('lifeExpectency.lifeExpectencyPerRegion', 'renaming')
)
decimalCommaColumns = ['LifeExpectancy', 'LifeExpectancyWhen65OrOlder']
dataframes['lifeExpectency']['lifeExpectencyPerRegion'] = lifeExpectencyRenamed.assign(**{
    column: lifeExpectencyRenamed[column].str.replace(',', '.').astype(float)
    for column in decimalCommaColumns
})

After modifying the dataset, to confirm that the features are renamed and changed in type, we will print the info from the dataset again.

In [6]:
# Re-check names and dtypes after the renaming and float conversion
dataframes['lifeExpectency']['lifeExpectencyPerRegion'].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 355 entries, 0 to 354
Data columns (total 8 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   id                             355 non-null    int64  
 1   Municipality                   355 non-null    object 
 2   Groep_rij                      355 non-null    object 
 3   Geslacht                       355 non-null    object 
 4   LifeExpectancy                 355 non-null    float64
 5   LifeExpectancyNL               355 non-null    object 
 6   LifeExpectancyWhen65OrOlder    355 non-null    float64
 7   LifeExpectancyWhen65OrOlderNL  355 non-null    object 
dtypes: float64(2), int64(1), object(5)
memory usage: 22.3+ KB


As you can see the info of the cleaned dataset again, we can see that the columns are renamed and the Dtypes of the needed features are changed to type `float`.

### PopulationChange
This dataset contains the change in population per municipality. 

**To align this dataset with the other datasets, we will perform the following actions:**
1. rename columns to English and making the renaming consistent across the datasets
2. transform the type of columns where needed

In [7]:
# Inspect column names, non-null counts and dtypes before cleaning
dataframes['populationChange']['pop2002_2020'].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10488 entries, 0 to 10487
Data columns (total 18 columns):
 #   Column                               Non-Null Count  Dtype  
---  ------                               --------------  -----  
 0   ID                                   10488 non-null  int64  
 1   RegioS                               10488 non-null  object 
 2   Perioden                             10488 non-null  object 
 3   BevolkingAanHetBeginVanDePeriode_1   8056 non-null   float64
 4   LevendGeborenKinderen_2              8055 non-null   float64
 5   Overledenen_3                        8055 non-null   float64
 6   TotaleVestiging_4                    8055 non-null   float64
 7   VestigingVanuitEenAndereGemeente_5   8055 non-null   float64
 8   Immigratie_6                         8055 non-null   float64
 9   TotaalVertrekInclAdmCorrecties_7     8055 non-null   float64
 10  VertrekNaarAndereGemeente_8          8055 non-null   float64
 11  EmigratieInclusiefAdmCorrect

1. The info output shows that we need to rename some columns again. For example:
    - **RegioS** is the same as **Municipality**, we will rename it to Municipality to make it consistent across the datasets
    - **Perioden** is the same as **Year**, we will rename it to Year to make it consistent across the datasets
2. The output also shows that **Year** needs to be of type `int`, so we need to convert it.
3. In the following cell, you will see the cleaning of this dataset and the approach we took in this cleaning.

In [10]:
# Align this dataset with the others: consistent column names and a numeric Year
popRenamed = dataframes['populationChange']['pop2002_2020'].rename(
    columns=fetchJsonMap('populationChange.pop2002_2020', 'renaming')
)
# Only the first four characters of the period value are parsed as the year
dataframes['populationChange']['pop2002_2020'] = popRenamed.assign(
    Year=pd.to_datetime(popRenamed['Year'].str[:4]).dt.year
)

After modifying the dataset, to confirm that the features are renamed and changed in type, we will print the info from the dataset again.

In [11]:
# Re-check names and dtypes after the renaming and Year conversion
dataframes['populationChange']['pop2002_2020'].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10488 entries, 0 to 10487
Data columns (total 18 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   ID                                    10488 non-null  int64  
 1   Municipality                          10488 non-null  object 
 2   Year                                  10488 non-null  int64  
 3   PopulationAtBeginOfPeriod             8056 non-null   float64
 4   AliveBornChildren                     8055 non-null   float64
 5   Deceased                              8055 non-null   float64
 6   TotalLocations                        8055 non-null   float64
 7   LocationsFromOtherMunicipality        8055 non-null   float64
 8   Immigration                           8055 non-null   float64
 9   TotaalVertrekInclAdmCorrecties_7      8055 non-null   float64
 10  AmountMovedToOtherMunicipality        8055 non-null   float64
 11  EmigratieInclus

As you can see the info of the cleaned dataset again, we can see that the columns are renamed and the Dtypes of the needed features are changed to type `int`. Unfortunately, that is not everything to make this dataset align to the other datasets. We will show that by printing out the data inside the dataset.

In [12]:
# Show the first rows to look at the actual values, not just the dtypes
dataframes['populationChange']['pop2002_2020'].head()

Unnamed: 0,ID,Municipality,Year,PopulationAtBeginOfPeriod,AliveBornChildren,Deceased,TotalLocations,LocationsFromOtherMunicipality,Immigration,TotaalVertrekInclAdmCorrecties_7,AmountMovedToOtherMunicipality,EmigratieInclusiefAdmCorrecties_9,OverigeCorrecties_10,PopulationGrowth,RelativePopulationGrowth,PopulationGrowthSinceJanuari,RelativePopulationGrowthSinceJanuari,PopulationAtEndOfPeriod
0,14604,GM1680,2002,25552.0,289.0,251.0,1353.0,1121.0,232.0,1617.0,1498.0,119.0,-21.0,-247.0,-0.97,-247.0,-0.97,25305.0
1,14617,GM1680,2003,25305.0,279.0,241.0,1127.0,1071.0,56.0,1264.0,1111.0,153.0,12.0,-87.0,-0.34,-87.0,-0.34,25218.0
2,14630,GM1680,2004,25218.0,233.0,221.0,1167.0,1104.0,63.0,1077.0,1023.0,54.0,9.0,111.0,0.44,111.0,0.44,25329.0
3,14643,GM1680,2005,25329.0,230.0,231.0,1322.0,1254.0,68.0,1143.0,1062.0,81.0,0.0,178.0,0.7,178.0,0.7,25507.0
4,14656,GM1680,2006,25507.0,216.0,212.0,1369.0,1320.0,49.0,1326.0,1222.0,104.0,9.0,56.0,0.22,56.0,0.22,25563.0


In this data frame you can see that the municipalities are coded instead of containing the actual name of the municipality. To align it with the other datasets, we map those features to the names of the municipality. You can see the code below.

In [15]:
# Swap the municipality codes for the values given in the downloaded mapping
municipalityMapping = fetchJsonMap('Municipality', 'mapping')
popCoded = dataframes['populationChange']['pop2002_2020']
dataframes['populationChange']['pop2002_2020'] = popCoded.assign(
    Municipality=popCoded['Municipality'].replace(municipalityMapping)
)
dataframes['populationChange']['pop2002_2020'].head()

Unnamed: 0,ID,Municipality,Year,PopulationAtBeginOfPeriod,AliveBornChildren,Deceased,TotalLocations,LocationsFromOtherMunicipality,Immigration,TotaalVertrekInclAdmCorrecties_7,AmountMovedToOtherMunicipality,EmigratieInclusiefAdmCorrecties_9,OverigeCorrecties_10,PopulationGrowth,RelativePopulationGrowth,PopulationGrowthSinceJanuari,RelativePopulationGrowthSinceJanuari,PopulationAtEndOfPeriod
0,14604,Aa en Hunze,2002,25552.0,289.0,251.0,1353.0,1121.0,232.0,1617.0,1498.0,119.0,-21.0,-247.0,-0.97,-247.0,-0.97,25305.0
1,14617,Aa en Hunze,2003,25305.0,279.0,241.0,1127.0,1071.0,56.0,1264.0,1111.0,153.0,12.0,-87.0,-0.34,-87.0,-0.34,25218.0
2,14630,Aa en Hunze,2004,25218.0,233.0,221.0,1167.0,1104.0,63.0,1077.0,1023.0,54.0,9.0,111.0,0.44,111.0,0.44,25329.0
3,14643,Aa en Hunze,2005,25329.0,230.0,231.0,1322.0,1254.0,68.0,1143.0,1062.0,81.0,0.0,178.0,0.7,178.0,0.7,25507.0
4,14656,Aa en Hunze,2006,25507.0,216.0,212.0,1369.0,1320.0,49.0,1326.0,1222.0,104.0,9.0,56.0,0.22,56.0,0.22,25563.0


As you can see in the data frame above, the municipalities are mapped to their names and the data is correct enough to go further.

### Deaths

We do not merge this dataset yet, because it only has 20 municipalities and it will not have enough data for our need

In [None]:
# Stack the two period files, then align the column names and the Year type
deathParts = [
    dataframes['death'][partName]
    for partName in ('reasons_per_year_per_region2002_2015', 'reasons_per_year_per_region2016_2020')
]
deathsRenamed = pd.concat(deathParts).rename(columns={'RegioS': 'Municipality', 'Perioden': 'Year'})
dataframes['death']['reason_per_year_per_region2002_2020'] = deathsRenamed.assign(
    Year=pd.to_datetime(deathsRenamed['Year'].str[:4]).dt.year
)

### Merge Datasets

<table style="margin: 0 !important; font-size: 14px !important">
    <tr>
        <th style="text-align: left !important">Goal:</th>
        <td style="text-align: left !important">
            Merge the datasets:<br>
            <ul>
                <li>lifeExpectency</li>
                <li>population</li>
                <li>reason of death</li>
            </ul>
        </td>
    </tr>
    <tr>
        <th style="text-align: left !important">Possible Solutions:</th>
        <td style="text-align: left !important">
            <ul>
                <li>merge (pandas)</li>
                <li>concatenate (pandas)</li>
            </ul>
        </td>
    </tr>
</table>

**Merge**<br>
Merge combines data frames with a database-style join. The join is done on one or more shared columns.<br>
**Concatenate**<br>
Concatenate stacks objects along a particular axis (rows or columns). It is mainly used for datasets with exactly the same columns.<br>
**What is the best solution?**<br>
Our datasets do not share the same columns; apart from the join key, their columns are different. We want to add features to one dataset rather than stack the datasets along an axis. This is why we use the merge function to build one combined dataset.

In [None]:
# Build the combined frame: population figures joined with life expectancy per municipality.
dataset = (
    dataframes['populationChange']['pop2002_2020']
        # Outer join keeps municipalities that appear in only one of the two frames
        .merge(dataframes['lifeExpectency']['lifeExpectencyPerRegion'], how='outer', on="Municipality")
#         .merge(dataframes['death']['reason_per_year_per_region2002_2020'], how='outer', on=['Year', "Municipality"])
        # NOTE(review): the population frame was already mapped in an earlier cell, so this
        # second mapping (and its extra download) looks redundant - confirm before removing
        .assign(Municipality = lambda x: x.Municipality.replace(fetchJsonMap('Municipality', 'mapping')))
        # NOTE(review): every missing value becomes 0, including the target column Deceased;
        # confirm that a zero is really meant for years without data
        .fillna(0)
        # Rows without a Year (0 after the fillna above) are assigned 2002
        .assign(Year = lambda x: x.Year.replace(0, 2002))
        .assign(Year = lambda x: x.Year.astype(int))
)

In [None]:
# Municipalities that occur in the population data but not in the life-expectancy data.
# The lookup set is built once; the previous version recomputed `.unique()` and scanned
# the whole array again for every single item.
municipalitiesWithLifeExpectancy = set(
    dataframes['lifeExpectency']['lifeExpectencyPerRegion']['Municipality'].unique()
)
diffrence = [
    item
    for item in dataframes['populationChange']['pop2002_2020']['Municipality'].unique()
    if item not in municipalitiesWithLifeExpectancy
]
# Keep only the municipalities for which both sources have data
dataset = dataset.query("Municipality not in @diffrence")

In [None]:
# Remove the listed columns; names that are not present are skipped silently
dataset = dataset.drop(
    columns=['Groep_rij', 'Geslacht', 'LifeExpectancyNL', 'LifeExpectancyWhen65OrOlderNL', 'Geslacht_y'],
    errors='ignore',
)

In [None]:
# Feature cutting: remove the listed population columns (absent names are skipped)
droppedPopulationColumns = [
    'TotalLocations', 'LocationsFromOtherMunicipality', 'TotaalVertrekInclAdmCorrecties_7',
    'AmountMovedToOtherMunicipality', 'EmigratieInclusiefAdmCorrecties_9',
    'PopulationGrowthSinceJanuari', 'RelativePopulationGrowthSinceJanuari', 'PopulationAtEndOfPeriod',
]
dataset = dataset.drop(columns=droppedPopulationColumns, errors='ignore')

In [None]:
# Display the merged frame after the unused columns were dropped
dataset

In [None]:
# Index by (Municipality, Year) so the rows of one municipality can be selected with .loc
dataset = dataset.set_index(['Municipality', 'Year'])

In [None]:
# Split the combined frame into one frame per municipality.
# Only municipalities with more than 1 and at most 20 rows are kept.
municipalities = dataset.reset_index()['Municipality'].unique()
datasets = {}
for munucipality in municipalities:
    municipalityRows = dataset.loc[munucipality]
    rowCount = municipalityRows.shape[0]
    if 1 < rowCount <= 20:
        datasets[munucipality] = municipalityRows

In [None]:
# Correlation heatmap of all columns (including Year) for Amsterdam
figure, ax = plt.subplots(figsize=(10, 10), constrained_layout=False)
amsterdamCorrelations = datasets['Amsterdam'].reset_index().corr()
sns.heatmap(amsterdamCorrelations, annot=True, ax=ax)

### Machine Learning

<b>Preparation</b><br>
heatmap, bla bla bla

<b>========== TODO: put machine learning into the Ai methodology chapters, doing other iteration next week ==========</b>

In [None]:
def doFeatureSelection(dataset, k=5, random_state=20):
    """Pick the columns that share the most mutual information with ``Deceased``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain a ``'Deceased'`` column; every other column is a candidate feature.
    k : int, optional
        Number of features to keep. Capped at the number of candidate columns.
    random_state : int, optional
        Seed handed to ``mutual_info_regression`` so repeated runs select the same
        columns (the estimator is randomised when no seed is given).

    Returns
    -------
    pandas.Index
        Names of the selected feature columns.
    """
    X = dataset.loc[:, dataset.columns != 'Deceased']
    y = dataset['Deceased']

    def seededMutualInfo(features, target):
        # SelectKBest calls the score function with (X, y) only, so bind the seed here
        return mutual_info_regression(features, target, random_state=random_state)

    # Asking for more features than there are columns is an error in SelectKBest
    sel = SelectKBest(seededMutualInfo, k=min(k, X.shape[1]))
    # Only the fitted support mask is needed, not the transformed array
    sel.fit(X, y)
    return X.columns[sel.get_support()]

In [None]:
def predictByNearestNeighbors(dataset, n_neighbors=5):
    """Fit a distance-weighted k-nearest-neighbours regressor of ``Deceased`` on the index values.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame with a ``'Deceased'`` column; its index values become the single
        feature column ``'Year'``.
    n_neighbors : int, optional
        Requested number of neighbours; capped at the number of training rows.

    Returns
    -------
    tuple
        ``(X_train, y_train, X_test, y_test, prediction, r2Score)`` where
        ``prediction`` holds the model output for ``X_test`` and ``r2Score`` is
        the regressor's ``score`` on the test split.
    """
    # A list keeps the column label well-defined; a set is unordered and is
    # not accepted for `columns` by newer pandas releases.
    X = pd.DataFrame(dataset.index, columns=["Year"])
    y = dataset['Deceased']

    X_train, X_test, y_train, y_test = tts(X, y, test_size=0.2, random_state=20, shuffle=True)

    # The regressor cannot query more neighbours than it has training rows,
    # which would otherwise fail for municipalities with only a few years of data.
    knn = KNeighborsRegressor(min(n_neighbors, len(X_train)), weights='distance')
    prediction = knn.fit(X_train, y_train).predict(X_test)

    r2Score = knn.score(X_test, y_test)

    return X_train, y_train, X_test, y_test, prediction, r2Score

In [None]:
def plotNearestNeighbors(municipality, X_train, y_train, X_test, y_test, prediction, r2Score):
    """Scatter train points, test points and predictions on the current matplotlib axes.

    The title names the municipality and shows the given score. Nothing is returned.
    """
    for applyTickFont in (plt.xticks, plt.yticks):
        applyTickFont(fontsize=14)

    # (x values, y values, colour, legend label) - drawn in this order
    layers = (
        (X_train, y_train, "navy", "train data"),
        (X_test, y_test, "green", "test data"),
        (X_test, prediction, "orange", "predicted data points"),
    )
    for xValues, yValues, colour, legendLabel in layers:
        plt.scatter(xValues, yValues, color=colour, label=legendLabel, s=70)

    plt.legend(fontsize=14)
    plt.title(f"Amount of Deceased in {municipality} per year. Score: {r2Score}", fontsize=16)

In [None]:
# Train one nearest-neighbour model per municipality and collect its artefacts.
# The loop no longer rebinds the global `dataset`: the previous version replaced the
# merged frame with a per-municipality column subset that was never used afterwards,
# which broke re-running the earlier cells.
metricKeys = ("X_train", "y_train", "X_test", "y_test", "prediction", "Score")
metricsDictionary = {metricKey: {} for metricKey in metricKeys}
for municipality, municipalityFrame in datasets.items():
    # NOTE(review): the selected features are not passed to the model yet;
    # predictByNearestNeighbors only uses the index (Year) as input.
    features = doFeatureSelection(municipalityFrame.reset_index())
    # The result tuple is in the same order as metricKeys
    results = predictByNearestNeighbors(municipalityFrame)
    for metricKey, value in zip(metricKeys, results):
        metricsDictionary[metricKey][municipality] = value

In [None]:
# One row per municipality; the dictionary's outer keys become the columns
scores = (
    pd.DataFrame.from_dict(metricsDictionary)
    .rename_axis("Municipality")
    .reset_index()
)

In [None]:
# One point per municipality, showing the score stored for its nearest-neighbour model
fig = px.scatter(scores, x='Municipality', y='Score', color='Municipality')
fig.show()

In [None]:
# Take the first two municipalities on each side of Score == -1 and tag a background colour
badPredictions = scores.query('Score < -1').reset_index().head(2).assign(Bg='salmon')
goodPredictions = scores.query('Score > -1').reset_index().head(2).assign(Bg='mediumaquamarine')
predictions = pd.concat([badPredictions, goodPredictions])

plotArgumentColumns = ("Municipality", "X_train", "y_train", "X_test", "y_test", "prediction", "Score")
plt.figure(figsize=(20, 20))
plotcount = 1
for rowLabel, prediction in predictions.iterrows():
    # Each selected municipality gets the next cell of a 4x2 grid
    ax = plt.subplot(4, 2, plotcount)
    ax.set_facecolor(prediction['Bg'])
    plotNearestNeighbors(*(prediction[column] for column in plotArgumentColumns))
    plotcount += 1

In [None]:
# Share of municipalities per score range.
# The first range needs `and`: with `or`, the condition `Score > -1 or Score < 1` is
# true for every score, so that slice counted all rows and overlapped the second one.
scoresPie = pd.DataFrame([
    {"type": "between -1 and 1", "amount": len(scores.query('Score >= -1 and Score <= 1'))},
    {"type": "less then -1", "amount": len(scores.query('Score < -1'))},
])
pie, ax = plt.subplots(figsize=[10,6])
ax.pie(x=scoresPie['amount'], autopct="%.1f%%", labels=scoresPie['type'], pctdistance=0.5)
ax.set_title("the distribution of prediction scores from all municipalities", fontsize=14)
ax.legend();

<b>Clean Data</b>

- lifeExpectency
    - lifeExpectencyPerRegion2016_2019 
- populationChange
    - pop2002_2020
    - popOverview
    - popComparison2015_2020
    - growthPrediction2020_2050
    - absoluteNr
- death
    - reasons1997_2014
    - reasons2005_2012
    - reasons2013_2020
    - perWeek2020_2021
- birth
    - birthPerYear1899_2018
    - avaragesOfMonth