<center><h1> - Economic context analysis - </h1></center>



## Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import mplcyberpunk

from soprasteria.inventory import COLUMNS
from soprasteria.preprocess import preprocess_socio_eco_df

%load_ext autoreload
%autoreload 2

## Configs

In [None]:
# When True, intermediate DataFrames are rendered with display() after each step.
VERBOSE = True

# Data locations, expressed relative to the notebook's working directory.
DATA_PATH = Path("../data")
RAW_DATA_PATH = DATA_PATH / "raw" / "socio_eco.csv"
# The file name must not start with a space: a leading blank yields a file that
# is awkward to reference from a shell and easy to miss when reloading it.
OUTPUT_DATA_PATH = DATA_PATH / "processed" / "socio_eco_processed.csv"

## Reading and inspecting the data 

In [None]:
# Load the raw semicolon-separated export, using the first column as the index.
# "ANSI" is only registered as a codec alias (for "mbcs") on Windows, so it
# raises LookupError on Linux/macOS. "cp1252" names the Western European Windows
# code page explicitly and works on every platform.
# NOTE(review): assumes the file was exported on a Western European Windows
# machine - confirm accented characters still render correctly.
socio_eco_df = pd.read_csv(RAW_DATA_PATH, encoding="cp1252", sep=";", index_col=0)

if VERBOSE:
    display(socio_eco_df)

In [None]:
# Print a concise summary of the freshly loaded frame (index, columns, dtypes,
# non-null counts) to spot typing or missing-value problems early.
socio_eco_df.info()

### First impressions:
- We have **30** entries (rows). Each row is a snapshot of several economic indicators at the end of each month between May 2008 and October 2010.
- We have a total of **4** features (columns). \
They are economic indicators reported for each month:
    * `tx_var_emploi` : Taux de variation de l'emploi (indicateur trimestriel)
    * `idx_prix_conso` : Indice des prix à la consommation (indicateur mensuel)
    * `idx_conf_conso` : Indice de confiance des consommateurs (indicateur mensuel)



### Next steps :

Even before exploring the data more thoroughly, it is clear that a few things need to be handled during pre-processing to clean the data and make it usable for our in-depth analysis. These steps are applied in the next section through `preprocess_socio_eco_df`.


# Data preprocessing 

In [None]:
# Hand the loaded frame to the project's cleaning routine; the result replaces
# the raw frame under the same name.
# NOTE(review): because the name is reused, re-running this cell feeds already
# processed data back into the routine - re-run from the read cell instead.
socio_eco_df = socio_eco_df.pipe(preprocess_socio_eco_df)

if VERBOSE:
    display(socio_eco_df)

## Export

In [None]:
# to_csv does not create missing folders, so make sure the target directory
# exists before writing (no-op when it is already there).
OUTPUT_DATA_PATH.parent.mkdir(parents=True, exist_ok=True)

# index=False leaves the DataFrame index out of the exported file.
socio_eco_df.to_csv(OUTPUT_DATA_PATH, index=False)

# Data visualization & analysis

In [None]:
# Time series of the employment variation rate (tx_var_emploi).
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(socio_eco_df[COLUMNS.date], socio_eco_df[COLUMNS.variation_emploi])
ax.set_xlabel(COLUMNS.date)
ax.set_ylabel(COLUMNS.variation_emploi)
ax.set_title(COLUMNS.variation_emploi)
# Tilt the date labels so neighbouring ticks do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
mplcyberpunk.add_glow_effects(ax)
plt.show()

In [None]:
# Time series of the consumer price index (idx_prix_conso).
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(socio_eco_df[COLUMNS.date], socio_eco_df[COLUMNS.indice_prix])
ax.set_xlabel(COLUMNS.date)
ax.set_ylabel(COLUMNS.indice_prix)
ax.set_title(COLUMNS.indice_prix)
# Tilt the date labels so neighbouring ticks do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
mplcyberpunk.add_glow_effects(ax)
plt.show()

In [None]:
# Time series of the consumer confidence index (idx_conf_conso).
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(socio_eco_df[COLUMNS.date], socio_eco_df[COLUMNS.indice_consommateur])
ax.set_xlabel(COLUMNS.date)
ax.set_ylabel(COLUMNS.indice_consommateur)
ax.set_title(COLUMNS.indice_consommateur)
# Tilt the date labels so neighbouring ticks do not overlap.
for tick_label in ax.get_xticklabels():
    tick_label.set_rotation(45)
mplcyberpunk.add_glow_effects(ax)
plt.show()

In [None]:
# Pairwise scatter plots of the three indicators, one figure per pair.
# Each entry is (x column, y column, figure title).
scatter_pairs = [
    (
        COLUMNS.variation_emploi,
        COLUMNS.indice_prix,
        "tx_var_emploi vs idx_prix_conso",
    ),
    (
        COLUMNS.variation_emploi,
        COLUMNS.indice_consommateur,
        "tx_var_emploi vs idx_conf_conso",
    ),
    (
        COLUMNS.indice_prix,
        COLUMNS.indice_consommateur,
        "idx_prix_conso vs idx_conf_conso",
    ),
]

for x_col, y_col, title in scatter_pairs:
    fig, ax = plt.subplots(figsize=(8, 6))
    ax.scatter(socio_eco_df[x_col], socio_eco_df[y_col])
    ax.set_xlabel(x_col)
    ax.set_ylabel(y_col)
    ax.set_title(title)
    # add_glow_effects() only decorates line artists, so it has no visible
    # effect on a scatter plot; make_scatter_glow() is the scatter counterpart.
    mplcyberpunk.make_scatter_glow(ax=ax)
    plt.show()