In [51]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
import xml.etree.ElementTree as ET

# Dette publique

In [52]:
# Load the data from the Excel file
warnings.filterwarnings("ignore", category=UserWarning, module="openpyxl")
file_path = 'DATA/dette_pub.xlsx'  
dette_publique = pd.read_excel(file_path, sheet_name='Feuille 1')

code = pd.read_csv('DATA/code.tsv', sep='\t')

# Define column names for code and label
col_code = 'CODE'
col_label = 'Label - French'

def find_value(x):
    """Find the label value based on the code."""
    matched = not code[code[col_code] == x].empty
    if matched:
        return code[code[col_code] == x].loc[:, col_label].iloc[0]
    elif x == 'TIME':
        return 'Country'
    return None

# Modify the index of the dataframe
dette_publique.index = dette_publique.iloc[:, 0].apply(find_value)
dette_publique.index.name = None

# Set the column names based on the 'Country' row
dette_publique.columns = dette_publique.loc['Country']

# Filter the dataframe to include only rows from 'Belgique' onwards and exclude the 'TIME' column
dette_publique = dette_publique.loc['Belgique':'United Kingdom', dette_publique.columns != 'TIME']

# Filter the dataframe to include only columns from the year 2013 onwards
dette_publique = dette_publique.loc[:, 2013:]

def to_date(x):
    """Convert a value to datetime."""
    return pd.to_datetime(x, format='%Y')

# Vectorize the to_date function
vect_to_date = np.vectorize(to_date)

# Convert the columns to datetime
dette_publique.columns = vect_to_date(dette_publique.columns.values)

monthly_dates = pd.date_range(start='2013-01', end='2023-01', freq='MS')

# Add columns for each month from 2013 to 2023
dette_publique = dette_publique.reindex(columns=dette_publique.columns.union(monthly_dates))

def fill_val(x):
    """Fill missing values by resampling and interpolating."""
    return x.resample('MS').interpolate(method='quadratic')

# Apply the fill_val function to each row
dette_publique = dette_publique.apply(func=fill_val, axis=1)

# IPCH

In [53]:
# Load the data
ipch = pd.read_csv('DATA/ipch.tsv', sep='\t')
code_cp = pd.read_csv('DATA/code_cp.tsv', sep='\t')

# Set index
def get_CP(x): return x[8:12]  # Extract CP code
def get_id(x): return x[13:15]  # Extract id
vect_get_cp = np.vectorize(get_CP)
vect_get_id = np.vectorize(get_id)
ipch['CP'] = vect_get_cp(ipch.iloc[:, 0])  # Apply CP extraction
ipch['id'] = vect_get_id(ipch.iloc[:, 0])  # Apply id extraction

ipch.drop(columns='freq,unit,coicop,geo\\TIME_PERIOD', inplace=True)  # Drop unnecessary columns

ipch['country'] = ipch.loc[:, 'id'].apply(find_value)  # Find country names
ipch.set_index(['country', 'CP'], inplace=True)  # Set multi-index
ipch.drop(columns='id', inplace=True)  # Drop id column

# Convert the columns to datetime
def to_date_M(x):
    """Convert a value to datetime."""
    try:
        return pd.to_datetime(x[:-1], format='%Y-%m')
    except:
        print('fail')
        return x

vect_to_date_M = np.vectorize(to_date_M)
ipch.columns = vect_to_date_M(ipch.columns.values)  # Apply datetime conversion

# Filter data
ipch = ipch[~ipch.index.get_level_values('country').str.\
            startswith(('Union', 'Zone'))]  # Exclude certain countries
ipch = ipch.loc[:, (ipch.columns >= pd.to_datetime('2013-01', format='%Y-%m')) &
                (ipch.columns <= pd.to_datetime('2023-01', format='%Y-%m'))]  # Filter by date range
ipch.iloc[:, :-1] = ipch.iloc[:, :-1].map(lambda x: pd.to_numeric(\
    x.replace(' ', '').replace('d', ''), errors='coerce'))  # Clean and convert to numeric

# Create CP DataFrame
CP = pd.DataFrame(index=ipch.index.get_level_values('CP').unique())  # Unique CP values
CP.index.name = None

# Define weights for CP
ipch_weights = {
    'CP00': 0.05,
    'CP01': 0.08,
    'CP02': 0.05,
    'CP03': 0.04,
    'CP04': 0.10,
    'CP05': 0.04,
    'CP06': 0.05,
    'CP07': 0.12,
    'CP08': 0.04,
    'CP09': 0.08,
    'CP10': 0.03,
    'CP11': 0.13,
    'CP12': 0.02
}
CP['Weights'] = CP.index.map(ipch_weights)  # Map weights to CP
CP['Description'] = CP.index.map(code_cp.set_index('CODE')['Label - French'])  # Map descriptions to CP

ipch = ipch.groupby('CP')  # Group by CP
# ipch.get_group('CP00')

In [54]:
print(''.join(['o' for k in range(80)] + ['x' for k in range(40)]))

ooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooooxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx


# Chomage

In [55]:
def parse_chomage_xml(file_path):
    """Parse XML file and extract data into a DataFrame."""
    tree = ET.parse(file_path)  # Parse XML file
    root = tree.getroot()  # Get root element

    # Initialize lists to store data
    data = []
    columns = set()
    rows = set()

    # Extract data from <Series> and <Obs> tags
    for series in root.findall('.//Series'):
        geo = series.attrib.get('geo')  # Get "geo" attribute
        if geo:
            rows.add(geo)
            for obs in series.findall('Obs'):
                time_period = obs.attrib.get('TIME_PERIOD')  # Get "TIME_PERIOD" attribute
                obs_value = obs.attrib.get('OBS_VALUE')  # Get "OBS_VALUE" attribute
                if time_period and obs_value:
                    columns.add(time_period)
                    data.append((geo, time_period, obs_value))

    # Create DataFrame with appropriate indices
    df = pd.DataFrame(index=sorted(rows), columns=sorted(columns))

    # Fill DataFrame with extracted values
    for geo, time_period, obs_value in data:
        df.at[geo, time_period] = obs_value

    return df

# Load the data
file_path = 'DATA/chomage.xml'
chomage = parse_chomage_xml(file_path)

# Format the data
chomage.columns = chomage.columns.map(lambda x: \
                                      pd.to_datetime(x, format='%Y-%m'))  # Convert columns to datetime
chomage.index = chomage.index.map(lambda x: \
                code.loc[code.loc[:, 'CODE'] == x, 'Label - French'].iloc[0])  # Map index to labels
chomage.drop('Zone euro - 20 pays (à partir de 2023)', inplace=True)  # Drop specific rows
chomage.drop('Union européenne - 27 pays (à partir de 2020)', inplace=True)  # Drop specific rows
chomage = chomage.apply(pd.to_numeric)  # Convert data to numeric

In [56]:
# dette_publique.loc['France'] # Series
# ipch.get_group('CP00').loc['France'].iloc[0] # Series
# chomage.loc['France'] # Series

In [57]:
countries = 'France, Allemagne, Italie'.split(', ')
dates = ('2015-01, 2020-03'.split(', '))
dates = pd.date_range(dates[0], dates[1], freq= 'MS')

def plot_dette(countries = countries, dates = dates):
    plt.figure(figsize=(9,4))
    for country in countries:
        dette_publique.loc[country, dates].plot(label = country)
    plt.title(f'Dette publique ({dates[0].strftime('%m/%Y')} - {dates[-1].strftime('%m/%Y')})')
    plt.legend()

def plot_dette(countries = countries, dates = dates):
    plt.figure(figsize=(9,4))
    for country in countries:
        ipch.get_group('CP00').loc[country, dates].iloc[0].plot(label = country)
    plt.title(f'Inflation ({dates[0].strftime('%m/%Y')} - {dates[-1].strftime('%m/%Y')})')
    plt.legend()

def plot_dette(countries = countries, dates = dates):
    plt.figure(figsize=(9,4))
    for country in countries:
        chomage.loc[country, dates].plot(label = country)
    plt.title(f'Chômage ({dates[0].strftime('%m/%Y')} - {dates[-1].strftime('%m/%Y')})')
    plt.legend()

# Affichage

In [58]:
import ipywidgets as widgets
from ipywidgets import interact_manual
import datetime


# Widgets pour sélectionner les pays et les dates
countries_widget = widgets.Text(
    value='France, Allemagne, Italie',
    description='Countries:',
    placeholder='Enter countries separated by commas'
)

start_date_widget = widgets.DatePicker(
    value=pd.to_datetime('2015-1', format = '%Y-%m'),
    description='Start Date'
)

end_date_widget = widgets.DatePicker(
    value=pd.to_datetime('2020-3', format = '%Y-%m'),
    description='End Date'
)

# Fonction pour tracer la dette publique
@interact_manual(countries=countries_widget, start_date=start_date_widget, end_date=end_date_widget)
def plot_dette(countries, start_date, end_date):
    countries = countries.split(', ')
    dates = pd.date_range(start_date, end_date, freq='MS')
    
    plt.figure(figsize=(9, 4))
    for country in countries:
        if country in dette_publique.index:
            dette_publique.loc[country, dates].plot(label=country)
    plt.title(f'Dette publique ({dates[0].strftime("%m/%Y")} - {dates[-1].strftime("%m/%Y")})')
    plt.legend()
    plt.show()

# Fonction pour tracer l'inflation
@interact_manual(countries=countries_widget, start_date=start_date_widget, end_date=end_date_widget)
def plot_inflation(countries, start_date, end_date):
    countries = countries.split(', ')
    dates = pd.date_range(start_date, end_date, freq='MS')
    
    plt.figure(figsize=(9, 4))
    for country in countries:
        if country in ipch.get_group('CP00').index:
            ipch.get_group('CP00').loc[country, dates].iloc[0].plot(label=country)
    plt.title(f'Inflation ({dates[0].strftime("%m/%Y")} - {dates[-1].strftime("%m/%Y")})')
    plt.legend()
    plt.show()

# Fonction pour tracer le chômage
@interact_manual(countries=countries_widget, start_date=start_date_widget, end_date=end_date_widget)
def plot_chomage(countries, start_date, end_date):
    countries = countries.split(', ')
    dates = pd.date_range(start_date, end_date, freq='MS')
    
    plt.figure(figsize=(9, 4))
    for country in countries:
        if country in chomage.index:
            chomage.loc[country, dates].plot(label=country)
    plt.title(f'Chômage ({dates[0].strftime("%m/%Y")} - {dates[-1].strftime("%m/%Y")})')
    plt.legend()
    plt.show()

interactive(children=(Text(value='France, Allemagne, Italie', continuous_update=False, description='Countries:…

interactive(children=(Text(value='France, Allemagne, Italie', continuous_update=False, description='Countries:…

interactive(children=(Text(value='France, Allemagne, Italie', continuous_update=False, description='Countries:…

In [59]:
PATH = '../OutInfo/Data/'
import sys
sys.path.append(PATH)
from SRC.tools import load
url_1='https://julie-sclaunich.emi.u-bordeaux.fr/DATA/API_FR.INR.DPST_DS2_fr_xml_v2_52919.xml'
xlm = load(url_1, split='')

In [60]:
def parse_xml_to_dataframe(xml_string):
    # Lire et parser le string XML
    root = ET.fromstring(xml_string)

    # Initialiser les listes pour stocker les données
    data = []
    countries = []
    years = []

    # Parcourir les balises <record>
    for record in root.findall('.//record'):
        country = None
        year = None
        value = None

        # Parcourir les balises <field> à l'intérieur de <record>
        for field in record.findall('field'):
            if field.attrib.get('name') == 'Country or Area':
                country = field.text
                countries.append(country)
            elif field.attrib.get('name') == 'Year':
                year = field.text
                years.append(year)
            elif field.attrib.get('name') == 'Value':
                value = field.attrib.get('value')  # Récupérer la valeur de l'attribut "value"
                if value is None:  # Si l'attribut "value" n'existe pas, utiliser le texte de la balise
                    value = np.nan

        if country and year:
            data.append((country, year, value))

    countries = pd.Series(countries).unique()
    years = pd.Series(years).unique()
    value = pd.Series(value).unique()
    # Créer un DataFrame vide avec les indices appropriés
    df = pd.DataFrame(index=countries, columns=years)

    # Remplir le DataFrame avec les valeurs extraites
    for country, year, value in data:
        df.at[country, year] = value

    return df

df = parse_xml_to_dataframe(xlm)

In [61]:
df.head()

Unnamed: 0,1960,1961,1962,1963,1964,1965,1966,1967,1968,1969,...,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
Aruba,,,,,,,,,,,...,,,,,,,,,,
,,,,,,,,,,,...,,,,,,,,,,
Afghanistan,,,,,,,,,,,...,,,,,,,,,,
Angola,,,,,,,,,,,...,,,,,,,,,,
Albanie,,,,,,,,,,,...,,,,,,,,,,
