# OECD UNEMPLOYMENT ANALYSIS

In [1]:
# Start with loading all necessary libraries 
import pandas as pd 
import math

# Viz libraries 
import plotly.graph_objects as go 
from plotly.subplots import make_subplots
from dash import dash_table


# Dash
from dash import Dash, html, dcc, Input, Output

In [2]:
# Load the input data from GitHub's raw-file endpoint. The regular github.com
# "/tree/..." address is the repository web page for this path, not the CSV
# payload itself, so it cannot be parsed as ';'-separated data.
oecd_df = pd.read_csv(
    'https://raw.githubusercontent.com/KyrilFionov/Python-Portfolio/main/OECD-Unemployment-Analysis/OECD_Unemployment_Analysis_Input.csv',
    delimiter=';',
)
oecd_df.head()

Unnamed: 0,country_code,country_name,gender,date,unemployment_rate
0,AUS,Australia,ALL,01/01/2000,6.77
1,AUS,Australia,ALL,01/02/2000,6.62
2,AUS,Australia,ALL,01/03/2000,6.57
3,AUS,Australia,ALL,01/04/2000,6.38
4,AUS,Australia,ALL,01/05/2000,


In [3]:
# Checking columns data types before proceeding.
# At this point `date` is still a plain string (object) column and needs converting.
oecd_df.dtypes

country_code          object
country_name          object
gender                object
date                  object
unemployment_rate    float64
dtype: object

In [4]:
# Parse the day/month/year strings into real timestamps, then derive the
# calendar year and month as separate columns (assign evaluates the keyword
# arguments in order, so `year` and `month` read the already-parsed `date`)
oecd_df = oecd_df.assign(
    date=lambda frame: pd.to_datetime(frame['date'], format='%d/%m/%Y'),
    year=lambda frame: frame['date'].dt.year,
    month=lambda frame: frame['date'].dt.month,
)
oecd_df.dtypes

country_code                 object
country_name                 object
gender                       object
date                 datetime64[ns]
unemployment_rate           float64
year                          int64
month                         int64
dtype: object

In [5]:
# Sort by country, then gender, then date (all ascending) so further processing
# and the Plotly traces see each series in chronological order
oecd_df = oecd_df.sort_values(
    by=['country_name', 'gender', 'date'],
    ascending=True,
)

### Treating NULL Values

In [6]:
# Count NULL values per column (vectorised; no Python-level lambda per column)
oecd_df.isnull().sum()

country_code           0
country_name           0
gender                 0
date                   0
unemployment_rate    121
year                   0
month                  0
dtype: int64

In [7]:
# NULL filling logic: a missing rate takes the previous month's value of the SAME series.

# Forward-fill inside each (country, gender) group so that a gap at the start
# of one series is never filled with the last value of the preceding
# country/gender. A gap with no earlier observation in its own series
# therefore stays NULL instead of borrowing unrelated data.
# The result is assigned back to the column: this avoids the deprecated
# fillna(method=...) argument and the chained in-place update on a column
# selection.
oecd_df['unemployment_rate'] = (
    oecd_df
    .groupby(['country_name', 'gender'])['unemployment_rate']
    .ffill()
)

# Checking for NULL values after filling
oecd_df.isnull().sum()

country_code         0
country_name         0
gender               0
date                 0
unemployment_rate    0
year                 0
month                0
dtype: int64

### Create Dashboard filters and control parameters

In [8]:
# Create a country list to dynamically populate & name traces in subplots
country_list = oecd_df['country_name'].unique()

# Create a list of unique genders for RadioItems, id = gender_radioitems
gender_list = oecd_df['gender'].unique()

# Calculate the first and latest years for a RangeSlider, id = years_slider
min_year = oecd_df['year'].min()
max_year = oecd_df['year'].max()

# Global min & max unemployment_rate values: every country's markers are
# coloured against the same bounds, so colours are comparable across subplots
cmin = oecd_df['unemployment_rate'].min()
cmax = oecd_df['unemployment_rate'].max()

# Subplot grid dimensions: the smallest square grid that holds every country.
# The square root is rounded UP; rounding it down (int()) leaves too few cells
# whenever the number of countries is not a perfect square. max(1, ...) keeps
# the grid valid even if the country list is empty.
n_cols = max(1, math.ceil(math.sqrt(len(country_list))))
n_rows = n_cols

### Dash Section

In [9]:
# Styling parameters: page/figure colours, the marker colorscale and the font family
colors = dict(
    background='#F6F6F6',
    text='#333333',
    colorscale='rdpu',
)
text_settings = dict(font='Arial')

# Create the Dash app that hosts the control panel, the table and the figure
app = Dash(__name__)

# Page layout: title, control panel (filters), summary table, subplot figure.
# Component ids defined here are the ones the callbacks below bind to.
app.layout = html.Div([
    html.H1('OECD Unemployment Rates')

    ,html.Div([
        'CONTROL PANEL'
        # Add a RangeSlider to select years
        ,html.P()
        ,html.Div([
            dcc.RangeSlider(
                id='years_slider'
                ,min=min_year
                ,max=max_year
                ,step=1
                ,marks={str(year): str(year) for year in oecd_df['year'].unique()}
                # Default to the latest 10 years, but never start before the first year in the data
                ,value=[max(min_year, max_year-10), max_year]
            )
        ], style={'width': '48%', 'margin-bottom': '10px'})
        # Add a RadioItems to select gender category
        ,html.P()
        ,html.Div([
            'Population Category'
            ,dcc.RadioItems(
                id='gender_radioitems'
                ,options=gender_list
                ,value='ALL'
                ,inline = True
            )
        ], style={'width': '15%', 'margin-bottom': '10px'})
        # Add a Dropdown to select the sorting criterion
        # (its options are filled in by a callback from the table columns)
        ,html.P()
        ,html.Div([
            'Sortation Criteria'
            ,dcc.Dropdown(
                id='sort_metrics_dropdown'
                ,value='Yearly_Unemployment_Rate_Change'
                ,clearable=False
            )
        ], style={'width': '25%', 'margin-bottom': '10px'})
        # Add a RadioItems to define whether to show Best/Worst performers
        ,html.P()
        ,html.Div([
            dcc.RadioItems(
                id='best_worst_countries_radioitems'
                ,options=[
                    {'label':'Best Performers','value':'Best'}
                    ,{'label':'Worst Performers','value':'Worst'}
                ]
                ,value='Best'
                ,inline = True
            )
        ], style={'width': '15%', 'margin-bottom': '10px'})
        # Add a Slider to define the number of countries shown in the Table
        ,html.P()
        ,html.Div([
            'Number of Countries to Display in a Table'
            ,dcc.Slider(
                id='largest_slider'
                ,min=5
                ,max=len(country_list)
                ,step=5
                ,value=5
            )
        ],style={'width': '48%', 'margin-bottom': '10px'})
    ],style={'border': '2px solid black', 'padding': '10px', 'margin': '10px','font-family':text_settings['font']})


    # Add Table
    ,html.Br()
    ,html.Div([
        html.Label(id='table_label')
        ,dash_table.DataTable(
            id = 'table'
            # Column ids must match the column names produced by the table callback
            ,columns = [
                {'name':'Country','id':'country_name'}
                ,{'name':'Avg Unemployment Rate %','id':'unemployment_rate'}
                ,{'name':'Min Unemployment Rate Achieved %','id':'Min_Unemployment_Rate'}
                ,{'name':'Max Unemployment Rate Achieved %','id':'Max_Unemployment_Rate'}
                ,{'name':'First Year Unemployment Rate %','id':'First_Year_Unemployment_Rate'}
                ,{'name':'Latest Year Unemployment Rate %','id':'Latest_Year_Unemployment_Rate'}
                ,{'name':'Unemployment Rate % Change','id':'Yearly_Unemployment_Rate_Change'}
            ]
            ,sort_action='native'
            # 'auto' is the valid CSS keyword ('auto%' is not a CSS width)
            ,style_table={'width': 'auto'}
            ,style_header={
                'whiteSpace': 'normal'
                ,'overflow': 'hidden'
                ,'textOverflow': 'ellipsis'
                ,'maxWidth': '100px'
            }

        )
    ],style={'width': 'auto','margin-bottom': '10px','margin-left': '10px'}
    )

    # Add a Figure
    ,dcc.Graph(id='fig')
]
,style = {
    'background-color':colors['background']
    ,'color':colors['text']
    ,'font-family':text_settings['font']
}
)

# LINECHARTS SECTION

# Callback: rebuild the grid of per-country charts whenever the selected
# range of years or the gender changes
@app.callback(
    Output('fig', 'figure')
    ,Input('years_slider', 'value')
    ,Input('gender_radioitems', 'value')
)
def update_figure(years,gender):
    """Build the subplot figure with one marker chart per country.

    Parameters
    ----------
    years : sequence of two values
        ``years[0]`` and ``years[1]`` are the first and last year to include
        (both inclusive).
    gender : value compared against the ``gender`` column
        Only rows whose ``gender`` equals this value are plotted.

    Returns
    -------
    The figure created by ``make_subplots`` with one ``go.Scatter`` trace per
    entry of ``country_list``.
    """
    # Filter the data to include only the selected range of years & gender
    oecd_df_filter = oecd_df[(oecd_df['year'] >= years[0]) & (oecd_df['year'] <= years[1]) & (oecd_df['gender'] == gender)]

    # Create a subplot grid with shared x and y axes, one titled cell per country
    oecd_linecharts = make_subplots(
        subplot_titles = country_list
        ,rows=n_rows
        ,cols=n_cols
        ,shared_yaxes=True
        ,shared_xaxes=True
        ,horizontal_spacing=0
    )

    # Loop through each country and add its trace to the matching grid cell
    for i, country in enumerate(country_list):
        # Get the data for the current country
        country_df = oecd_df_filter[oecd_df_filter['country_name']==country]

        oecd_linecharts.add_trace(
                    go.Scatter(
                        x=country_df['date']
                        ,y=country_df['unemployment_rate']
                        ,mode='markers'
                        ,marker = dict(
                            color = country_df['unemployment_rate']
                            ,colorscale = colors['colorscale']
                            # Same bounds for every trace so colours are comparable across countries
                            ,cmin=cmin
                            ,cmax=cmax
                            ,showscale=True
                            ,colorbar=dict(
                                title='Unemployment Rate'
                            )
                        )
                        ,hovertemplate="Date: %{x}<br>Unemployment Rate: %{y:.1f}%<extra></extra>"

                    )
                    # Cells are filled row by row: the row index advances once
                    # per n_cols traces, the column index cycles through n_cols
                    ,row=i // n_cols + 1
                    ,col=i % n_cols + 1
        )

    # Figure-wide styling does not depend on the country, so it is applied once
    # after all traces have been added rather than on every loop iteration
    oecd_linecharts.update_layout(
        showlegend=False
        ,height = 900
        ,plot_bgcolor=colors['background']
        ,paper_bgcolor=colors['background']
        ,font_color=colors['text']
    )
    oecd_linecharts.update_xaxes(showgrid=False)
    oecd_linecharts.update_yaxes(showgrid=False, showticklabels = False, zeroline = False)
    return oecd_linecharts

# TABLE SECTION

# Callback that composes the caption shown above the table (html.Label, id = table_label)
@app.callback(
        Output('table_label', 'children')
        ,Input('largest_slider', 'value')
        ,Input('years_slider', 'value')
        ,Input('sort_metrics_dropdown', 'value')
        ,Input('table', 'columns')
        ,Input('gender_radioitems', 'value')
        ,Input('best_worst_countries_radioitems', 'value')
)
def update_label(top_countries, years, sort_metric, columns, gender,best_worst_country):
    """Return the table caption describing the current selection.

    The caption combines the year range, the Best/Worst choice, the number of
    countries, the display name of the sort metric and a wording for the
    selected gender.
    """
    # Display name for each column id, taken from the table's column definitions
    display_names = dict((col['id'], col['name']) for col in columns)

    # Fall back to the raw id when the metric is not one of the table columns
    if sort_metric in display_names:
        sort_metric_name = display_names[sort_metric]
    else:
        sort_metric_name = sort_metric

    # 'ALL' -> entire, 'MEN' -> male, any other value -> female
    if gender == 'ALL':
        gender_label = 'entire'
    elif gender == 'MEN':
        gender_label = 'male'
    else:
        gender_label = 'female'

    return f'{years[0]}-{years[1]} {best_worst_country} {top_countries} Countries by {sort_metric_name} within {gender_label} population'

# Callback that fills dcc.Dropdown (id = 'sort_metrics_dropdown') from the table's column definitions
@app.callback(
    Output('sort_metrics_dropdown', 'options'),
    Input('table', 'columns')
)
def update_sort_metrics_options(columns):
    """Return one dropdown option per table column, skipping the country name column.

    Each option uses the column's display name as label and its id as value.
    """
    options = []
    for col in columns:
        # The country name is a label, not a metric that can be sorted on
        if col['id'] != 'country_name':
            options.append({'label': col['name'], 'value': col['id']})
    return options

# Define a callback function that updates the Table based on the selected range of years, gender and sorting criteria
@app.callback(
    Output('table', 'data')
    ,Input('years_slider', 'value')
    ,Input('gender_radioitems', 'value')
    ,Input('largest_slider', 'value')
    ,Input('sort_metrics_dropdown', 'value')
    ,Input('best_worst_countries_radioitems', 'value')
)
def update_table(years, gender, largest_n, sort_metric, best_worst_country):
    """Build the rows of the per-country summary table.

    Parameters
    ----------
    years : sequence of two values
        First and last year to include (both inclusive).
    gender : value compared against the ``gender`` column
        Only rows whose ``gender`` equals this value are used.
    largest_n : int
        Number of countries to return.
    sort_metric : str
        Name of the summary column used for ranking.
    best_worst_country : str
        ``'Best'`` returns the countries with the smallest ``sort_metric``
        values; any other value returns those with the largest.

    Returns
    -------
    list of dict
        One record per selected country with the country name, the average
        yearly rate, the first-to-latest change, the min/max yearly rate and
        the first/latest yearly rate (all rounded to 2 decimals).
    """
    # Filter the data to include only the selected range of years & gender
    oecd_df_filter = oecd_df[(oecd_df['year'] >= years[0]) & (oecd_df['year'] <= years[1]) & (oecd_df['gender'] == gender)]

    # Average rate per year for each country/gender. groupby sorts by its keys,
    # so the rows come out in ascending year order, which 'first'/'last' rely on.
    yearly_oecd_table = oecd_df_filter.groupby(['year', 'country_name', 'gender'])['unemployment_rate'].mean().round(2).reset_index()

    # Collapse the yearly values into one summary row per country/gender with a
    # single named aggregation. This replaces a groupby.apply helper that wrote
    # constant columns into each group (and depended on the grouping columns
    # being passed to the applied function) followed by a second groupby.mean.
    # Note: 'first'/'last' return the earliest/latest non-null yearly value.
    oecd_table = (
        yearly_oecd_table
        .groupby(['country_name', 'gender'])['unemployment_rate']
        .agg(
            unemployment_rate='mean'
            ,First_Year_Unemployment_Rate='first'
            ,Latest_Year_Unemployment_Rate='last'
            ,Min_Unemployment_Rate='min'
            ,Max_Unemployment_Rate='max'
        )
        .reset_index()
    )

    # Change over the selected period: latest yearly rate minus first yearly rate
    oecd_table['Yearly_Unemployment_Rate_Change'] = (
        oecd_table['Latest_Year_Unemployment_Rate'] - oecd_table['First_Year_Unemployment_Rate']
    )
    oecd_table = oecd_table.round(2)

    # For every metric a lower value counts as better, so 'Best' takes the
    # smallest values and anything else takes the largest
    if best_worst_country == 'Best':
        oecd_table_top_countries = oecd_table.nsmallest(largest_n, sort_metric)
    else:
        oecd_table_top_countries = oecd_table.nlargest(largest_n, sort_metric)

    table_data = oecd_table_top_countries[[
            'country_name'
            ,'unemployment_rate'
            ,'Yearly_Unemployment_Rate_Change'
            ,'Min_Unemployment_Rate'
            ,'Max_Unemployment_Rate'
            ,'First_Year_Unemployment_Rate'
            ,'Latest_Year_Unemployment_Rate'
        ]].to_dict('records')

    return table_data

# Start the development server only when this code is executed directly.
# `app.run` is the current entry point; `run_server` is its deprecated alias.
if __name__ == '__main__':
    app.run()

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:8050
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [18/Mar/2023 14:01:15] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [18/Mar/2023 14:01:16] "GET /_favicon.ico?v=2.8.1 HTTP/1.1" 200 -
127.0.0.1 - - [18/Mar/2023 14:01:16] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [18/Mar/2023 14:01:16] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [18/Mar/2023 14:01:16] "[36mGET /_dash-component-suites/dash/dcc/async-slider.js HTTP/1.1[0m" 304 -
127.0.0.1 - - [18/Mar/2023 14:01:16] "[36mGET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1[0m" 304 -
127.0.0.1 - - [18/Mar/2023 14:01:16] "[36mGET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1[0m" 304 -
127.0.0.1 - - [18/Mar/2023 14:01:16] "[36mGET /_dash-component-suites/dash/dash_table/async-highlight.js HTTP/1.1[0m" 304 -
127.0.0.1 - - [18/Mar/2023 14:01:16] "[36mGET /_dash-component-suites/dash/dash_table/async-table.js HTTP/1.1[0m" 304 -
127.0.0.1 - - [18/Mar/2023 14:01:16] "[36mGET 