# WISE Database - Analysis

In [None]:
# Stdlib imports
from pathlib import Path
from collections import namedtuple
from itertools import combinations
from typing import Dict, List

# 3rd party imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib import colormaps as cm

# Local imports
from pywise import wise_consts as const
from pywise import wise_scraper as scraper
from pywise import wise_processor as processor
from pywise import wise_analysis as analysis
from pywise import wise_aux as aux

## Setup

## 1) Load WISE database

In [None]:
# configure the paths once
# ISO3 code of the country under analysis; later cells reuse it for
# loading the data and for indexing the analysis results.
country_code = "SWE"
const.configure_paths(iso3=country_code)

In [None]:
wloader = scraper.WiseLoader()

# Passing the optional "country_iso3" argument restricts loading to
# that single country's data, which significantly shortens the time
# the load() call below needs to run.
raw_data = wloader.load(country_iso3=country_code)

## 2) Process/Transform Data

In [None]:
# Build the transformation pipeline from the raw data and the loader's
# metatable, then execute it.
pipeline = processor.TransformationPipeline(raw_data, wloader.metatable)
# NOTE(review): final_output is iterated via .items() as (iso3, df) pairs
# in the correlation cells, so it presumably maps ISO3 codes to
# DataFrames — confirm against TransformationPipeline.run().
final_output = pipeline.run()

In [None]:
# Gather the pipeline results; the analysis cells below index this
# object with the keys "clean", "sparse_cols" and "raw".
results = pipeline.collect_results()

## 3) Data Visualization

In [None]:
# NOTE(review): presumably generates every available inspection plot and
# writes the figures to disk — confirm against create_inspection_plots().
pipeline.create_inspection_plots(create='all', write=True)

## 4) Analysis
### 4.1) Data Availability & Coverage Analysis
#### 4.1.1) Analysis of capitals
Question: How many metrics per capital are there?

In [None]:
# Run the metrics-per-capital analysis on the "clean" pipeline results.
n_metrics_per_capital = analysis.nMetricsPerCapital(results["clean"])
npc = n_metrics_per_capital.analyze()

#### 4.1.2) Analysis of sparse data
Question: How many metrics per capital are too sparse (i.e. have fewer than 10 data points)?

In [None]:
# Run the sparse-metrics-per-capital analysis on the "sparse_cols"
# pipeline results.
n_sparse_per_capital = analysis.nSparseMetricsPerCapital(results["sparse_cols"])
nspc = n_sparse_per_capital.analyze()

### 4.3) Analysis of raw data availability
Goal: Visually represent how many data points are available for each metric. Add additional information such as when that data is available and what capital the metric belongs to.

In [None]:
# Run the raw-data availability analysis on the "raw" pipeline results.
raw_data_availability = analysis.RawDataAvailability(results["raw"])
rda = raw_data_availability.analyze()

### 4.4) Correlation Analysis
Question: Which metrics are redundant?

In [None]:
# Correlation analysis with lag=0, run once per entry of final_output.
# After the loop, corr_zero_lag refers to the analysis of the last entry.
for iso3, df in final_output.items():
    corr_zero_lag = analysis.CorrleationAnalysis(df, iso3, lag=0)
    corr_zero_lag.analyze()

In [None]:
# Correlation analysis with lag=-1, run once per entry of final_output.
# NOTE(review): judging by the variable name, lag=-1 presumably selects an
# aggregated mode rather than a literal lag of -1 — confirm against
# CorrleationAnalysis.
for iso3, df in final_output.items():
    corr_agg = analysis.CorrleationAnalysis(df, iso3, lag=-1)
    corr_agg.analyze()

### 4.5) Trend & Performance Analysis
#### 4.5.1) Analysis of trend statistics per capital
Question 1: How well do the individual capitals do in terms of evolving in the right direction?

Question 2: How many key indicators are there among the groups of best and worst performing metrics, respectively?

In [None]:
# Run the performance ranking on the "clean" pipeline results. The cells
# below read ta.output, ta.additional_results and ta.capitallist.
ta = analysis.PerformanceRanker(results["clean"])
ta.analyze()

In [None]:
# Per-capital summary of the "slope_norm" column: number of values,
# lower quartile, median and upper quartile. Each dict entry becomes one
# output column through pandas named aggregation.
slope_column = "slope_norm"
slope_summary = {
    "count": pd.NamedAgg(column=slope_column, aggfunc="count"),
    "q25": pd.NamedAgg(
        column=slope_column, aggfunc=lambda x: np.percentile(x, 25)
    ),
    "median": pd.NamedAgg(
        column=slope_column, aggfunc=lambda x: np.median(x)
    ),
    "q75": pd.NamedAgg(
        column=slope_column, aggfunc=lambda x: np.percentile(x, 75)
    ),
}

# Last expression of the cell, so the resulting table is displayed.
ta.output[country_code].groupby("capital - primary").agg(**slope_summary)

#### 4.5.2) Best & Worst Performing Metrics
##### 4.5.2.a) Best & Worst Performing Metrics overall
Question: Which are the 3 best and worst performing metrics over all capitals?

In [None]:
# Display the "overall" top performers for the selected country.
ta.additional_results["top_performers"][country_code]["overall"]

In [None]:
# Display the "overall" bottom performers for the selected country.
ta.additional_results["bottom_performers"][country_code]["overall"]

##### 4.5.2.b) Best & Worst Performing Metrics per capital
Question: Which are the 3 best and worst performing metrics within each capital?

In [None]:
# Collect the top performers of the selected country for every capital
# listed in ta.capitallist, then print the mapping.
top_per_capital = {}
for cap in ta.capitallist:
    top_per_capital[cap] = ta.additional_results["top_performers"][country_code][cap]
aux.pretty_print(top_per_capital)

In [None]:
# Collect the bottom performers of the selected country for every capital
# listed in ta.capitallist, then print the mapping.
bottom_per_capital = {}
for cap in ta.capitallist:
    bottom_per_capital[cap] = ta.additional_results["bottom_performers"][country_code][cap]
aux.pretty_print(bottom_per_capital)