In [11]:
import pandas as pd
import numpy as np
import statsmodels.stats.api as sms
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the raw statistics; the first CSV column becomes the row index.
raw = pd.read_csv('region_stats.csv', index_col=0)

# Label every row with its English name, falling back to the local name
# wherever the English one is missing.
region_names = raw['name_en'].fillna(raw['name'])

# Drop both text columns *before* transposing. Transposing a frame that still
# holds a text column turns every resulting column into object dtype, and
# picking the header from "the first transposed row" silently depends on the
# column order of the CSV. Labelling the rows by name avoids both problems.
df = (
    raw.drop(columns=['name', 'name_en'])
    .set_axis(region_names.to_numpy(), axis=0)
    .transpose()
    .rename_axis(index='level')
    .reset_index()
)

# The former column labels now live in 'level'. Keep the text before the first
# '_' and take what follows the 'V' as the level number.
# NOTE(review): presumably the labels look like 'V<number>_<suffix>' - confirm
# against the CSV header.
level_token = df['level'].str.split('_').str[0]
df['level'] = pd.to_numeric(level_token.str.split('V').str[1])
df

In [None]:
# Per-level summary statistics (mean, median, std, max) across all value columns.
# 'level' identifies the row and is not a measurement: aggregating a plain
# transpose of `df` would mix the level number into every statistic, so it is
# excluded here. The values are coerced to numeric because `df` may hold them
# as object dtype.
counts = df.drop(columns='level').apply(pd.to_numeric)
agg = counts.transpose().aggregate(['mean', 'median', 'std', 'max']).transpose()
agg['level'] = df['level']  # aligned on the row index shared with `df`
agg

In [14]:
# Long-format versions of both tables for the plotting cells below.
countries = df.columns.difference(['level'])  # every column except the level id

# One row per (level, country) pair with its count.
melted = df.melt(
    id_vars=['level'],
    value_vars=countries,
    var_name='country',
    value_name='count',
)

# One row per (level, metric) pair with the aggregated value.
summary_metrics = ['mean', 'median', 'std', 'max']
melted_agg = agg.melt(
    id_vars=['level'],
    value_vars=summary_metrics,
    var_name='metric',
    value_name='value',
)

In [None]:
# Raw per-country values as points, overlaid with the per-level mean and its
# 95% confidence band.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(
    data=melted, x='level', y='count',
    label='values', color='tab:blue', ax=ax,
)
sns.lineplot(
    data=melted, x='level', y='count',
    errorbar=('ci', 95), label='mean (CI 95%)', color='tab:orange', ax=ax,
)
ax.set_xlabel('Administrative level')
ax.set_ylabel('Value')
ax.set_title('Regions per administrative level')
# Place the legend just outside the right edge of the axes.
ax.legend(bbox_to_anchor=(1.01, 1), loc='upper left')
fig.tight_layout()
ax.grid(axis='y')
plt.show()

In [None]:
# Per-level mean as a line, with the 95% confidence interval drawn as error bars.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=melted,
    x='level',
    y='count',
    errorbar=('ci', 95),
    err_style='bars',
    ax=ax,
)
ax.set_xlabel('Administrative level')
ax.set_ylabel('Count')
ax.set_title('Regions per administrative level (mean, confidence interval 95%)')
fig.tight_layout()
ax.grid()
plt.show()

In [None]:
# Per-level mean as bars; errorbar='ci' adds seaborn's confidence-interval whiskers.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=melted, x='level', y='count', errorbar='ci', ax=ax)
ax.set_xlabel('Administrative level')
ax.set_ylabel('Count')
ax.set_title('Regions per administrative level (mean, confidence interval 95%)')
fig.tight_layout()
ax.grid(axis='y')
plt.show()

In [None]:
# Mean and median per administrative level as grouped, value-labelled bars.
fig, ax = plt.subplots(figsize=(10, 6))

# Pin the hue order explicitly so the bar containers below are guaranteed to be
# [0] = mean, [1] = median instead of depending on row order in `melted_agg`.
metric_order = ['mean', 'median']
sns.barplot(
    data=melted_agg[melted_agg['metric'].isin(metric_order)],
    x='level',
    y='value',
    hue='metric',
    hue_order=metric_order,
    ax=ax,
)

ax.bar_label(ax.containers[0], fmt='%.2f')
# '%.1f' rather than '%d': a median over an even number of values can end in
# .5, which '%d' would silently truncate (4.5 -> "4"); '%d' also raises on NaN.
ax.bar_label(ax.containers[1], fmt='%.1f')

ax.set_xlabel('Administrative level')
ax.set_ylabel('Value')
ax.set_title('Regions per administrative level')
# Place the legend just outside the right edge of the axes.
ax.legend(title='Metric', bbox_to_anchor=(1.01, 1), loc='upper left')
fig.tight_layout()
ax.grid(axis='y')
plt.show()

In [None]:
# Display the per-level summary table (mean, median, std, max, level) again,
# next to the charts above.
agg