In [None]:
import os
import pandas as pd
import requests
import zipfile
from plotly import graph_objects as go

In [None]:
# --- Data source -------------------------------------------------------------
# Indicator page: https://data.worldbank.org/indicator/FP.CPI.TOTL.ZG
url_inflation = "https://api.worldbank.org/v2/en/indicator/FP.CPI.TOTL.ZG?downloadformat=csv"

# Browser-like User-Agent string used for the download request.
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

# --- Local files -------------------------------------------------------------
# Everything fetched from the World Bank is stored under this directory.
path_output_root = "data/scraped/world-bank"
path_csv = os.path.join(path_output_root, "inflation.csv")  # extracted data file
path_zip = os.path.join(path_output_root, "inflation.zip")  # downloaded archive

# --- Behaviour ---------------------------------------------------------------
# Set to True to re-download / re-extract even when the local files already exist.
can_overwrite = False


In [None]:
# Fetch the World Bank inflation archive (unless a cached copy exists) and
# extract the data CSV from it.

# Make sure the output directory exists before anything is written into it.
os.makedirs(path_output_root, exist_ok=True)

# Download step: skipped when a cached archive exists, unless overwriting is enabled.
downloaded = False
if not os.path.exists(path_zip) or can_overwrite:
	print("Downloading inflation data from World Bank...")
	headers = {"User-Agent": user_agent}
	# A timeout keeps the cell from hanging indefinitely on a stalled connection.
	response = requests.get(url_inflation, headers=headers, timeout=60)
	# Fail loudly on HTTP errors instead of caching an error body as the archive.
	response.raise_for_status()
	with open(path_zip, "wb") as f:
		f.write(response.content)
	downloaded = True
	print("Done.")
else:
	print("Using cached inflation data.")

# Extraction step: also re-run after a fresh download so the CSV never lags
# behind the archive it was extracted from.
if downloaded or not os.path.exists(path_csv) or can_overwrite:
	print("Extracting inflation data...")
	with zipfile.ZipFile(path_zip) as z:
		namelist = z.namelist()
		print(f"Namelist: {namelist}")
		# The data file is taken to be the first member without "Metadata" in its name.
		for name in namelist:
			if "Metadata" not in name:
				print(f"Extracting '{name}' to '{path_csv}'...")
				with open(path_csv, "wb") as f:
					f.write(z.read(name))
				break
		else:
			# No member matched: stop here rather than report success and
			# let a later cell fail on a missing CSV.
			raise RuntimeError(f"No data file found in '{path_zip}' (members: {namelist})")
	print("Done.")
else:
	print("Using already extracted inflation data.")

In [None]:
# Load the extracted CSV: the first 4 lines of the file are skipped and the
# last column of the parsed frame is discarded.
df = pd.read_csv(path_csv, skiprows=4).iloc[:, :-1]
original_columns = df.columns.tolist()

# The indicator's name and code are read from the first row, at column positions 2 and 3.
indicator_name = df.iat[0, 2]
indicator_code = df.iat[0, 3]

# Drop the two indicator columns when they are present.
if "Indicator Name" in df.columns:
	df = df.drop(columns=["Indicator Name", "Indicator Code"])

# One row per country / group; year columns are those with a purely numeric label.
countries_count = df.shape[0]
year_columns = [label for label in df.columns if label.isnumeric()]

print(f"Indicator name: '{indicator_name}'")
print(f"Indicator code: '{indicator_code}'")
print(f"Countries / groups count: {countries_count}")
print(f"Data from year {year_columns[0]} to {year_columns[-1]}")

# Show the last rows as the cell output.
df.tail()

In [None]:
# Print a summary of the frame: column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Display descriptive statistics (count, mean, std, quartiles, ...) for the numeric columns.
df.describe()

In [None]:
# Line chart of the indicator for Slovenia, from the "2010" column through the last column.
# NOTE(review): the y-axis is labelled as a percentage while the title mentions
# "CPI 2010 = 100" (an index base) — confirm the title wording is intended.

# Position of the first plotted year column; everything from there on is plotted.
i_2010 = df.columns.get_loc("2010")
x = df.columns[i_2010:]

# First row whose "Country Name" is Slovenia, restricted to the plotted columns.
slovenia_rows = df[df["Country Name"] == "Slovenia"]
y = slovenia_rows.iloc[0, i_2010:]

fig = go.Figure()
fig.add_trace(
	go.Scatter(
		x=x,
		y=y,
		mode="lines+markers",
		name="Slovenia",
	)
)
fig.update_layout(
	title=f"{indicator_name} (CPI 2010 = 100) in Slovenia",
	xaxis_title="Year",
	yaxis_title="Inflation (%)",
)
fig.show()

In [None]:
# Compare the indicator across several countries on one chart, one trace per country.
# `i_2010` (position of the "2010" column) is computed earlier in the notebook.
countries = ["United States", "Germany", "United Kingdom", "Japan"]
x = df.columns[i_2010:]

fig = go.Figure()
for country in countries:
	# First row matching this country, restricted to the plotted columns.
	country_rows = df[df["Country Name"] == country]
	y = country_rows.iloc[0, i_2010:]
	fig.add_trace(go.Scatter(x=x, y=y, mode="lines+markers", name=country))

# Title, axis labels and legend position (x=0, y=1.0) are set in a single layout update.
fig.update_layout(
	title=f"{indicator_name} (CPI 2010 = 100) in selected countries",
	xaxis_title="Year",
	yaxis_title="Inflation (%)",
	legend=dict(x=0, y=1.0),
)
fig.show()