In [1]:
import pandas as pd
import numpy as np
import sqlite3
# Open (or create) the SQLite database file that later cells write tables to.
conn = sqlite3.connect('consulates.sqlite')

In [2]:
# NOTE(review): unpickling can execute arbitrary code; only load this file if
# it was produced by a trusted step of this pipeline.
df = pd.read_pickle("all_months.pkl")

# First and last months present in the data.
DATA_START_DATE = df["Month"].min()
DATA_END_DATE = df["Month"].max()

# First month counted as COVID-affected; everything earlier is the baseline period.
COVID_START_DATE = pd.Timestamp("20200301")

# Month starts strictly before COVID_START_DATE. The end point is computed
# explicitly instead of passing `closed="left"`: that argument was deprecated
# in pandas 1.4 and removed in 2.0, while its replacement (`inclusive=`) does
# not exist before 1.4. Stepping back one month start yields the same index
# on every pandas version.
NORMAL_MONTHS = pd.date_range(
    start=DATA_START_DATE,
    end=COVID_START_DATE - pd.offsets.MonthBegin(1),
    freq="MS",
    name="Month",
)

# Month starts from COVID_START_DATE through the last month in the data.
COVID_MONTHS = pd.date_range(
    start=COVID_START_DATE,
    end=DATA_END_DATE,
    freq="MS",
    name="Month",
)

NORMAL_MONTHS, COVID_MONTHS

(DatetimeIndex(['2017-03-01', '2017-04-01', '2017-05-01', '2017-06-01',
                '2017-07-01', '2017-08-01', '2017-09-01', '2017-10-01',
                '2017-11-01', '2017-12-01', '2018-01-01', '2018-02-01',
                '2018-03-01', '2018-04-01', '2018-05-01', '2018-06-01',
                '2018-07-01', '2018-08-01', '2018-09-01', '2018-10-01',
                '2018-11-01', '2018-12-01', '2019-01-01', '2019-02-01',
                '2019-03-01', '2019-04-01', '2019-05-01', '2019-06-01',
                '2019-07-01', '2019-08-01', '2019-09-01', '2019-10-01',
                '2019-11-01', '2019-12-01', '2020-01-01', '2020-02-01'],
               dtype='datetime64[ns]', name='Month', freq='MS'),
 DatetimeIndex(['2020-03-01', '2020-04-01', '2020-05-01', '2020-06-01',
                '2020-07-01', '2020-08-01', '2020-09-01', '2020-10-01',
                '2020-11-01', '2020-12-01', '2021-01-01', '2021-02-01',
                '2021-03-01', '2021-04-01'],
               dtype='dat

In [3]:
# Fold the CR*/IR* pairs into combined visa classes.
df["Visa Class"] = (
    df["Visa Class"]
    .replace(to_replace=["CR1", "IR1"], value="CR1/IR1")
    .replace(to_replace=["CR2", "IR2"], value="CR2/IR2")
)

# Re-aggregate so each (Post, Visa Class, Month) has a single summed row;
# sort=False keeps the groups in first-seen order.
df = (
    df
    .groupby(["Post", "Visa Class", "Month"], as_index=False, sort=False)["Issuances"]
    .sum()
)
df

Unnamed: 0,Post,Visa Class,Month,Issuances
0,Mumbai,E22,2017-03-01,3
1,Islamabad,DV1,2017-03-01,1
2,Islamabad,E11,2017-03-01,1
3,Islamabad,E14,2017-03-01,1
4,Islamabad,E15,2017-03-01,2
...,...,...,...,...
87647,Ho Chi Minh City,F1,2021-04-01,59
87648,Ho Chi Minh City,EW,2021-04-01,5
87649,Ho Chi Minh City,E3,2021-04-01,6
87650,Ho Chi Minh City,FX,2021-04-01,207


In [4]:
import re

def slugify(text: str) -> str:
    """Turn a display name into a lowercase, hyphen-separated slug.

    Surrounding whitespace is trimmed, each remaining whitespace character
    becomes a hyphen, and every other non-word character is dropped.
    Underscores already present in the input also become hyphens, while
    hyphens in the input are removed (they are non-word characters).
    """
    # Whitespace is first marked with "_" so it survives the \W strip below.
    marked = re.sub(r"\s", "_", text.strip().lower())
    return re.sub(r"\W", "", marked).replace("_", "-")

# Add a slugified identifier next to each display name.
df["Post Slug"] = df["Post"].apply(slugify)
df["Visa Class Slug"] = df["Visa Class"].apply(slugify)

# Write slug -> display-name lookup tables, replacing any existing copies.
post_slugs = (
    df[["Post Slug", "Post"]]
    .drop_duplicates()
    .set_index("Post Slug")
)
post_slugs.to_sql(name="post_slugs", con=conn, if_exists="replace")

visa_slugs = (
    df[["Visa Class Slug", "Visa Class"]]
    .drop_duplicates()
    .set_index("Visa Class Slug")
)
visa_slugs.to_sql(name="visa_slugs", con=conn, if_exists="replace")

  sql.to_sql(


In [5]:
def get_baseline(df: pd.DataFrame, months: "pd.DatetimeIndex | None" = None) -> pd.Series:
    """Average monthly issuances for one group over the baseline period.

    Args:
        df: Rows for a single group, with a "Month" column (one row per
            month) and an "Issuances" column.
        months: Months to average over. When omitted, the notebook-level
            NORMAL_MONTHS index is used.

    Returns:
        A one-element Series labelled "Issuances" holding the mean. Months in
        the period with no row in ``df`` count as zero issuances; rows outside
        the period are ignored.
    """
    if months is None:
        months = NORMAL_MONTHS
    monthly = df.set_index("Month")[["Issuances"]]
    # Reindexing zero-fills the gaps so the mean runs over every month in the
    # period, not just the months that have a row.
    return monthly.reindex(index=months, fill_value=0).mean()

# One baseline per (post slug, visa class slug) pair, in first-seen order.
baselines = (
    df
    .groupby(["Post Slug", "Visa Class Slug"], sort=False)
    .apply(get_baseline)
)
baselines

Unnamed: 0_level_0,Unnamed: 1_level_0,Issuances
Post Slug,Visa Class Slug,Unnamed: 2_level_1
mumbai,e22,1.861111
islamabad,dv1,0.361111
islamabad,e11,0.305556
islamabad,e14,0.361111
islamabad,e15,0.861111
...,...,...
guayaquil,f3,0.000000
ho-chi-minh-city,am,0.000000
ho-chi-minh-city,c2a,0.000000
ho-chi-minh-city,f2a,0.000000


In [6]:
# Show the raw rows behind the (guayaquil, f3) pair.
df.loc[(df["Post Slug"] == "guayaquil") & (df["Visa Class Slug"] == "f3")]

Unnamed: 0,Post,Visa Class,Month,Issuances,Post Slug,Visa Class Slug
87628,Guayaquil,F3,2021-04-01,9,guayaquil,f3


In [7]:
# Spot-check the computed baseline for one (post slug, visa class slug) pair.
baselines.loc[("budapest", "cr1ir1")]

Issuances    3.361111
Name: (budapest, cr1ir1), dtype: float64

In [8]:
# Persist the baselines both as a pickle and as a table in the SQLite database,
# replacing any existing "baselines" table.
baselines.to_pickle(path="baselines.pkl")
baselines.to_sql(name="baselines", con=conn, if_exists="replace")

  sql.to_sql(


In [9]:
def get_backlog(df: pd.DataFrame) -> pd.DataFrame:
    """Build the month-by-month issuance and backlog table for one group.

    Reads the notebook-level ``baselines``, ``NORMAL_MONTHS`` and
    ``COVID_MONTHS``.

    Args:
        df: Rows for a single (post slug, visa class slug) pair, with
            "Post Slug", "Visa Class Slug", "Month" (one row per month) and
            "Issuances" columns.

    Returns:
        A DataFrame indexed by Month covering NORMAL_MONTHS followed by
        COVID_MONTHS, with columns "Issuances", "Backlog" and "Months Ahead".
        Months without a row get zero issuances. "Backlog" is cumulative
        actual issuances minus cumulative expected issuances (baseline per
        month) since the first COVID month; "Months Ahead" is that figure
        divided by the baseline. Both are NaN for the normal months.
    """
    # Every row of the group carries the same slugs, so the first row identifies it.
    key = tuple(df[["Post Slug", "Visa Class Slug"]].iloc[0])
    # Select the value by label. The previous `baselines.loc[key][0]` relied on
    # integer-position fallback on a label-indexed Series, which pandas 2.x
    # deprecates.
    baseline = baselines.loc[key, "Issuances"]

    monthly = df.set_index("Month")[["Issuances"]]
    normal_months = monthly.reindex(index=NORMAL_MONTHS, fill_value=0)
    covid_months = monthly.reindex(index=COVID_MONTHS, fill_value=0)

    n_covid = len(covid_months)
    actual_progress = covid_months["Issuances"].cumsum()
    # Expected cumulative issuances: baseline, 2 * baseline, ..., n * baseline.
    expected_progress = np.linspace(baseline, baseline * n_covid, num=n_covid)

    covid_months["Backlog"] = actual_progress - expected_progress
    # NOTE(review): when the baseline is 0 this division yields inf (or NaN for
    # 0 / 0) rather than raising; confirm downstream consumers handle that.
    covid_months["Months Ahead"] = covid_months["Backlog"] / baseline

    return pd.concat([normal_months, covid_months])[["Issuances", "Backlog", "Months Ahead"]]

# One backlog table per (post slug, visa class slug) pair, in first-seen order.
backlogs = (
    df
    .groupby(["Post Slug", "Visa Class Slug"], sort=False)
    .apply(get_backlog)
)

In [None]:
# Persist the backlog tables as a pickle file.
backlogs.to_pickle(path="backlogs.pkl")

In [None]:
# Write the backlog tables to the SQLite database, replacing any existing "backlogs" table.
backlogs.to_sql(name="backlogs", con=conn, if_exists="replace")