# Plot counts per lineage through time

## Imports

In [None]:
import altair as alt
import pandas as pd

## Configuration

In [None]:
# Lower bound of the "recent" window, written as an ISO date string. The
# filter that uses it compares date strings with a strict ">", so samples
# dated exactly on this day are not kept.
min_date = "2021-08-01"

## Load metadata

In [None]:
def load_lineage_dates(metadata_file, lineage):
    """Load unambiguous collection dates and regions for one lineage.

    Parameters
    ----------
    metadata_file : str, path-like, or file-like
        Tab-delimited metadata table with at least ``date`` and ``region``
        columns.
    lineage : str
        Label written to the ``lineage`` column of every returned row.

    Returns
    -------
    pandas.DataFrame
        Columns ``lineage``, ``date``, and ``region``. Rows with a missing
        date, or with a date containing the character ``X``, are removed.
        The original row index of the metadata table is preserved.
    """
    # Read dates as text so a column holding only numeric-looking values
    # (e.g., bare years) still supports the string filter below.
    df = pd.read_csv(metadata_file, sep="\t", dtype={"date": str})
    df = df.dropna(subset=["date"])

    # "X" is matched literally; it marks dates that are not fully resolved.
    is_ambiguous = df["date"].str.contains("X", regex=False)

    # Copy so callers can modify the result without touching (or getting
    # chained-assignment warnings about) the full metadata table.
    dates = df.loc[~is_ambiguous, ["date", "region"]].copy()
    dates.insert(0, "lineage", lineage)

    return dates

In [None]:
# Dates and regions for the H3N2 lineage.
h3n2_dates = load_lineage_dates(
    metadata_file="../data/h3n2/metadata.tsv",
    lineage="H3N2",
)

In [None]:
# Dates and regions for the H1N1pdm lineage.
h1n1pdm_dates = load_lineage_dates(
    metadata_file="../data/h1n1pdm/metadata.tsv",
    lineage="H1N1pdm",
)

In [None]:
# Dates and regions for the Vic lineage.
vic_dates = load_lineage_dates(
    metadata_file="../data/vic/metadata.tsv",
    lineage="Vic",
)

In [None]:
#yam_dates = load_lineage_dates("../data/yam/metadata.tsv", "Yam")

In [None]:
# Stack the per-lineage tables into one long table. Each input keeps its own
# row index, so index labels can repeat across lineages.
dates = pd.concat([h3n2_dates, h1n1pdm_dates, vic_dates])
# Alternative that also includes Yam; requires yam_dates to be loaded first.
#dates = pd.concat([h3n2_dates, h1n1pdm_dates, vic_dates, yam_dates])

In [None]:
# Remove rows whose date is the literal string '36-09-05' or the '?'
# placeholder. Copy so later column assignments act on an independent frame.
dates = dates.query("(date != '36-09-05') & (date != '?')").copy()

In [None]:
# Preview the first rows of the combined table.
dates.head()

In [None]:
# Largest date value; dates have not been converted to datetimes at this
# point, so this is presumably a text (lexicographic) maximum.
dates["date"].max()

In [None]:
# (rows, columns) of the combined table after filtering.
dates.shape

In [None]:
# Keep records dated after min_date. The comparison runs on the raw date
# column (converted to datetimes only afterwards) and is strict.
# NOTE(review): rows dated exactly min_date are excluded; confirm that ">"
# rather than ">=" is intended.
recent_dates = dates[dates["date"] > min_date].copy()

In [None]:
# Convert the date column to datetimes so it can serve as a time index for
# resampling.
recent_dates["date"] = pd.to_datetime(recent_dates["date"])

In [None]:
# (rows, columns) of the recent subset.
recent_dates.shape

In [None]:
# Sorted, de-duplicated region names, used as a fixed color domain by the
# per-region charts. Missing regions are dropped before sorting because NaN
# cannot be ordered against strings (sorted() would raise TypeError); the
# "?" placeholder is excluded as well.
regions = sorted(
    region
    for region in recent_dates["region"].dropna().unique()
    if region != "?"
)

In [None]:
# Count samples per lineage in calendar-month bins labelled by month start.
# The grouping column is selected explicitly before resampling so the count
# does not rely on pandas including grouping columns in the resampled output
# (deprecated since pandas 2.2). The result has exactly three columns:
# lineage, date, and samples.
binned_counts = (
    recent_dates.set_index("date")
    .groupby("lineage")["lineage"]
    .resample("1MS")
    .count()
    .rename("samples")
    .reset_index()
)

In [None]:
# Display the monthly per-lineage counts.
binned_counts

In [None]:
# Line chart of monthly sample counts, one line per lineage, with enlarged
# point markers and a tooltip showing lineage, date, and count.
alt.Chart(binned_counts).mark_line(
    point=alt.OverlayMarkDef(size=100),
).encode(
    x=alt.X("yearmonth(date):T", title="Date"),
    y=alt.Y("samples:Q", title="Number of samples"),
    color=alt.Color(
        "lineage:N",
        title="Lineage",
        # Fixed ordering of lineages for the color encoding.
        sort=["H3N2", "H1N1pdm", "Vic", "Yam"],
    ),
    tooltip=["lineage:N", "date:T", "samples:Q"],
).properties(
    height=300,
    width=800,
).configure_axis(
    titleFontSize=14,
    labelFontSize=14,
).configure_legend(
    titleFontSize=14,
    labelFontSize=14,
)

In [None]:
# Monthly H1N1pdm sample counts per region (month-start bins). The grouping
# column is selected explicitly before resampling so the count does not rely
# on pandas including grouping columns in the resampled output (deprecated
# since pandas 2.2). The result has three columns: region, date, samples.
binned_counts_h1n1pdm = (
    recent_dates.set_index("date")
    .query("(lineage == 'H1N1pdm') & (date >= '2021-01-01')")
    .groupby("region")["region"]
    .resample("1MS")
    .count()
    .rename("samples")
    .reset_index()
)

In [None]:
# Display the monthly per-region H1N1pdm counts.
binned_counts_h1n1pdm

In [None]:
# Line chart of monthly H1N1pdm sample counts, one line per region. The color
# scale uses the shared `regions` list as its domain so a region maps to the
# same category10 color wherever this domain is used.
alt.Chart(binned_counts_h1n1pdm).mark_line(
    point=alt.OverlayMarkDef(size=100),
).encode(
    x=alt.X("yearmonth(date):T", title="Date"),
    y=alt.Y("samples:Q", title="Number of samples"),
    color=alt.Color(
        "region:N",
        title="Region",
        scale=alt.Scale(domain=regions, scheme='category10'),
    ),
    tooltip=["region:N", "date:T", "samples:Q"],
).properties(
    height=300,
    width=800,
).configure_axis(
    titleFontSize=14,
    labelFontSize=14,
).configure_legend(
    titleFontSize=14,
    labelFontSize=14,
)

In [None]:
# Monthly H3N2 sample counts per region (month-start bins). The grouping
# column is selected explicitly before resampling so the count does not rely
# on pandas including grouping columns in the resampled output (deprecated
# since pandas 2.2). The result has three columns: region, date, samples.
binned_counts_h3n2 = (
    recent_dates.set_index("date")
    .query("(lineage == 'H3N2') & (date >= '2019-11-01')")
    .groupby("region")["region"]
    .resample("1MS")
    .count()
    .rename("samples")
    .reset_index()
)

In [None]:
# Inspect the monthly H3N2 counts for the 'China' region only.
binned_counts_h3n2.query("region == 'China'")

In [None]:
# Line chart of monthly H3N2 sample counts, one line per region. The color
# scale uses the shared `regions` list as its domain so a region maps to the
# same category10 color wherever this domain is used.
alt.Chart(binned_counts_h3n2).mark_line(
    point=alt.OverlayMarkDef(size=100),
).encode(
    x=alt.X("yearmonth(date):T", title="Date"),
    y=alt.Y("samples:Q", title="Number of samples"),
    color=alt.Color(
        "region:N",
        title="Region",
        scale=alt.Scale(domain=regions, scheme='category10'),
    ),
    tooltip=["region:N", "date:T", "samples:Q"],
).properties(
    height=300,
    width=800,
).configure_axis(
    titleFontSize=14,
    labelFontSize=14,
).configure_legend(
    titleFontSize=14,
    labelFontSize=14,
)

In [None]:
# Monthly Vic sample counts per region (month-start bins). The grouping
# column is selected explicitly before resampling so the count does not rely
# on pandas including grouping columns in the resampled output (deprecated
# since pandas 2.2). The result has three columns: region, date, samples.
binned_counts_vic = (
    recent_dates.set_index("date")
    .query("(lineage == 'Vic') & (date >= '2021-01-01')")
    .groupby("region")["region"]
    .resample("1MS")
    .count()
    .rename("samples")
    .reset_index()
)

In [None]:
# Line chart of monthly Vic sample counts, one line per region. The color
# scale uses the shared `regions` list as its domain so a region maps to the
# same category10 color wherever this domain is used.
alt.Chart(binned_counts_vic).mark_line(
    point=alt.OverlayMarkDef(size=100),
).encode(
    x=alt.X("yearmonth(date):T", title="Date"),
    y=alt.Y("samples:Q", title="Number of samples"),
    color=alt.Color(
        "region:N",
        title="Region",
        scale=alt.Scale(domain=regions, scheme='category10'),
    ),
    tooltip=["region:N", "date:T", "samples:Q"],
).properties(
    height=300,
    width=800,
).configure_axis(
    titleFontSize=14,
    labelFontSize=14,
).configure_legend(
    titleFontSize=14,
    labelFontSize=14,
)