In [None]:
import pandas as pd
import json
from urllib.request import urlopen

In [None]:
# Download the Pango alias key and parse it into a dict keyed by alias prefix.
# unalias_lineage() below only expands entries whose value is a non-empty string;
# any other value is left alone.
# The `with` block closes the HTTP response as soon as the body has been read, and
# json.load() decodes the byte stream itself, so no manual decode/strip/join is needed.
with urlopen(
    "https://raw.githubusercontent.com/cov-lineages/pango-designation/master/pango_designation/alias_key.json"
) as alias_response:
    alias_dict = json.load(alias_response)

def unalias_lineage(input_lineage, alias_dict=alias_dict):
    """Expand the alias prefix of a lineage name into its full form.

    The text before the first "." (with any "*" removed) is looked up in
    ``alias_dict``. When the lookup yields a non-empty string, that string
    replaces the prefix; otherwise the name is returned unchanged.

    Parameters
    ----------
    input_lineage : str
        Lineage name, optionally starting with "*" (the withdrawn marker).
    alias_dict : dict
        Mapping of alias prefix to expansion. Entries whose value is empty or
        not a string are ignored.

    Returns
    -------
    str
        The expanded name, keeping a leading "*" if the input had one.
        Empty or non-string input (for example a missing value) is returned
        as-is instead of raising.

    Examples
    --------
    With ``alias_dict = {"C": "B.1.1.1"}``: ``"C.36"`` gives ``"B.1.1.1.36"``,
    ``"*C.36"`` gives ``"*B.1.1.1.36"`` and a bare ``"C"`` gives ``"B.1.1.1"``.
    """
    # Guard against "" (indexing [0] would raise) and missing values such as NaN.
    if not isinstance(input_lineage, str) or not input_lineage:
        return input_lineage

    is_withdrawn = input_lineage.startswith("*")
    stem, _, rest_of_it = input_lineage.partition(".")
    stem = stem.replace("*", "")

    expansion = alias_dict.get(stem)
    if not (expansion and isinstance(expansion, str)):
        return input_lineage

    # Only add the separator when there is a suffix, so a bare alias does not
    # come back with a dangling "." on the end.
    unaliased = f"{expansion}.{rest_of_it}" if rest_of_it else expansion
    return f"*{unaliased}" if is_withdrawn else unaliased

In [None]:
# Load the per-lineage metadata (one record per lineage) and drop the columns that
# are not shown on the page.
in_df = pd.read_json(
    "https://github.com/cov-lineages/lineages-website/raw/master/_data/lineage_data.full.json",
    orient="index",
).drop(columns=["Country counts", "Date", "Travel history"])

# Full (un-aliased) name for every lineage.
in_df["Unaliased"] = in_df["Lineage"].apply(unalias_lineage)

# Underscores become non-breaking spaces (U+00A0) and hyphens become non-breaking
# hyphens (U+2011) -- presumably so the values do not wrap inside table cells.
# The vectorised .str accessor passes missing values through instead of raising,
# and regex=False makes the replacement a plain literal substitution.
in_df["Countries"] = in_df["Countries"].str.replace("_", "\u00A0", regex=False)

for date_column in ("Earliest date", "Latest date"):
    in_df[date_column] = in_df[date_column].str.replace("-", "\u2011", regex=False)

# Final column order of the rendered table. The page's CSS and JavaScript address
# columns by position (for example "Latest date" is the fifth column), so keep this
# order in sync with them.
in_df = in_df[[
    "Lineage",
    "Unaliased",
    "Countries",
    "Earliest date",
    "Latest date",
    "Number designated",
    "Number assigned",
    "Description",
]]

in_df

In [None]:
# Withdrawn lineages carry a leading "*" in the Lineage column.
starred_mask = in_df["Lineage"].str.startswith("*")
list_of_withdrawn_lineages = in_df.loc[starred_mask, "Lineage"].tolist()

## table holding only the withdrawn lineages (reduced set of columns)
is_withdrawn = in_df["Lineage"].isin(list_of_withdrawn_lineages)
withdrawn_df = in_df.loc[is_withdrawn, ["Lineage", "Unaliased", "Description"]]

## main table: drop the withdrawn rows, plus any row whose name equals a
## withdrawn name with the "*" stripped
unstarred_names = [name.replace("*", "") for name in list_of_withdrawn_lineages]
shares_withdrawn_name = in_df["Lineage"].isin(unstarred_names)
in_df = in_df.loc[~(is_withdrawn | shares_withdrawn_name)]

In [None]:
# Render both tables as HTML markup, leaving the DataFrame index out of the output.
in_df_html, withdrawn_df_html = (
    frame.to_html(index=False) for frame in (in_df, withdrawn_df)
)

# Stylesheet for the generated page, kept as a literal <style> element so the HTML
# template can interpolate it straight into the document <head>.
# The rules cover, in order: the page shell (html / body / h2), the `.dataframe`
# tables (including one header colour per column position 1-8 and per-column cell
# tweaks addressed with :nth-of-type), the filter and row-count panels, the
# button / link helper classes, and the lineage search panel.
# Column-position selectors here must stay in step with the column order of the
# rendered tables.
outcss = """
<style>
    html{
        font-family: sans-serif;
        margin: 0 auto;
        background: #e6e7e8
    }
    body{
        display: flex;
        flex-direction: column;
        align-items: center;
    }
    h2{
        background: #a50062;
        color: white;
        text-shadow: 2px 2px 4px #3e3e3e;
        width: 95%;
        text-align: center;
        border-radius: 0.5em;
        padding: 0.4em;
    }
    .dataframe{
        border-collapse: collapse;
        width: 95%;
        border: none;
        table-layout: fixed;
        box-shadow: 0px 0px 20px gray;
        border-radius: 0.5em;
    }
    .dataframe tr{
        border-top: 1px solid lightgray;
        border-bottom: 1px solid lightgray;
    }
    .dataframe tr:first-of-type{
        border-top: none;
    }
    .dataframe td, .dataframe th{
        padding: 0.4em;
        border: none;
        overflow-wrap: break-word;
    }
    .dataframe th{
        text-align: center;
        color: white;
        text-shadow: 2px 2px 4px #3e3e3e;
    }
    .dataframe th:nth-of-type(1){
        background: #d62900;
        border-top-left-radius: 0.5em;
    }
    .dataframe th:nth-of-type(2){
        background: #ef7622;
    }
    .dataframe th:nth-of-type(3){
        background: #ff9b56;
    }
    .dataframe th:nth-of-type(4){
        background: #ffaf56;
    }
    .dataframe th:nth-of-type(5){
        background: #d2627d;
    }
    .dataframe th:nth-of-type(6){
        background: #d262a6;
    }
    .dataframe th:nth-of-type(7){
        background: #b65692;
    }
    .dataframe th:nth-of-type(8){
        background: #a50062;
    }
    .dataframe th:last-of-type{
        border-top-right-radius: 0.5em;
    }
    .dataframe tr:nth-of-type(even){
        background: #efecec;
    }
    .dataframe tr:nth-of-type(odd){
        background: white;
    }
    .dataframe td:nth-of-type(3), .dataframe td:nth-of-type(8){
        font-size: smaller;
    }
    .dataframe td:nth-of-type(4), 
    .dataframe td:nth-of-type(5), 
    .dataframe td:nth-of-type(6),
    .dataframe td:nth-of-type(7){
        text-align: center;
    }
    .dataframe td:nth-of-type(2){
        font-style: italic;
    }
    #filter_container{
        background: white;
        border-radius: 0.5em;
        padding: 1em;
        margin-bottom: 1em;
        display: grid;
        grid-column-start: 1;
        grid-column-end: 2;
        grid-row-start: 1;
        grid-row-end: 3;
        justify-content: space-around;
        min-width: 500px;
        grid-row-gap: 0.3em;
        box-shadow: 5px 5px 5px lightgrey;
    }
    #filter_container h3{
        padding: 0;
        margin: 0;
        margin-bottom: 0.5em;
    }
    #return_rows{
        border-radius: 0.5em;
        padding: 1em;
        margin-bottom: 1em;
        min-width: 500px;
        background: white;
        box-shadow: 5px 5px 5px lightgrey;
        text-align: center;
    }
    #introdiv{
        background: #e5ebff;
        box-shadow: 5px 5px 5px lightgrey;
        border-radius: 0.5em;
        max-width: 700px;
        padding: 1em;
        line-height: 1.4em;
    }
    .skeuButton{
        background: white;
        color: black;
        box-shadow: 2px 2px 4px #80808080, -2px -2px 4px #ffffff6b;
        border-radius: 0.4em;
        border: 1px solid rgb(237, 237, 237);
        transition: border 0.5s;
        cursor: pointer;
        user-select: none;
        padding: 5px;
      }
    .skeuButton:active{
        color: darkblue;
        box-shadow: inset 2px 2px 4px #80808080, inset -2px -2px 4px #ffffff6b;
    }
    .skeuButton:hover{
        border: 1px solid rgb(217, 217, 217);
    }
    .fancyLink{
        background: #01107c;
        border-radius: 1em;
        padding-left: 0.5em;
        padding-right: 0.5em;
        color: white !important;
        text-decoration: none !important;
        white-space: nowrap;
    }
    .fancyLink:hover{
        background: #061ed4;
    }
    #lineage_search{
        border-radius: 0.5em;
        padding: 1em;
        margin-bottom: 1em;
        min-width: 500px;
        background: white;
        box-shadow: 5px 5px 5px lightgrey;
        display: grid;
        grid-column-start: 1;
        grid-column-end: 2;
        grid-row-start: 1;
        grid-row-end: 3;
        justify-content: space-around;
        min-width: 500px;
        grid-row-gap: 0.3em;
    }
</style>
"""

# Client-side behaviour for the generated page, kept as a literal <script> element.
# This is a RAW string so that JavaScript escapes such as \u2011 reach the browser
# verbatim instead of being expanded by Python into a non-ASCII character; the
# emitted script is therefore pure ASCII regardless of how the file is encoded.
# Entry points referenced from the HTML template: do_filter(), quick_filter_date()
# and debounce_do_search(). init() runs as soon as the script is parsed, so the
# script must appear after the tables in the document.
script_tag = r"""
<script>
'use strict'

// Wrap `func` so a burst of calls collapses into one call made `timeout` ms after the last one.
function debounce(func, timeout = 300){
    let timer;
    return (...args) => {
        clearTimeout(timer);
        timer = setTimeout(() => { func.apply(this, args); }, timeout);
    };
}

// The lineage list is the first .dataframe table on the page; every filter works on it only.
function get_lineage_table(){
    return document.getElementsByClassName("dataframe")[0]
}

// Re-apply the date filter using the current values of the two date inputs.
function do_filter(){
    var start_input = document.getElementById("start_date")
    var end_input = document.getElementById("end_date")
    var start_date = start_input.value
    var end_date = end_input.value

    start_input.disabled = true
    end_input.disabled = true

    toggle_hideme_css(); // lift the hiding rule while the row classes are rewritten

    document.getElementById("return_rows").innerHTML = "" // this never animates

    filter_dates(start_date, end_date)

    start_input.disabled = false
    end_input.disabled = false

    toggle_hideme_css(); // put the hiding rule back

    update_row_count();
}

// Add the <style> tag that hides `hidden_class` rows, or remove it if it is already present.
// The tag is built with DOM calls: appending to head.innerHTML would re-parse the whole head.
function toggle_style_tag(tag_id, hidden_class){
    var existing_tag = document.getElementById(tag_id)
    if (existing_tag){
        existing_tag.remove()
    }
    else{
        var style_tag = document.createElement("style")
        style_tag.id = tag_id
        style_tag.textContent = `.${hidden_class}{display: none}`
        document.head.appendChild(style_tag)
    }
}

// Toggle the rule that hides rows rejected by the date filter.
function toggle_hideme_css(){
    toggle_style_tag("hideme_css", "hideme")
}

// Toggle the rule that hides rows rejected by the lineage search.
function toggle_hideme_css_search(){
    toggle_style_tag("hideme_css_search", "hideme_css_search")
}

function update_row_count(){
    document.getElementById("return_rows").innerHTML = `${get_visible_row_count()} of ${get_total_row_count()} rows displayed`
}

// A row counts as visible when neither filter has marked it.
function get_visible_row_count(){
    var visibleRows = Array.prototype.filter.call(get_lineage_table().rows, x => (!x.classList.contains("hideme") && !x.classList.contains("hideme_css_search"))).length - 1 // minus header
    return visibleRows
}

function get_total_row_count(){
    return get_lineage_table().rows.length - 1 // minus header
}

// Mark rows whose "Latest date" (fifth column) is blank or outside [start_date, end_date].
// Dates are compared as yyyy-mm-dd strings. An empty bound (cleared input) means "no limit"
// on that side instead of hiding every row.
function filter_dates(start_date, end_date){
    var rows = get_lineage_table().rows
    for (var i=1; i<rows.length; i++){ // start from 1 to skip the header
        var latestDateString = rows[i].cells[4].innerText.replaceAll("\u2011", "-").trim() // nonbreaking hyphens to normal hyphens
        var out_of_range = latestDateString == ""
            || (start_date != "" && latestDateString < start_date)
            || (end_date != "" && latestDateString > end_date)
        rows[i].classList.toggle("hideme", out_of_range)
    }
}

function leftpad(input_string, pad_character, target_length){
    return input_string.toString().padStart(target_length, pad_character)
}

// Format a Date as yyyy-mm-dd using its LOCAL calendar fields.
function datetime_to_yyyy_mm_dd(input_date){
    var yyyy = input_date.getFullYear()
    var mm = leftpad(input_date.getMonth() + 1, "0", 2) // getMonth() is zero-based
    var dd = leftpad(input_date.getDate(), "0", 2)
    return `${yyyy}-${mm}-${dd}`
}

// Parse "yyyy-mm-dd" as LOCAL midnight. new Date("yyyy-mm-dd") would give UTC midnight, which
// datetime_to_yyyy_mm_dd() then renders as the previous day for viewers west of UTC.
function parse_yyyy_mm_dd(date_string){
    var parts = date_string.trim().split("-").map(Number)
    return new Date(parts[0], parts[1] - 1, parts[2])
}

// All parseable "Latest date" values of the lineage table, as Date objects.
function get_dates_array(){
    var datesArray = Array.prototype.map.call(get_lineage_table().rows, x => x.cells[4].innerText); // get the latest date column
    datesArray = datesArray.slice(1) // get rid of the header
    datesArray = datesArray.filter(x => x != null && x.trim() != "") // remove blanks
    datesArray = datesArray.map(x => x.replaceAll("\u2011", "-")) // replace nonbreaking hyphens with regular hyphens
    datesArray = datesArray.map(parse_yyyy_mm_dd) // parse as date objects
    datesArray = datesArray.filter(x => !isNaN(x.getTime())) // drop anything that did not parse
    return datesArray
}

// Earliest and latest "Latest date" in the table; both are undefined when there are no dates.
function get_earliest_latest_date(){
    var dates = get_dates_array();
    dates.sort((a,b)=>a.getTime()-b.getTime());
    return {"earliest": dates[0], "latest": dates[dates.length - 1]}
}

// Set both date inputs from one of the preset buttons and apply the filter.
function quick_filter_date(quick_filter){
    var today = new Date()
    var start_date
    var end_date = datetime_to_yyyy_mm_dd(today)

    if (quick_filter == "last_30" || quick_filter == "last_60"){
        var days_back = (quick_filter == "last_30") ? 30 : 60
        var window_start = new Date(today)
        window_start.setDate(window_start.getDate() - days_back)
        start_date = datetime_to_yyyy_mm_dd(window_start)
    }
    else if (quick_filter == "this_year"){
        start_date = `${today.getFullYear()}-01-01`
    }
    else if (quick_filter == "all_time"){
        var earliest_latest_dict = get_earliest_latest_date()
        // With no dates in the table both bounds are left empty, i.e. unbounded.
        start_date = earliest_latest_dict["earliest"] ? datetime_to_yyyy_mm_dd(earliest_latest_dict["earliest"]) : ""
        end_date = earliest_latest_dict["latest"] ? datetime_to_yyyy_mm_dd(earliest_latest_dict["latest"]) : ""
    }
    else{
        return console.error("invalid quick filter")
    }

    document.getElementById("start_date").value = start_date
    document.getElementById("end_date").value = end_date
    do_filter();
}

// Re-apply the lineage search for `searchTerm`.
function do_search(searchTerm){
    toggle_hideme_css_search(); // lift the hiding rule while the row classes are rewritten

    document.getElementById("return_rows").innerHTML = "" // this never animates

    filter_by_lineage(searchTerm);

    toggle_hideme_css_search(); // put the hiding rule back

    update_row_count();
}

const debounce_do_search = debounce((x) => do_search(x));

// Keep a row when the search text occurs in EITHER the Lineage cell or the Unaliased cell.
// (Checking the two cells one after the other let the second check overwrite the first, so a
// search by alias hid every row whose unaliased name did not also contain the text.)
function filter_by_lineage(input_lineage){
    input_lineage = input_lineage.trim().toUpperCase()
    var rows = get_lineage_table().rows
    for (var i=1; i<rows.length; i++){ // start from 1 to skip the header
        var lineage_text = rows[i].cells[0].innerText.toUpperCase()
        var unaliased_text = rows[i].cells[1].innerText.toUpperCase()
        var is_match = lineage_text.includes(input_lineage) || unaliased_text.includes(input_lineage)
        rows[i].classList.toggle("hideme_css_search", !is_match)
    }
}

// Seed the two date inputs (value, min and max) with the date range found in the table.
function init_date_filter_input_values(){
    var earliest_latest_dict = get_earliest_latest_date()
    if (!earliest_latest_dict["earliest"]){
        return // no dates in the table: leave the inputs empty and unconstrained
    }

    var start_date = datetime_to_yyyy_mm_dd(earliest_latest_dict["earliest"]);
    var end_date = datetime_to_yyyy_mm_dd(earliest_latest_dict["latest"]);

    document.getElementById("start_date").value = start_date
    document.getElementById("start_date").min = start_date
    document.getElementById("start_date").max = end_date

    document.getElementById("end_date").value = end_date
    document.getElementById("end_date").min = start_date
    document.getElementById("end_date").max = end_date
}

function init(){
    init_date_filter_input_values();
    update_row_count();
    toggle_hideme_css(); // initialise by adding the css
    toggle_hideme_css_search();
}

init();
</script>
"""

# Page template. Changes that make the output a valid HTML5 document:
#   * <!DOCTYPE html> so browsers render in standards mode rather than quirks mode;
#   * lang attribute on <html>;
#   * the script is placed at the end of <body> (still after both tables, which it
#     needs to exist when init() runs) instead of after </body>;
#   * <input> is a void element, so it has no closing tag.
assembled_html = f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8"/>
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta name="description" content="SARS-CoV-2 lineage list - with filtering and unaliased names" />
    <title>Lineage List</title>
    <link rel="icon" href="favicon.svg" />
    {outcss}
</head>
<body>
<div id="introdiv">
    A list of SARS-CoV-2 lineages, heavily inspired by <a class="fancyLink" href="https://cov-lineages.org/lineage_list.html" target="_blank">cov-lineages.org</a> and 
    utilising data from the <a class="fancyLink" href="https://github.com/cov-lineages/pango-designation" target="_blank">cov-lineages/pango-designation</a> 
    and <a class="fancyLink" href="https://github.com/cov-lineages/lineages-website/blob/master/_data/lineage_data.full.json" target="_blank">cov-lineages/lineages-website</a> GitHub repositories.
</div>
<h2>Lineages list</h2>
    <div id="filter_container">
        <h3 style="grid-column-start: 1; grid-row-start: 1; grid-column-end: 3">Filter: latest date</h3>
        <div style="grid-column-start: 1; grid-row-start: 2">
            From <input type="date" id="start_date" onchange="do_filter()">
        </div>
        <div style="grid-column-start: 2; grid-row-start: 2">
            Until <input type="date" id="end_date" onchange="do_filter()">
        </div>
        <div style="grid-column-start: 1; grid-column-end: 3; grid-row-start: 3; display: grid; grid-row-gap: 0.3em;">
            <button class="skeuButton" type="button" onclick="quick_filter_date('last_30')">Last 30 days</button>
            <button class="skeuButton" type="button" onclick="quick_filter_date('last_60')">Last 60 days</button>
            <button class="skeuButton" type="button" onclick="quick_filter_date('this_year')">This year</button>
            <button class="skeuButton" type="button" onclick="quick_filter_date('all_time')">All time</button>
        </div>
    </div>
    <div id="lineage_search">
        <h3 style="display: inline-block; grid-column-start: 1; grid-column-end: 1; grid-row-start: 1; margin: 0">Filter: lineage</h3><input type="text" oninput="debounce_do_search(this.value)" style="grid-row-start: 1; grid-column-start: 2; grid-column-end: 3">
    </div>
    <div id="return_rows">
    </div>
    {in_df_html}
<h2>Withdrawn lineages</h2>
{withdrawn_df_html}
{script_tag}
</body>
</html>
"""

# Write the page as UTF-8 explicitly: the document declares <meta charset="UTF-8"> and
# the tables contain U+00A0 / U+2011, which the platform default encoding (e.g. cp1252
# on Windows) may be unable to encode or would encode differently from the declaration.
with open("index.html", "w", encoding="utf-8") as outfile:
    outfile.write(assembled_html)