In [1]:
#Import the usual dependencies 
import pandas as pd
import numpy as np
from pathlib import Path
import datetime

#Recommended installations 

#https://ipywidgets.readthedocs.io/en/stable/user_install.html
#https://plotly.com/python/bar-charts/
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
from ipywidgets import interact

#create the path to the CSV file 
all_path = Path("./CSV_Data/all_countries_data.csv")

#turn the CSV file to a DataFrame
all_countries= pd.read_csv(all_path, encoding='unicode_escape')

#display the data for preview
all_countries.head()

Unnamed: 0,Updated,Country,Year-Week,Source,New Cases,Number Sequenced,Percent Sequenced,validDenominator,Variant,Detections of Variant,Number Sequenced of Known Variant,Percent Variant,Year,Week,Date
0,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,Other,0,0,0.0,2020,1,2020-01-06
1,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,XBB.1.5-like,0,0,0.0,2020,1,2020-01-06
2,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,XBB.1.5-like+F456L,0,0,0.0,2020,1,2020-01-06
3,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,B.1.351,0,0,0.0,2020,1,2020-01-06
4,1712458590363,Ireland,2020-01,GISAID,0,0,0,True,B.1.617.2,0,0,0.0,2020,1,2020-01-06


In [2]:
#all_countries['Week'] = all_countries['Date'].dt.isocalendar().week.astype(int)
#all_countries['Year'] = pd.to_datetime(all_countries['Year'])

In [3]:
#checking the datatypes 
all_countries.dtypes

Updated                                int64
Country                               object
Year-Week                             object
Source                                object
New Cases                              int64
Number Sequenced                       int64
Percent Sequenced                      int64
validDenominator                        bool
Variant                               object
Detections of Variant                  int64
Number Sequenced of Known Variant      int64
Percent Variant                      float64
Year                                   int64
Week                                   int64
Date                                  object
dtype: object

In [4]:
# Counting different variants across countries
overall_variant_counts = all_countries.groupby('Variant')['Detections of Variant'].sum().reset_index()

# pull the top 3 variants from the data 
top_variants = overall_variant_counts.nlargest(3, 'Detections of Variant')
print(top_variants)

      Variant  Detections of Variant
9   B.1.617.2                1133067
13       BA.2                 928160
20       BA.5                 704999


In [5]:
#filter the data by each variant 
filtered_data = all_countries[all_countries['Variant'].isin(top_variants['Variant'])]

# Calculate overall variant counts for the filtered data
overall_variant_counts_2020 = filtered_data.groupby(['Year', 'Variant'])['Detections of Variant'].sum().reset_index()

# Find the top 3 variants for the year 2020
# 12 variables for 3 variants over a span of 4 years 
top_variants_2020 = overall_variant_counts_2020.nlargest(12, 'Detections of Variant')

print(top_variants_2020)

    Year    Variant  Detections of Variant
3   2021  B.1.617.2                1109132
7   2022       BA.2                 918453
8   2022       BA.5                 673002
11  2023       BA.5                  31911
6   2022  B.1.617.2                  23744
10  2023       BA.2                   8982
4   2021       BA.2                    721
0   2020  B.1.617.2                    188
5   2021       BA.5                     47
2   2020       BA.5                     39
1   2020       BA.2                      4
9   2023  B.1.617.2                      3


In [6]:
#https://ipywidgets.readthedocs.io/en/stable/user_install.html
import ipywidgets as widgets

#https://plotly.com/python/bar-charts/#bar-chart-with-plotly-express
import plotly.express as px

#need this to do the log10 for each value
import math


# Define the data
# It was easier to hard code this
data = {
    'Year': [2021 ,2022, 2022, 2023, 2022, 2023, 2021, 2020, 2021, 2020, 2020, 2023],
    'Variant': ['B.1.617.2','BA.2', 'BA.5', 'BA.5', 'B.1.617.2', 'BA.2', 'BA.2', 'B.1.617.2', 'BA.5', 'BA.5', 'BA.2', 'B.1.617.2'],
    'Detections of Variant (log10)': [ math.log10(1109132), math.log10(918453),math.log10(673002),math.log10(31911), math.log10(23744),math.log10(8982), math.log10(721),math.log10(188),math.log10(47),math.log10(39),math.log10(4),math.log10(3)]
}
df = pd.DataFrame(data)

# Get unique years
year = df['Year'].unique()

# Create dropdown widget for selecting years
year_dropdown = widgets.Dropdown(
    options=list(year),
    description='Select Year:',
    disabled=False,
)

# Function to update output based on selected year
def update_output(year):
    # Filter DataFrame for the selected year
    filtered_df = df[df['Year'] == year]
    
    # Create a bar plot of detections by variant using Plotly
    fig = px.bar(filtered_df, x='Variant', y='Detections of Variant (log10)', color='Variant',
                 labels={'Variant': 'Variant'},
                 title=f'Detections of Variants by {year}')
    
    fig.show()

# Display the dropdown widget and connect it to the update_output function
widgets.interactive(update_output, year=year_dropdown)

interactive(children=(Dropdown(description='Select Year:', options=(2021, 2022, 2023, 2020), value=2021), Outp…