In [1]:
#IMPORTING LIBRARIES AND DATASETS

import pandas as pd
import plotly.graph_objects as go


# LOAD THE FILTERED GENOTYPIC DATA
ATLAS_Dataset = pd.read_csv('atlas_2024_genes.csv')

# SPLIT THE DATA INTO ONE SUBSET PER CONTINENT FOR CONTINENT-BASED ANALYSES
# (the labels below are listed in the same order as the names they are unpacked into)
Africa, Europe, North_America, South_America, Asia, Oceania = (
    ATLAS_Dataset[ATLAS_Dataset['Continents'] == continent]
    for continent in ('Africa', 'Europe', 'North America', 'South America', 'Asia', 'Oceania')
)

In [8]:
# WORLD MAPS DISPLAYING THE TOP MOST PREVALENT GENOTYPE(S) FOR SELECTED SPECIES.

# Function to create a world map showing the top most prevalent genotypes for a selected species
# The 'genotypes' argument specifies the number of top genotypes to be visualized (1, 2, 3, etc.)
def genotypes_prevalence_map(df, species, genotypes, marker_scale=0.1, min_marker_size=4):
    """Display a rotatable globe of the most prevalent genotypes per country.

    Rows of ``df`` whose 'Species' equals ``species`` are counted per
    ('Country', 'Gene') pair. For every country the ``genotypes`` most frequent
    genes are drawn as markers located by country name. The hover text of each
    marker shows the gene, its count, its percentage of that country's rows
    for the species, and that country's total row count.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'Species', 'Country' and 'Gene'.
    species : str
        Value of the 'Species' column to visualise.
    genotypes : int
        Number of top genotypes to keep per country (1, 2, 3, ...).
    marker_scale : float, default 0.1
        Factor applied to a gene's count to obtain its marker size.
    min_marker_size : float, default 4
        Lower bound for the marker size, so that genes with small counts stay
        visible and hoverable instead of shrinking to a sub-pixel dot.

    Returns
    -------
    None
        The figure is shown with ``fig.show()``. Nothing is returned on
        purpose: returning the figure would render it a second time when the
        call is the last expression of a notebook cell.

    Raises
    ------
    ValueError
        If ``genotypes`` is smaller than 1.
    """
    if genotypes < 1:
        raise ValueError(f"genotypes must be a positive integer, got {genotypes!r}")

    top_genes = _top_genotypes_per_country(df, species, genotypes)

    fig = go.Figure()

    # One trace per (country, gene) pair: separate traces are coloured
    # independently, which keeps several genes of one country distinguishable.
    for row in top_genes.itertuples(index=False):
        fig.add_trace(go.Scattergeo(
            locationmode='country names',
            locations=[row.Country],
            text=(f"{row.Gene}: {row.Count}<br>Percentage: {row.Percentage:.1f}%"
                  f"<br>Total Isolates Recorded: {row.Total_Isolates}"),
            marker=dict(size=max(row.Count * marker_scale, min_marker_size),
                        line=dict(width=0.5, color='darkgray')),
            showlegend=False))

    # Frames are never played on their own; these buttons start/stop the rotation.
    # redraw=True is requested because the frames change the geo layout, not trace data.
    rotation_controls = dict(
        type='buttons', showactive=False,
        x=0.02, y=0.02, xanchor='left', yanchor='bottom',
        buttons=[
            dict(label='Rotate', method='animate',
                 args=[None, dict(frame=dict(duration=100, redraw=True),
                                  transition=dict(duration=0),
                                  fromcurrent=True, mode='immediate')]),
            dict(label='Pause', method='animate',
                 args=[[None], dict(frame=dict(duration=0, redraw=False),
                                    mode='immediate')]),
        ])

    fig.update_layout(
        title=f'Most Prevalent Genotypes in Each Country for {species}',
        geo=dict(showframe=False, showcoastlines=True, projection_type='orthographic',
                 landcolor='darkgreen', oceancolor='lightseagreen',
                 lakecolor='rgb(0, 116, 189)', showocean=True, coastlinecolor='orange',
                 countrycolor='darkgrey', showland=True, showcountries=True),
        height=800, width=1000, margin=dict(l=0, r=0, t=40, b=40),
        updatemenus=[rotation_controls])

    # Start the globe centred on longitude 0 / latitude 0
    fig.update_geos(projection_rotation=dict(lon=0, lat=0, roll=0))

    # One frame per 10 degrees of longitude gives a full turn of the globe
    fig.frames = [
        go.Frame(layout=dict(geo=dict(projection_rotation=dict(lon=lon))), name=f"frame{lon}")
        for lon in range(0, 360, 10)
    ]

    fig.show()


def _top_genotypes_per_country(df, species, genotypes):
    """Return the ``genotypes`` most frequent genes per country for ``species``.

    The result has the columns 'Country', 'Gene', 'Count', 'Total_Isolates'
    and 'Percentage'. Rows are ordered by country, then by descending count;
    genes with equal counts keep their alphabetical (groupby) order.
    """
    isolates = df[df['Species'] == species]

    # Occurrences of every gene within every country
    gene_counts = isolates.groupby(['Country', 'Gene']).size().reset_index(name='Count')

    # Number of rows per country, used as the denominator of the percentage
    country_totals = isolates.groupby('Country').size().reset_index(name='Total_Isolates')

    gene_counts = gene_counts.merge(country_totals, on='Country')
    gene_counts['Percentage'] = (gene_counts['Count'] / gene_counts['Total_Isolates']) * 100

    # Stable descending sort + groupby.head picks the same rows as a per-country
    # nlargest (ties resolved by first occurrence) without going through
    # groupby.apply, whose handling of the grouping column is deprecated since
    # pandas 2.2 and would otherwise put the 'Country' column at risk.
    ranked = gene_counts.sort_values('Count', ascending=False, kind='mergesort')
    top = ranked.groupby('Country', sort=False).head(genotypes)

    # Restore the country-by-country order (stable, so the count ranking survives)
    return top.sort_values('Country', kind='mergesort').reset_index(drop=True)
