# In [3]:
#IMPORTING LIBRARIES AND DATASETS

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt



# LOAD THE FILTERED GENOTYPIC DATA FROM DISK
ATLAS_Dataset = pd.read_csv('atlas_2024_genes.csv')

# ONE SUBSET PER CONTINENT (ROWS WHOSE 'Continents' VALUE MATCHES), FOR THE CONTINENT-BASED ANALYSES
Africa = ATLAS_Dataset.loc[ATLAS_Dataset['Continents'].eq('Africa')]
Europe = ATLAS_Dataset.loc[ATLAS_Dataset['Continents'].eq('Europe')]
North_America = ATLAS_Dataset.loc[ATLAS_Dataset['Continents'].eq('North America')]
South_America = ATLAS_Dataset.loc[ATLAS_Dataset['Continents'].eq('South America')]
Asia = ATLAS_Dataset.loc[ATLAS_Dataset['Continents'].eq('Asia')]
Oceania = ATLAS_Dataset.loc[ATLAS_Dataset['Continents'].eq('Oceania')]

# In [18]:
#TIME SERIES VISUALIZATION COMPARING THE TREND OF RECORDED GENOTYPIC DATA OF SELECTED CONTINENT(S) WITH THE GLOBAL AVERAGE

def compare_with_global_average(df, selected_continents=None):
    """Plot yearly record counts per continent against the global average.

    Records are counted per (Year, Continents) pair. The "global average" for
    a year is the mean of the per-continent counts for that year. It is always
    computed from the full ``df``, so it does not change with
    ``selected_continents`` and covers every year present in ``df``.

    The figure is written to 'compare global average.png' in the current
    working directory. It is neither shown nor closed here.

    Args:
        df: DataFrame with 'Year' and 'Continents' columns, one row per record.
        selected_continents: A continent name, or a list-like of names, whose
            lines should be drawn. ``None`` (default) draws every continent
            found in ``df``.

    Returns:
        None.
    """
    # Series.isin rejects a bare string, so treat it as a one-element selection.
    if isinstance(selected_continents, str):
        selected_continents = [selected_continents]

    # Number of records for each (year, continent) pair
    yearly_counts = df.groupby(['Year', 'Continents']).size().reset_index(name='Count')

    # Mean of the per-continent counts for each year; one row per year, sorted by year
    global_avg = yearly_counts.groupby('Year')['Count'].mean().reset_index(name='Global Average')

    # Restrict only the continent lines; the global average keeps all years
    if selected_continents is not None:
        yearly_counts = yearly_counts[yearly_counts['Continents'].isin(selected_continents)]

    fig, ax = plt.subplots(figsize=(14, 8))

    # One marked, solid line per continent (skipped when the selection matches nothing)
    if not yearly_counts.empty:
        sns.lineplot(data=yearly_counts, x='Year', y='Count', hue='Continents', style='Continents',
                     markers=True, dashes=False, ax=ax)

    # Global average as a dashed black line, drawn from the unfiltered per-year
    # table so years without any selected continent are not dropped
    ax.plot(global_avg['Year'], global_avg['Global Average'], color='black', linestyle='--',
            label='Global Average')

    # Titles and labels
    ax.set_title('Comparison of Genotypic Data Trends Across Continents with Global Average')
    ax.set_xlabel('Year')
    ax.set_ylabel('Number of Records')

    # Legend placed outside the axes, to the right
    ax.legend(title='Continent', bbox_to_anchor=(1.05, 1), loc='upper left')

    # Grid lines, one integer-labelled tick per year, no top/right frame
    ax.grid(True)
    years = global_avg['Year'].to_numpy()
    ax.set_xticks(years)
    ax.set_xticklabels(years.astype(int))
    for side in ('top', 'right'):
        ax.spines[side].set_visible(False)

    fig.tight_layout()

    # bbox_inches='tight' keeps the outside-the-axes legend inside the saved image
    #plt.show()
    fig.savefig('compare global average.png', bbox_inches='tight')