# In [None]:
# -*- coding: utf-8 -*-
"""
Created on Tue Jan  2 12:05:59 2024

@author: AJAH STEPHEN CHIDI
"""

import matplotlib.gridspec as gridspec
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Reading in the dataset
pandemic = pd.read_csv('covid 19 CountryWise.csv')

# Inspecting the data set to check for:
#   - the dimension of the set,
#   - the general information about the data, and
#   - whether the data has missing values.

# Display the shape of the Dataset
print(pandemic.shape)

# Display the Information of the Dataset.
# DataFrame.info() writes its report itself and returns None, so it is called
# directly instead of being wrapped in print() (which would also print "None").
pandemic.info()

# Check Dataset for Missing Values
print(pandemic.isna().sum())

# Display the Statistics Summary of the Dataset
print(pandemic.describe())

# Selecting the Columns to be used.
# An explicit copy makes the subset an independent frame, so later
# modifications never act on (or warn about) a slice of the raw data.
selected_columns = [
    'Country',
    'Region',
    'Total Cases',
    'Total Deaths',
    'New Cases (7 days)',
    'New Deaths (7 days)',
    'New Cases (24 hours)',
    'New Deaths (24 hours)',
]
pandemic = pandemic[selected_columns].copy()

# Display Selected Columns
print(pandemic)

# Checking for missing values in the selected columns (printed so the check is
# visible when run as a script), then replacing every missing value with 0.
print(pandemic.isnull().sum())
pandemic = pandemic.fillna(0)

# Exploring countries with high pandemic figures.
# Each ranking sorts `pandemic` on one metric (largest first), keeps the first
# ten rows and prints them. `disease` and `death_toll` are each bound twice;
# only the second binding of each name is still in effect after this section.

# Ten countries with the most total cases
disease = pandemic.sort_values(by='Total Cases', ascending=False).head(10)
print(disease)

# Ten countries with the most new deaths over the last 7 days
death_toll = pandemic.sort_values(
    by='New Deaths (7 days)', ascending=False).head(10)
print(death_toll)

# Ten countries with the most new cases over the last 7 days
disease = pandemic.sort_values(
    by='New Cases (7 days)', ascending=False).head(10)
print(disease)

# Ten countries with the most total deaths
death_toll = pandemic.sort_values(by='Total Deaths', ascending=False).head(10)
print(death_toll)

# `death_toll` was produced by DataFrame.head(), i.e. it is a slice of a larger
# frame. Take an explicit copy before adding columns so the assignments below
# do not trigger pandas' SettingWithCopyWarning.
death_toll = death_toll.copy()

# Abbreviating the countries with long names (names not listed are kept as-is)
country_abbreviations = {
    'United States of America': 'USA',
    'Russian Federation': 'Russia',
    'The United Kingdom': 'UK',
    'Republic of Korea': 'Korea',
}
death_toll['Abbr'] = death_toll['Country'].replace(country_abbreviations)

# Abbreviating the Regions with long names (names not listed are kept as-is)
region_abbreviations = {
    'Eastern Mediterranean': 'E. Mediterranean',
    'South-East Asia': 'Asia',
}
death_toll['Abbre'] = death_toll['Region'].replace(region_abbreviations)

# Aggregating the data by region: one row per 'Region', summing each metric.
# NOTE(review): `disease` here holds at most ten rows (the last top-10 ranking
# bound to that name), so these sums cover only those rows rather than the
# whole dataset - confirm this is the intended input.
region_metrics = [
    'Total Cases',
    'Total Deaths',
    'New Cases (7 days)',
    'New Deaths (7 days)',
]
regions = disease.groupby('Region')[region_metrics].sum()
print(regions)

# Create the dashboard figure: 15 x 9 inches at 300 dpi
fig = plt.figure(figsize=(15, 9), dpi=300)

# Dashboard header: title line followed by the author line
dashboard_title = ('PANDEMIC CASES AROUND THE WORLD DASHBOARD\n\n'
                   'Ajah Stephen Chidi (Student Number: 22026656)')
fig.suptitle(dashboard_title, weight=1000, size=19, y=1.05)
fig.subplots_adjust()

# 4 x 4 grid used to place the four plots; wspace/hspace set the gaps
# between grid cells
sub_gs = gridspec.GridSpec(nrows=4, ncols=4, wspace=0.9, hspace=1.4)


# Plot 3 (grid rows 2-3, columns 0-1): total deaths per country drawn as bars,
# with the same values overlaid as a red line with circular markers.
ax1 = plt.subplot(sub_gs[2:4, 0:2])

death_countries = death_toll['Abbr']
death_counts = death_toll['Total Deaths']
axis_label_font = {'fontsize': 10, 'fontweight': 'bold'}

ax1.bar(death_countries, death_counts, color='royalblue',
        label='Total Deaths')
ax1.plot(death_countries, death_counts, color='red', marker='o',
         label='Data Points')

ax1.set_title('Top 10 Countries by Total Deaths', fontsize=12,
              fontweight='bold')
ax1.set_xlabel('Country', **axis_label_font)
ax1.set_ylabel('Total Deaths', **axis_label_font)

# Legend lists both the bars and the line
ax1.legend()

# Plot 2 (grid rows 0-1, columns 2-3): horizontal bars for the ten countries
# with the most total cases.
ax2 = plt.subplot(sub_gs[0:2, 2:4])

# Rank by 'Total Cases' here and take both the labels and the bar lengths from
# the same rows, so every bar is paired with its own country and the chart
# matches its title.
top_cases = pandemic.sort_values('Total Cases', ascending=False).head(10)
top_cases_labels = top_cases['Country'].replace({
    'United States of America': 'USA',
    'Russian Federation': 'Russia',
    'The United Kingdom': 'UK',
    'Republic of Korea': 'Korea',
})

# Generate one random RGB colour per bar
num_countries = len(top_cases)
colors = np.random.rand(num_countries, 3)

bar_width = 0.8
border_size = 1.2

# Create a horizontal bar plot with a unique colour for each bar
bars = ax2.barh(top_cases_labels, top_cases['Total Cases'], color=colors,
                height=bar_width,
                edgecolor='black', linewidth=border_size)

# Put the first (largest) entry at the top; call it on ax2 directly rather
# than relying on ax2 being the current axes.
ax2.invert_yaxis()
ax2.set_title('Top 10 Countries by Total Cases',
              fontsize=12, fontweight='bold')
ax2.set_xlabel('Total Cases', fontsize=10, fontweight='bold')
ax2.set_ylabel('Country', fontsize=10, fontweight='bold')


# Plot 4 (grid rows 2-3, columns 2-3): seaborn bar plot of 'Total Cases' for
# the three regions with the largest summed total cases in `death_toll`.
ax3 = plt.subplot(sub_gs[2:4, 2:4])

# Store Region as a categorical column. `assign` builds a new frame rather
# than writing into `death_toll` in place, which avoids a chained-assignment
# warning when `death_toll` is a slice of another frame.
death_toll = death_toll.assign(Region=death_toll['Region'].astype('category'))

# Get the top regions based on the sum of Total Cases. `observed=True` keeps
# only categories that occur in the data and makes the grouping behaviour
# explicit for categorical keys.
top_regions = (
    death_toll.groupby('Region', observed=True)['Total Cases']
    .sum()
    .nlargest(3)
    .index
)

# Keep only rows of the top regions, and drop the categories that no longer
# occur so filtered-out regions cannot show up as empty entries on the
# categorical axis.
pandemic_filtered = death_toll[death_toll['Region'].isin(top_regions)].copy()
pandemic_filtered['Region'] = (
    pandemic_filtered['Region'].cat.remove_unused_categories()
)

# Draw onto ax3 explicitly instead of relying on the current axes
ax3 = sns.barplot(data=pandemic_filtered, x='Total Cases', y='Region',
                  palette='plasma', ax=ax3)
ax3.set_title('Distribution of Total Cases by Region')
ax3.set_xlabel('Total Cases')
ax3.set_ylabel('Region')

# Plot 1 (grid rows 0-1, columns 0-1): pie chart of each region's share of
# 'Total Deaths' in `regions`.

# One random RGB colour per wedge
num_regions = len(regions)
colors = np.random.rand(num_regions, 3)

ax4 = plt.subplot(sub_gs[0:2, 0:2])
region_deaths = regions['Total Deaths']
ax4.pie(
    region_deaths,
    labels=region_deaths.index,
    autopct='%1.1f%%',  # wedge percentage with one decimal place
    colors=colors,
)
ax4.set_title('Proportion of Total Deaths by Region',
              fontsize=12, fontweight='bold')

# Narrative summary placed below the plots. The literal is split at its
# paragraph breaks using adjacent-string concatenation; the resulting text is
# unchanged.
summary_text = (
    "Summary Report\n\n"
    "The Dashboard above shows the PANDEMIC CASES from various countries and regions around the world.\n\n"
    "Regions like the Americas have the highest rate of Pandemic Total death of 51.1%, followed by Europe with 31.1%, and South-East Asia with 15.6%.\n\n"
    " Also, Countries like: The United States of America (USA) have the highest rate of Pandemic Total Death and Cases,\n\n"
    "followed by Brazil, India, Russia and so on. The visualization shows the Distribution of the top 3 Regions based on the sum of Total Cases.\n\n"
    "Additionally, The long lines extending from the top of each bar are error bars. which represents the uncertainty \n\n"
    "or variability in the data distribution.  Overall, the data emphasizes the ongoing \n\n"
    "impact of PANDEMIC CASES on global health and highlights the importance of continued efforts to mitigate its spread."
)

# Position the text in figure coordinates: horizontally centred (x=0.5) and
# below the bottom edge of the figure (y=-0.16).
textbox = plt.text(
    0.5, -0.16, summary_text,
    transform=fig.transFigure,
    fontsize=12,
    fontweight='bold',
    ha='center',
)

# Raise the bottom margin of the subplot area
plt.subplots_adjust(bottom=0.3)

# Draw a black border, 5 units wide, around the whole dashboard
fig = plt.gcf()
fig.patch.set(linewidth=5, edgecolor='black')

# Display the dashboard
plt.show()


