### COVID-19 DATA EDA

In [7]:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import os

### Prepare Work Space


In [None]:
# Load the cleaned COVID-19 dataset; low_memory=False makes pandas read the
# file in one pass instead of chunk-wise dtype inference.
df = pd.read_csv(
    './Datasets/cleaned_covid_dataset.csv',
    low_memory=False,
)

In [None]:
# Create 'Charts' directory if it doesn't exist.
# exist_ok=True makes this a single idempotent call, avoiding the
# check-then-create race of a separate os.path.exists() test.
os.makedirs('Charts', exist_ok=True)


In [None]:
# helper functions
def save_plot(fig, filename, directory='Charts'):
    """Save *fig* as ``<directory>/<filename>.png`` and close it.

    Args:
        fig: Matplotlib figure to write out.
        filename: Output file name without the ``.png`` extension.
        directory: Target folder; created on demand. Defaults to ``'Charts'``.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            written.
    """
    # Create the folder here so the helper works even when the
    # directory-setup cell was skipped or run out of order.
    os.makedirs(directory, exist_ok=True)
    try:
        fig.savefig(os.path.join(directory, f'{filename}.png'), bbox_inches='tight')
    finally:
        # Always release the figure, even if saving failed, so open figures
        # do not pile up across the notebook.
        plt.close(fig)


### Preprocessing

In [11]:
# Map Sex column (1=Male, 2=Female)
# NOTE: .map() turns every value absent from the dict into NaN, so re-running
# this cell on an already-mapped column blanks it out; reload df first.
df['Sex'] = df['Sex'].map({1: 'Male', 2: 'Female'})


# Create 'Age_Group' column
# pd.cut intervals are right-closed: (0, 20], (20, 40], ...
# include_lowest=True also closes the first interval on the left so that
# Age == 0 falls into '0-20' instead of becoming NaN, and the infinite upper
# edge keeps '81+' genuinely open-ended.
bins = [0, 20, 40, 60, 80, float('inf')]
labels = ['0-20', '21-40', '41-60', '61-80', '81+']
df['Age_Group'] = pd.cut(df['Age'], bins=bins, labels=labels, include_lowest=True)


# Create 'Total_Comorbidities' column
# Row-wise sum of the comorbidity flag columns.
# NOTE(review): only a count of conditions if each flag is coded 1/0 — confirm.
comorbidities = ['Diabetes', 'COPD', 'Asthma', 'Immunosuppressed', 'Hypertension',
                 'Other_Disease', 'Cardiovascular_Disease', 'Obesity', 'Chronic_Renal_Disease']
df['Total_Comorbidities'] = df[comorbidities].sum(axis=1)


### Generate Summary Charts


In [None]:
#1 Age Distribution
# Histogram of the 'Age' column (30 bins) with a KDE curve overlaid.
fig, ax = plt.subplots(figsize=(8, 6))
sns.histplot(data=df, x='Age', bins=30, kde=True, ax=ax)
ax.set(title='Age Distribution', xlabel='Age', ylabel='Frequency')
save_plot(fig, 'age_distribution')

#2 Death Ratio
# Pie chart of the share of each 'is_dead' value.
# value_counts() orders by frequency, so the hard-coded labels/colors would
# swap whenever deaths outnumber survivors; sort_index() pins the wedge order
# to the value itself (0/False first, then 1/True).
# NOTE(review): assumes is_dead is binary with the truthy value meaning the
# patient died, and that both values are present — confirm against the dataset.
death_counts = df['is_dead'].value_counts().sort_index()
fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(death_counts, labels=['Alive', 'Dead'], autopct='%1.1f%%', colors=['green', 'red'])
ax.set_title('Death Ratio')
save_plot(fig, 'death_ratio')

#3 Comorbidities vs Death
# One count plot per comorbidity column, laid out on a 3x3 grid and split
# by 'is_dead'. Each panel is addressed through its own Axes object.
fig = plt.figure(figsize=(14, 10))
for position, condition in enumerate(comorbidities, start=1):
    panel = fig.add_subplot(3, 3, position)
    sns.countplot(data=df, x=condition, hue='is_dead', palette='Set2', ax=panel)
    panel.set_title(f'{condition} vs Death')
    panel.set_xlabel('Has Condition (1=Yes, 0=No)')
fig.tight_layout()
save_plot(fig, 'comorbidities_vs_death')

#4 Sex vs Death
# Count of rows per 'Sex' value, split by 'is_dead'.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=df, x='Sex', hue='is_dead', palette='Set1', ax=ax)
ax.set(title='Sex vs Death', xlabel='Sex', ylabel='Count')
save_plot(fig, 'sex_vs_death')

#5 Treatment Level vs Death
# Count of rows per 'Treatment_Level' value, split by 'is_dead'.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=df, x='Treatment_Level', hue='is_dead', palette='coolwarm', ax=ax)
ax.set(title='Treatment Level vs Death', xlabel='Treatment Level')
save_plot(fig, 'treatment_level_vs_death')

#6 Medical Unit Type vs Death
# Count of rows per 'Medical_Unit_Type' value, split by 'is_dead'.
fig, ax = plt.subplots(figsize=(10, 6))
sns.countplot(data=df, x='Medical_Unit_Type', hue='is_dead', palette='Spectral', ax=ax)
ax.set(title='Medical Unit Type vs Death', xlabel='Medical Unit Type')
save_plot(fig, 'medical_unit_vs_death')

#7 Tobacco Use vs Death
# Count of rows per 'Tobacco_Use' value, split by 'is_dead'.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=df, x='Tobacco_Use', hue='is_dead', palette='Set2', ax=ax)
ax.set(title='Tobacco Use vs Death', xlabel='Tobacco Use')
save_plot(fig, 'tobacco_use_vs_death')

#8 Comorbidity Correlation Heatmap
# Annotated heatmap of the pairwise correlation matrix of the comorbidity
# columns (DataFrame.corr() default method).
fig, ax = plt.subplots(figsize=(12, 10))
# Pass ax explicitly, like the other charts, instead of relying on
# matplotlib's implicit "current axes".
sns.heatmap(df[comorbidities].corr(), annot=True, cmap='YlGnBu', ax=ax)
ax.set_title('Correlation Between Comorbidities')
save_plot(fig, 'comorbidity_correlation')

#9 Age Group vs Death
# Count of rows per 'Age_Group' category, split by 'is_dead'.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=df, x='Age_Group', hue='is_dead', palette='Paired', ax=ax)
ax.set(title='Age Group vs Death', xlabel='Age Group')
save_plot(fig, 'age_group_vs_death')

#10 Total Comorbidities vs Death
# Stacked histogram of 'Total_Comorbidities', split by 'is_dead'.
fig, ax = plt.subplots(figsize=(8, 6))
# discrete=True gives every integer total its own bar centred on the value,
# rather than letting automatic binning merge or split whole-number counts.
# ax is passed explicitly, like the other charts, instead of relying on
# matplotlib's implicit "current axes".
sns.histplot(data=df, x='Total_Comorbidities', hue='is_dead', multiple='stack',
             palette='cool', discrete=True, ax=ax)
ax.set_title('Total Comorbidities vs Death')
ax.set_xlabel('Number of Comorbidities')
save_plot(fig, 'total_comorbidities_vs_death')

### End Of EDA