In [None]:
from global_params import paths
from os import listdir, path
from data.data_handler import HandleCSV
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Directory holding the input CSV files, read from the project's path configuration.
CSV_DIR = paths.PATHS["csvs"]
# Names of the regular, non-hidden files directly inside CSV_DIR.
# Sorted because os.listdir returns entries in arbitrary order, and the order of
# this list determines the order of the frames (and rows) built from it later.
csvs = sorted(
    f for f in listdir(CSV_DIR)
    if path.isfile(path.join(CSV_DIR, f)) and not f.startswith('.')
)

In [None]:
# Show which file names were picked up from CSV_DIR.
print(csvs)

In [None]:
# One HandleCSV per discovered file, constructed from the file's path under CSV_DIR.
# Each entry of `csvs` is already a plain file name, so it is joined onto CSV_DIR directly.
csvs_handlers = [HandleCSV(CSV_DIR.joinpath(csv_name)) for csv_name in csvs]

In [None]:
# Load every CSV into a DataFrame and normalize its date and year columns.
dfs = []
encoding: str = "ISO-8859-1"  # passed to each handler's get_dataframe()
for csv_handler in csvs_handlers:
    df = csv_handler.get_dataframe(encoding)
    # Values that cannot be parsed as dates become NaT (errors='coerce');
    # .dt.date then drops the time-of-day component.
    df['DateOfDeath'] = pd.to_datetime(df['DateOfDeath'], errors='coerce').dt.date
    # Cast to str so non-numeric entries can be handled, keep the first run of
    # digits, and convert to a number. Entries with no digits end up as NaN.
    # The pattern is a raw string so that \d is not treated as a string escape.
    df['YearOfDeath'] = pd.to_numeric(
        df['YearOfDeath'].astype(str).str.extract(r'(\d+)', expand=False),
        errors='coerce',
    )
    dfs.append(df)

In [None]:
# Stack all loaded frames into a single DataFrame with a fresh 0..n-1 index.
# The whole list is concatenated, so this works for any number of input CSVs
# instead of assuming there are exactly two.
result_df = pd.concat(dfs, ignore_index=True)

In [None]:
# Inspect the distinct YearOfDeath values present in the combined frame.
result_df['YearOfDeath'].unique()

In [None]:
# Bar chart of the number of rows per YearOfDeath, with each bar labeled by its count.
ax = sns.countplot(x='YearOfDeath', data=result_df)
for p in ax.patches:
    height = p.get_height()
    # Skip patches without a positive height: int() raises on NaN, and a "0"
    # label on an empty bar carries no information.
    if not height > 0:
        continue
    # Label sits at the horizontal middle of the bar (nudged right by 0.05),
    # halfway up, written vertically in white so it is readable on the bar.
    ax.annotate(f'{int(height)}', (p.get_x() + p.get_width() / 2 + 0.05, height / 2),
                ha='center', va='bottom', rotation=90, fontsize=9, color='white')
# Rotate the x tick labels through tick_params; re-setting the tick labels with
# set_xticklabels without fixing tick positions can misplace labels.
ax.tick_params(axis='x', labelrotation=90)
plt.title('Histogram of deaths by Year')
plt.show()

In [None]:
# Bar chart of row counts per YearOfDeath, split by GenderDescription.
bar_width = 1

# Wide figure so that every year/gender bar has room for its label.
plt.figure(figsize=(33, 8))

ax = sns.countplot(x='YearOfDeath', hue='GenderDescription', data=result_df, palette='Set1', width=bar_width)

for p in ax.patches:
    height = p.get_height()
    # Skip patches without a positive height (e.g. a year/gender combination
    # with no rows): int() raises on NaN, and a "0" label adds nothing.
    if not height > 0:
        continue
    # Label sits at the horizontal middle of the bar (nudged right by 0.05),
    # halfway up, written vertically in white so it is readable on the bar.
    ax.annotate(f'{int(height)}', (p.get_x() + p.get_width() / 2 + 0.05, height / 2),
                ha='center', va='bottom', rotation=90, fontsize=9, color='white')

# Rotate the x tick labels through tick_params; re-setting the tick labels with
# set_xticklabels without fixing tick positions can misplace labels.
ax.tick_params(axis='x', labelrotation=90)

plt.title('Histogram of deaths by Year and Gender')
plt.show()

In [None]:
# Display the combined DataFrame (the notebook renders a cell's last expression).
result_df

In [None]:
# Work on a copy so that columns added to covid_df do not appear in result_df.
covid_df = result_df.copy()

In [None]:
# Boolean flag: True where ICD10SubcategoryDescription1 is exactly the COVID-19
# label, False everywhere else (missing values compare as False).
# A plain comparison keeps the column purely boolean rather than a mix of the
# label string and False.
covid_df['isCovid'] = covid_df['ICD10SubcategoryDescription1'].eq('COVID-19 virus identificado')

In [None]:
# Bar chart of row counts per YearOfDeath, split by the isCovid column.
bar_width = 1

# Wide figure so that every year/isCovid bar has room for its label.
plt.figure(figsize=(33, 8))

ax = sns.countplot(x='YearOfDeath', hue='isCovid', data=covid_df, palette='Set1', width=bar_width)

for p in ax.patches:
    height = p.get_height()
    # Skip patches without a positive height (e.g. a year with no rows for one
    # of the isCovid values): int() raises on NaN, and a "0" label adds nothing.
    if not height > 0:
        continue
    # Label sits at the horizontal middle of the bar (nudged right by 0.05),
    # halfway up, written vertically in white so it is readable on the bar.
    ax.annotate(f'{int(height)}', (p.get_x() + p.get_width() / 2 + 0.05, height / 2),
                ha='center', va='bottom', rotation=90, fontsize=9, color='white')

# Rotate the x tick labels through tick_params; re-setting the tick labels with
# set_xticklabels without fixing tick positions can misplace labels.
ax.tick_params(axis='x', labelrotation=90)

plt.title('Histogram of deaths by Year by covid')
plt.show()