### Halálozási adatok elemzése 	&#x1F480;

Célunk megnézni, hogy a közzétett halálozási adatok mennyire pontosak.

Ötlet: Hasonlítsuk össze az adatokat a Covid előtti ötéves halálozási átlaggal.

In [1]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, HoverTool
from bokeh.io import output_notebook

In [2]:
def adatcsvkeep(start_date,end_date,countries,keep_columns):
    """Load the OWID COVID CSV and return a filtered slice of it.

    Reads ``owid-covid-data.csv`` from the working directory, keeps the rows
    whose ``iso_code`` is in ``countries`` and whose ``date`` falls inside the
    inclusive range ``[start_date, end_date]``, and finally keeps only the
    columns named in ``keep_columns``.

    Args:
        start_date: First date to keep, a string in ``yyyy-mm-dd`` form.
        end_date: Last date to keep, a string in ``yyyy-mm-dd`` form.
        countries: List of ISO code strings of the countries to keep.
        keep_columns: List of the column names to keep in the result.

    Returns:
        The filtered pandas DataFrame (original row labels are preserved).
    """
    raw = pd.read_csv('owid-covid-data.csv')

    wanted_countries = raw[raw["iso_code"].isin(countries)]

    # The date column is compared as plain text against the bounds;
    # presumably the file stores dates as yyyy-mm-dd so this sorts correctly.
    dates = wanted_countries["date"]
    inside_window = (dates >= start_date) & (dates <= end_date)

    return wanted_countries[inside_window][keep_columns]
# Three-letter ISO country codes of the 27 EU member states; these are the
# values matched against the "iso_code" column of the COVID data.
eu_members= [
    "AUT",  # Austria
    "BEL",  # Belgium
    "BGR",  # Bulgaria
    "CYP",  # Cyprus
    "CZE",  # Czech Republic
    "DEU",  # Germany
    "DNK",  # Denmark
    "EST",  # Estonia
    "ESP",  # Spain
    "FIN",  # Finland
    "FRA",  # France
    "GRC",  # Greece
    "HRV",  # Croatia
    "HUN",  # Hungary
    "IRL",  # Ireland
    "ITA",  # Italy
    "LTU",  # Lithuania
    "LUX",  # Luxembourg
    "LVA",  # Latvia
    "MLT",  # Malta
    "NLD",  # Netherlands
    "POL",  # Poland
    "PRT",  # Portugal
    "ROU",  # Romania
    "SWE",  # Sweden
    "SVN",  # Slovenia
    "SVK",  # Slovakia
]
# Truncate the rendering of any DataFrame that has more than 10 rows.
pd.set_option('display.max_rows', 10)

Egy egységes adatbázist akarunk létrehozni a két adathalmazból, nézzük meg, mit kell ehhez összerakni.

**Cél:** Heti adatokra lebontani a halálozásokat, mellé a Covid előtti átlagos halálozást betenni.

In [3]:
# Peek at both inputs: a single day (2020-02-01) of COVID deaths, and the
# weekly deaths table read from eu_deaths.csv.
# NOTE: despite its name, hu_halal holds rows for every code in eu_members,
# not only Hungary.
hu_halal=adatcsvkeep("2020-02-01","2020-02-01",eu_members,["iso_code","date","new_deaths"])
eu_halal=pd.read_csv("eu_deaths.csv")
display(hu_halal)
display(eu_halal)

Unnamed: 0,iso_code,date,new_deaths
21135,AUT,2020-02-01,0.0
31697,BEL,2020-02-01,0.0
49769,BGR,2020-02-01,0.0
76913,HRV,2020-02-01,0.0
81431,CYP,2020-02-01,0.0
...,...,...,...
277168,ROU,2020-02-01,0.0
308593,SVK,2020-02-01,0.0
310099,SVN,2020-02-01,0.0
320641,ESP,2020-02-01,0.0


Unnamed: 0,Country,2015-W01,2015-W02,2015-W03,2015-W04,2015-W05,2015-W06,2015-W07,2015-W08,2015-W09,...,2023-W43,2023-W44,2023-W45,2023-W46,2023-W47,2023-W48,2023-W49,2023-W50,2023-W51,2023-W52
0,AUT,1704.0,1768.0,1837.0,1792.0,1837.0,1926.0,1992.0,2020.0,1883.0,...,1761.0,1727.0,1759.0,1813.0,1817.0,1978.0,1875.0,1970.0,2071.0,1953.0
1,BEL,2461.0,2525.0,2526.0,2510.0,2644.0,2727.0,2892.0,2774.0,2643.0,...,2182.0,2059.0,2061.0,2225.0,2222.0,2278.0,2416.0,2459.0,2455.0,2434.0
2,BGR,2501.0,2512.0,2419.0,2339.0,2335.0,2295.0,2341.0,2535.0,2561.0,...,2020.0,1941.0,1990.0,2042.0,2123.0,2083.0,2010.0,2059.0,2023.0,2253.0
3,CYP,126.0,137.0,155.0,154.0,145.0,157.0,134.0,129.0,143.0,...,116.0,113.0,122.0,117.0,120.0,132.0,131.0,136.0,132.0,152.0
4,CZE,2378.0,2432.0,2396.0,2499.0,2580.0,2598.0,2732.0,2684.0,2609.0,...,2206.0,2133.0,2150.0,2315.0,2285.0,2362.0,2439.0,2422.0,2462.0,2470.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23,ROU,5803.0,5848.0,5602.0,5508.0,5560.0,5688.0,5815.0,6281.0,6139.0,...,4713.0,4587.0,4595.0,4478.0,4735.0,4741.0,4613.0,4759.0,4808.0,4089.0
24,SWE,1927.0,1966.0,1935.0,1946.0,1975.0,1868.0,2001.0,2107.0,2076.0,...,1806.0,1941.0,1863.0,1948.0,2038.0,2015.0,1965.0,2105.0,2163.0,2076.0
25,SVN,408.0,407.0,453.0,449.0,432.0,420.0,459.0,446.0,449.0,...,425.0,417.0,399.0,424.0,450.0,417.0,484.0,492.0,489.0,500.0
26,SVK,1112.0,1187.0,1210.0,1235.0,1292.0,1226.0,1275.0,1266.0,1235.0,...,1091.0,1036.0,1025.0,1048.0,1117.0,1195.0,1182.0,1185.0,1247.0,1246.0


### Interaktív megjelenítés

In [4]:
# Cap DataFrame rendering at 10 rows (the same option is also set in an earlier cell).
pd.set_option('display.max_rows', 10)

Írjunk egy függvénydefiníciót, amely a két adatsokaságot egy egységes táblázatba rakja össze.

In [5]:
def deathcountry(country):
    """Build the weekly death table of one country for 2020-2022.

    Reads ``owid-covid-data.csv`` (daily COVID deaths) and ``eu_deaths.csv``
    (weekly deaths, one row per country, columns named like ``2015-W01``) from
    the working directory and combines them into one table.

    Args:
        country: ISO code of the country; matched against ``iso_code`` in the
            COVID file and against ``Country`` in the weekly file.

    Returns:
        A pandas DataFrame with one row for each of weeks 1-52 of 2020, 2021
        and 2022 (156 rows, ordered by year then week) and the columns:

        * ``year``, ``week`` - ISO year and ISO week number,
        * ``new_deaths`` - COVID deaths summed over the week (NaN when the
          COVID file has no row at all for that week),
        * ``avg_deaths`` - 2015-2019 mean of the same week, rounded up,
        * ``all_deaths`` - deaths of that week from the weekly file,
        * ``no_covid_deaths`` - ``all_deaths - new_deaths``,
        * ``Year_Week`` - ``"<year>-<week>"`` label, e.g. ``"2020-1"``.

    Raises:
        ValueError: If ``country`` has no row in ``eu_deaths.csv``.
    """
    years = (2020, 2021, 2022)
    baseline_years = range(2015, 2020)
    weeks = range(1, 53)

    coviddata = pd.read_csv("owid-covid-data.csv")
    death = pd.read_csv("eu_deaths.csv")

    death = death[death["Country"] == country]
    if death.empty:
        raise ValueError(f"no weekly death data for country {country!r}")
    # Exactly one row per country is used; index it by column label.
    weekly_row = death.iloc[0]

    # --- COVID deaths per week ---
    covid = coviddata.loc[coviddata["iso_code"] == country, ["date", "new_deaths"]]
    iso = pd.to_datetime(covid["date"]).dt.isocalendar()
    daily = pd.DataFrame({
        # The ISO year must accompany the ISO week: the first days of January
        # can belong to the last ISO week of the previous year, and pairing
        # them with the calendar year would file them under the wrong December.
        "year": iso["year"].astype(int),
        # Week 53 (only 2020 has one in this range) is folded into week 52 so
        # that every year has the same 52 rows.
        "week": iso["week"].astype(int).clip(upper=52),
        "new_deaths": covid["new_deaths"],
    })
    weekly_covid = daily.groupby(["year", "week"], as_index=False)["new_deaths"].sum()

    # Left-join onto the full year/week grid: this restricts the data to the
    # wanted years and keeps the table at a fixed length even if the COVID
    # file lacks some weeks.
    grid = pd.DataFrame(
        [(year, week) for year in years for week in weeks],
        columns=["year", "week"],
    )
    result = grid.merge(weekly_covid, on=["year", "week"], how="left")

    # --- pre-COVID average per week, repeated for each target year ---
    baseline = [
        np.ceil(
            sum(float(weekly_row[f"{year}-W{week:02d}"]) for year in baseline_years)
            / len(baseline_years)
        )
        for week in weeks
    ]
    result["avg_deaths"] = baseline * len(years)

    # --- all deaths per week ---
    # NOTE(review): only W01..W52 columns are read here; if eu_deaths.csv has a
    # 2020-W53 column it is ignored, while COVID week 53 is folded into week 52.
    result["all_deaths"] = [
        float(weekly_row[f"{year}-W{week:02d}"]) for year in years for week in weeks
    ]

    # --- deaths with the COVID deaths removed ---
    result["no_covid_deaths"] = result["all_deaths"] - result["new_deaths"]

    result["Year_Week"] = result["year"].astype(str) + "-" + result["week"].astype(str)
    return result

In [9]:
# Build the weekly table for Hungary; as the cell's last expression it is rendered below.
deathcountry("HUN")

Unnamed: 0,year,week,new_deaths,avg_deaths,all_deaths,no_covid_deaths,Year_Week
0,2020,1,0.0,2886.0,2612.0,2612.0,2020-1
1,2020,2,0.0,2919.0,2585.0,2585.0,2020-2
2,2020,3,0.0,2925.0,2584.0,2584.0,2020-3
3,2020,4,0.0,3017.0,2723.0,2723.0,2020-4
4,2020,5,0.0,3057.0,2802.0,2802.0,2020-5
...,...,...,...,...,...,...,...
151,2022,48,42.0,2457.0,2647.0,2605.0,2022-48
152,2022,49,50.0,2576.0,2849.0,2799.0,2022-49
153,2022,50,43.0,2558.0,2803.0,2760.0,2022-50
154,2022,51,59.0,2639.0,2882.0,2823.0,2022-51


Definiáljunk egy függvényt, amely egy ISO-kód alapján kirajzol egy ábrát.

In [6]:

def showcountry(iso_code):
    """Plot the weekly deaths of one country as an interactive Bokeh chart.

    Fetches the table from ``deathcountry(iso_code)`` and draws three series
    over its ``Year_Week`` labels: the five-year average (``avg_deaths``),
    all deaths (``all_deaths``) and deaths without COVID
    (``no_covid_deaths``). Each series is a line with markers on top, and each
    line gets its own hover tooltip.

    Args:
        iso_code: ISO code of the country, passed straight to ``deathcountry``.

    Returns:
        None. The figure is rendered inline via ``output_notebook()`` and
        ``show()``.
    """
    df = deathcountry(iso_code)
    weeks = df['Year_Week']

    # NOTE(review): a categorical x_range is combined with
    # x_axis_type="datetime". Left unchanged because switching the axis type
    # would change the rendering; the tick labels are hidden further down
    # anyway - confirm this combination is intended.
    p = figure(
        x_range=weeks,
        width=1000,
        height=500,
        title='Heti halálozások száma 2020-2023 között',
        x_axis_type="datetime",
        x_axis_label='Hetek',
        y_axis_label='Halálesetek száma',
        toolbar_location=None,
    )

    # (column, legend label, colour) for each plotted series.
    series = [
        ('avg_deaths', 'Ötéves átlag', 'black'),
        ('all_deaths', 'Covid elhunytakkal', 'DeepPink'),
        ('no_covid_deaths', 'Covid elhunytak nélkül', 'pink'),
    ]

    # All lines are added before any marker so the markers are drawn on top.
    lines = [
        p.line(weeks, df[column], line_width=2, legend_label=label, color=colour, line_cap='round')
        for column, label, colour in series
    ]
    for column, label, colour in series:
        # scatter() draws circle markers by default; circle(size=...) is
        # deprecated in recent Bokeh releases.
        p.scatter(weeks, df[column], size=6, color=colour, legend_label=label)

    # One hover tool per line so each tooltip carries its own series label.
    hovers = [
        HoverTool(renderers=[line], tooltips=[('Dátum', '@x'), (label, '@y')])
        for line, (_, label, _) in zip(lines, series)
    ]
    p.add_tools(*hovers)

    p.grid.grid_line_color = None

    # No drag/scroll/tap gesture is active by default.
    p.toolbar.active_drag = None
    p.toolbar.active_scroll = None
    p.toolbar.active_tap = None

    # Hide the x tick labels (one label per week would not be readable).
    p.xaxis.major_label_text_font_size = "0pt"

    p.legend.location = 'top_right'
    output_notebook()
    show(p)


Futtassuk Magyarországra

In [7]:
# Render the chart for Hungary.
showcountry("HUN")

Ez szuper, de macerás lenne egyenként lefuttatni az összes országra, ezért tegyük okosabbá: elég legyen kiválasztani egy ország ISO-kódját, és azáltal rajzolja ki a megfelelő ábrát.

In [8]:
import ipywidgets as widgets

# The dropdown offers the country codes listed in eu_members.
iso = eu_members

dd1 = widgets.Dropdown(options = iso, description = 'Ország')


def draw_part(iso):
    """Plot the chart for the given ISO code by delegating to showcountry."""
    showcountry(iso)
    
    
# The key in `controls` must match draw_part's parameter name ('iso'); the
# dropdown's value is passed in under that name.
out = widgets.interactive_output(draw_part, controls={'iso':dd1})
ui = widgets.HBox([dd1])

display(out, ui)

Output()

HBox(children=(Dropdown(description='Ország', options=('AUT', 'BEL', 'BGR', 'CYP', 'CZE', 'DEU', 'DNK', 'EST',…