
# COVID-19 — Storytelling e Gráficos (Notebook)

Este notebook consome **CSVs processados** e gera gráficos com **matplotlib**.  
> Diretrizes: **sem seaborn** e **um gráfico por figura**.


In [None]:

import os, glob
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Directory searched for the processed input CSVs; override with the DATA_DIR
# environment variable.
DATA_DIR = os.getenv("DATA_DIR", "data/processed")
# Directory where the chart images are written; override with CHARTS_DIR.
CHARTS_DIR = os.getenv("CHARTS_DIR", "charts")
# Create the output directory up front (no error if it already exists).
os.makedirs(CHARTS_DIR, exist_ok=True)

def read_csv_best_effort(path):
    """Read a CSV file, trying several text encodings in turn.

    The encodings ``utf-8``, ``utf-8-sig`` and ``latin-1`` are attempted in
    that order; the first one that decodes the file wins.

    Args:
        path: Path of the CSV file to load.

    Returns:
        The ``pandas.DataFrame`` parsed from the file.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        Exception: Any non-encoding error raised by ``pandas.read_csv``
            (empty file, malformed CSV, ...) is propagated unchanged instead
            of being masked by a retry with another encoding.
    """
    for enc in ("utf-8", "utf-8-sig", "latin-1"):
        try:
            return pd.read_csv(path, encoding=enc)
        except UnicodeDecodeError:
            # Wrong encoding guess: move on to the next candidate.
            continue
    # latin-1 maps every byte to a character, so this point is only reached
    # if the encoding list above is changed. `read_csv` has no `errors`
    # keyword, so the lenient decoding is done by the file handle instead.
    with open(path, encoding="utf-8", errors="ignore", newline="") as handle:
        return pd.read_csv(handle)

def find_date_col(df):
    """Return the label of the first column that looks like a date column.

    A column qualifies when its lower-cased name is one of ``date``, ``data``,
    ``day``, ``dt``, ``dia`` (or contains ``date``/``data``) AND more than
    half of its values can be parsed as datetimes.

    Args:
        df: DataFrame whose columns are inspected, in column order.

    Returns:
        The matching column label, or ``None`` when no column qualifies.
    """
    exact_names = {"date", "data", "day", "dt", "dia"}
    for col in df.columns:
        name = str(col).lower()
        if name not in exact_names and "date" not in name and "data" not in name:
            continue
        try:
            series = df[col]
            # pandas parses plain numbers as epoch offsets, so a numeric
            # column such as "data_id" would otherwise pass as a date.
            if pd.api.types.is_numeric_dtype(series):
                continue
            # `infer_datetime_format` is deprecated and intentionally not
            # passed: pandas infers the format on its own.
            parsed = pd.to_datetime(series, errors="coerce")
        except (ValueError, TypeError, OverflowError):
            # Column cannot be interpreted as datetimes at all.
            continue
        if parsed.notna().mean() > 0.5:
            return col
    return None

def simple_line_plot(x, y, title, xlabel, ylabel, outfile):
    """Draw one line chart and save it inside ``CHARTS_DIR``.

    Args:
        x: Values for the horizontal axis.
        y: Values for the vertical axis.
        title: Chart title.
        xlabel: Label of the horizontal axis.
        ylabel: Label of the vertical axis.
        outfile: File name of the image, relative to ``CHARTS_DIR``.
    """
    destination = os.path.join(CHARTS_DIR, outfile)
    figure, axes = plt.subplots(figsize=(10, 4))
    axes.plot(x, y)
    axes.set_title(title)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    figure.tight_layout()
    figure.savefig(destination)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(figure)

def simple_bar_plot(categories, values, title, xlabel, ylabel, outfile, rotate_xticks=0):
    """Draw one bar chart and save it inside ``CHARTS_DIR``.

    Args:
        categories: Bar labels (horizontal axis).
        values: Bar heights.
        title: Chart title.
        xlabel: Label of the horizontal axis.
        ylabel: Label of the vertical axis.
        outfile: File name of the image, relative to ``CHARTS_DIR``.
        rotate_xticks: Rotation in degrees for the tick labels of the
            horizontal axis; a falsy value (default ``0``) leaves them as is.
    """
    destination = os.path.join(CHARTS_DIR, outfile)
    figure, axes = plt.subplots(figsize=(10, 5))
    axes.bar(categories, values)
    axes.set_title(title)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    if rotate_xticks:
        # Right-aligned so rotated labels end under their own bar.
        plt.xticks(rotation=rotate_xticks, ha="right")
    figure.tight_layout()
    figure.savefig(destination)
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(figure)


In [None]:

# MM7 (7-day moving average) of cases — Brazil
import os
import glob

out_name = "mm7_casos_brasil.png"

# Preferred input file. When it is missing from DATA_DIR, fall back to a file
# matching a looser pattern.
targets = ["701_brasil_mm7_casos.csv"]
if not any(os.path.exists(os.path.join(DATA_DIR, t)) for t in targets):
    # sorted() makes the pick reproducible: glob order depends on the filesystem.
    gv = sorted(glob.glob(os.path.join(DATA_DIR, "*brasil*mm7*cas*.*")))
    if gv:
        targets = [os.path.basename(gv[0])]

for fname in targets:
    for base in [DATA_DIR, "."]:
        path = os.path.join(base, fname)
        if not os.path.exists(path):
            continue
        df = read_csv_best_effort(path)
        date_col = find_date_col(df)
        if date_col is None:
            continue
        # Lower-cased names of the candidate value columns. str() keeps this
        # safe for non-string labels; the date column is never a value column.
        lowered = {c: str(c).lower() for c in df.columns if c != date_col}
        val_cols = [c for c, cl in lowered.items()
                    if ("mm7" in cl) or ("média" in cl) or ("media_movel" in cl)]
        if not val_cols:
            # No moving-average column: fall back to a "new cases" column.
            val_cols = [c for c, cl in lowered.items()
                        if ("new" in cl and "case" in cl) or ("nov" in cl and "cas" in cl)]
        if not val_cols:
            continue
        cval = val_cols[0]
        df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
        df = df.sort_values(date_col)
        simple_line_plot(df[date_col], df[cval], "Brasil — MM7 de Casos", "Data", "MM7 de Casos", out_name)
        # Report the real output location, which follows CHARTS_DIR.
        print("Gerado:", os.path.join(CHARTS_DIR, out_name))
        break


In [None]:

# MM7 (7-day moving average) of deaths — Brazil
import os
import glob

out_name = "mm7_mortes_brasil.png"

# Preferred input file. When it is missing from DATA_DIR, fall back to a file
# matching a looser pattern.
targets = ["702_brasil_mm7_mortes.csv"]
if not any(os.path.exists(os.path.join(DATA_DIR, t)) for t in targets):
    # sorted() makes the pick reproducible: glob order depends on the filesystem.
    gv = sorted(glob.glob(os.path.join(DATA_DIR, "*brasil*mm7*mort*.*")))
    if gv:
        targets = [os.path.basename(gv[0])]

for fname in targets:
    for base in [DATA_DIR, "."]:
        path = os.path.join(base, fname)
        if not os.path.exists(path):
            continue
        df = read_csv_best_effort(path)
        date_col = find_date_col(df)
        if date_col is None:
            continue
        # Lower-cased names of the candidate value columns. str() keeps this
        # safe for non-string labels; the date column is never a value column.
        lowered = {c: str(c).lower() for c in df.columns if c != date_col}
        val_cols = [c for c, cl in lowered.items()
                    if ("mm7" in cl) or ("média" in cl) or ("media_movel" in cl)]
        if not val_cols:
            # No moving-average column: fall back to a "new deaths" column.
            val_cols = [c for c, cl in lowered.items()
                        if ("new" in cl and "death" in cl)
                        or ("nov" in cl and ("morte" in cl or "obit" in cl))]
        if not val_cols:
            continue
        cval = val_cols[0]
        df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
        df = df.sort_values(date_col)
        simple_line_plot(df[date_col], df[cval], "Brasil — MM7 de Mortes", "Data", "MM7 de Mortes", out_name)
        # Report the real output location, which follows CHARTS_DIR.
        print("Gerado:", os.path.join(CHARTS_DIR, out_name))
        break


In [None]:

# Top 10 — cases per million
import os

out_name = "top10_casos_por_milhao.png"

# First existing candidate file, looking in DATA_DIR before the working directory.
cands = ["Top_10_em_casos_por_milhão.csv","721_ranking_global_casos_por_milhao.csv"]
found = next(
    (os.path.join(base, c) for c in cands for base in [DATA_DIR, "."]
     if os.path.exists(os.path.join(base, c))),
    None,
)

if found:
    df = read_csv_best_effort(found)
    # str() keeps the name matching safe for non-string column labels.
    lowered = {c: str(c).lower() for c in df.columns}
    country_col = next(
        (c for c, cl in lowered.items() if cl in ["country","country_region","país","pais"]),
        None,
    )
    per_million = [c for c, cl in lowered.items() if ("per_million" in cl) or ("por_milh" in cl)]
    # Prefer a per-million column that mentions cases ("cas" covers both
    # "cases" and "casos"); otherwise use the first per-million column.
    val_col = next((c for c in per_million if "cas" in lowered[c]), None)
    if val_col is None and per_million:
        val_col = per_million[0]
    if (country_col is not None) and (val_col is not None):
        dft = df[[country_col, val_col]].dropna().copy()
        dft[val_col] = dft[val_col].astype(float)
        # Rank explicitly so the chart is a real top 10 even when the CSV is
        # not pre-sorted; the stable sort keeps the file order among ties.
        dft = dft.sort_values(val_col, ascending=False, kind="mergesort").head(10)
        simple_bar_plot(dft[country_col].astype(str), dft[val_col],
                        "Top 10 — Casos por milhão", "País", "Casos por milhão",
                        out_name, rotate_xticks=45)
        # Report the real output location, which follows CHARTS_DIR.
        print("Gerado:", os.path.join(CHARTS_DIR, out_name))


In [None]:

# Top 10 — deaths per million
import os

out_name = "top10_mortes_por_milhao.png"

# First existing candidate file, looking in DATA_DIR before the working directory.
cands = ["Top_10_em_mortes_por_milhão.csv","722_ranking_global_mortes_por_milhao.csv"]
found = next(
    (os.path.join(base, c) for c in cands for base in [DATA_DIR, "."]
     if os.path.exists(os.path.join(base, c))),
    None,
)

if found:
    df = read_csv_best_effort(found)
    # str() keeps the name matching safe for non-string column labels.
    lowered = {c: str(c).lower() for c in df.columns}
    country_col = next(
        (c for c, cl in lowered.items() if cl in ["country","country_region","país","pais"]),
        None,
    )
    per_million = [c for c, cl in lowered.items() if ("per_million" in cl) or ("por_milh" in cl)]
    # Prefer a per-million column that mentions deaths; otherwise use the
    # first per-million column.
    val_col = next(
        (c for c in per_million
         if any(term in lowered[c] for term in ("death", "morte", "obit"))),
        None,
    )
    if val_col is None and per_million:
        val_col = per_million[0]
    if (country_col is not None) and (val_col is not None):
        dft = df[[country_col, val_col]].dropna().copy()
        dft[val_col] = dft[val_col].astype(float)
        # Rank explicitly so the chart is a real top 10 even when the CSV is
        # not pre-sorted; the stable sort keeps the file order among ties.
        dft = dft.sort_values(val_col, ascending=False, kind="mergesort").head(10)
        simple_bar_plot(dft[country_col].astype(str), dft[val_col],
                        "Top 10 — Mortes por milhão", "País", "Mortes por milhão",
                        out_name, rotate_xticks=45)
        # Report the real output location, which follows CHARTS_DIR.
        print("Gerado:", os.path.join(CHARTS_DIR, out_name))


In [None]:

# Comparison — Brazil vs neighbours (cases per million)
import os

out_name = "comparativo_br_vizinhos_casos_pm.png"

# First existing candidate file, looking in DATA_DIR before the working directory.
cands = ["Comparação_focada_Brasil_vs_vizinhos.csv","723_evolucao_5paises_casos_por_milhao.csv"]
found = next(
    (os.path.join(base, c) for c in cands for base in [DATA_DIR, "."]
     if os.path.exists(os.path.join(base, c))),
    None,
)

if found:
    df = read_csv_best_effort(found)
    # Shared helper: matches on the column name AND checks that the values
    # actually parse as dates, like the other cells of this notebook.
    date_col = find_date_col(df)
    if date_col is not None:
        df[date_col] = pd.to_datetime(df[date_col], errors="coerce")
        # str() keeps the name matching safe for non-string column labels.
        lowered = {c: str(c).lower() for c in df.columns}
        ccol = next(
            (c for c, cl in lowered.items() if cl in ["country","country_region","país","pais"]),
            None,
        )
        per_million = [c for c, cl in lowered.items() if ("per_million" in cl) or ("por_milh" in cl)]
        # Prefer a per-million column that mentions cases; otherwise use the
        # first per-million column.
        vcol = next((c for c in per_million if "cas" in lowered[c]), None)
        if vcol is None and per_million:
            vcol = per_million[0]
        if (ccol is not None) and (vcol is not None):
            plt.figure(figsize=(10,4))
            # One line per country; rows without a date or a value are skipped.
            for pais, dfg in df.dropna(subset=[date_col, vcol]).groupby(ccol):
                dfo = dfg.sort_values(date_col)
                plt.plot(dfo[date_col], dfo[vcol], label=str(pais))
            plt.title("Casos por milhão — Brasil vs vizinhos")
            plt.xlabel("Data")
            plt.ylabel("Casos por milhão")
            plt.legend()
            plt.tight_layout()
            plt.savefig(os.path.join(CHARTS_DIR, out_name))
            plt.close()
            # Report the real output location, which follows CHARTS_DIR.
            print("Gerado:", os.path.join(CHARTS_DIR, out_name))



## ✅ Saídas geradas
As imagens são salvas em `charts/` (ou no diretório definido pela variável de ambiente `CHARTS_DIR`) e podem ser referenciadas no README:
```markdown
![MM7 Casos Brasil](charts/mm7_casos_brasil.png)
![MM7 Mortes Brasil](charts/mm7_mortes_brasil.png)
![Top 10 Casos/milhão](charts/top10_casos_por_milhao.png)
![Top 10 Mortes/milhão](charts/top10_mortes_por_milhao.png)
![Comparativo BR vs Vizinhos](charts/comparativo_br_vizinhos_casos_pm.png)
```
