# Trend and Spatial Analysis of Congenital Malformations in Brazil (2015-2024)
## A Demonstration Notebook for a Scientific Paper

This notebook demonstrates the use of the `datasus_epi` package to perform a complete analysis of the rates of congenital malformations in Brazil, covering the period from 2015 to 2024. The analysis includes:

1.  **Temporal Trend Analysis (2015-2024):**
    *   Calculation of prevalence rates for all anomalies (ICD-10, Chapter XVII - "Q") by Region.
    *   Application of trend tests (Linear Regression and Mann-Kendall).
    *   Visualization of time series.
2.  **Spatial Analysis (2015, 2020, and 2024):**
    *   Calculation of prevalence rates by municipality for the years 2015, 2020, and 2024.
    *   Spatial autocorrelation analysis (Global Moran's I and LISA) for each of these years.
    *   Visualization of spatial cluster maps.

### 1. Data Loading and Preparation

In [2]:
import pandas as pd
from datasus_epi.sinasc.taxas import obter_taxa_sinasc

years = list(range(2015, 2025))
cid_prefix = 'Q'

print(f"Analyzing the period from {years[0]} to {years[-1]} for CID '{cid_prefix}'...")

regional_rates = obter_taxa_sinasc(
    anos=years,
    cid=cid_prefix,
    estratos=['REGIAO'],
    retorno='polars'
)

print("Prevalence rates per 100,000 live births (by Region and Year):")
display(regional_rates.head())

ModuleNotFoundError: No module named 'polars'

### 2. Temporal Trend Analysis by Region

In [None]:
from datasus_epi.analysis.trends import linear_regression, mann_kendall

time_series_table = regional_rates.pivot(
    index="REGIAO",
    columns="ano",
    values="taxa_por_100000"
)

print("Time Series Table (rates per 100k):")
display(time_series_table)

regression_results = linear_regression(time_series_table)
mk_results = mann_kendall(time_series_table)

print("Linear Regression Results by Region:")
display(regression_results)

print("
Mann-Kendall Test Results by Region:")
display(mk_results)

### 3. Temporal Trend Visualization

In [None]:
from datasus_epi.viz.trends import plot_single_trend_graph, plot_trend_grid
import matplotlib.pyplot as plt

graph_title = f"Prevalence of Congenital Anomalies by Region ({years[0]}-{years[-1]})"

plot_single_trend_graph(time_series_table, graph_title)
plt.show()

plot_trend_grid(time_series_table, regression_results, graph_title)
plt.show()

### 4. Spatial Analysis by Municipality (2015, 2020, 2024)

In [None]:
from datasus_epi.analysis.spatial import create_neighborhood_matrix, global_moran, local_lisa
from datasus_epi.viz.maps import plot_choropleth_map

spatial_years = [2015, 2020, 2024]

for year in spatial_years:
    print(f"---Starting spatial analysis for the year {year}... ---")

    gdf = obter_taxa_sinasc(
        anos=[year],
        cid=cid_prefix,
        estratos=["CODMUNRES"],
        retorno="geopandas"
    )

    analysis_gdf = gdf[gdf['n_nascidos_vivos'].notna()].copy()
    print(f"{len(analysis_gdf)} municipalities with data found.")

    if not analysis_gdf.empty:
        w = create_neighborhood_matrix(analysis_gdf, method="queen")

        moran_i, moran_p = global_moran(analysis_gdf, "taxa_por_100000", w)
        print(f"Global Moran's I ({year}): {moran_i:.4f} (p-value: {moran_p:.4f})")

        lisa_gdf = local_lisa(analysis_gdf, "taxa_por_100000", w)

        print(f"LISA cluster count ({year}):")
        print(lisa_gdf["lisa_cluster"].value_counts())

        fig, axes = plt.subplots(1, 2, figsize=(18, 8))

        plot_choropleth_map(
            lisa_gdf,
            column="taxa_por_100000",
            ax=axes[0],
            legend_title="Rate per 100k (Quartiles)
        )
        axes[0].set_title(f"Choropleth Map of Raw Rate by Municipality ({year})")
        axes[0].axis('off')

        cluster_colors = {
            'Not significant': 'lightgrey',
            'High-High': '#d7191c',
            'Low-Low': '#2c7bb6',
            'Low-High': '#abd9e9',
            'High-Low': '#fdae61'
        }

        lisa_gdf.plot(
            column='lisa_cluster',
            categorical=True,
            cmap='viridis',
            legend=True,
            legend_kwds={'title': "LISA Clusters", 'loc': 'lower right'},
            ax=axes[1],
            edgecolor='white',
            linewidth=0.1,
            color=[cluster_colors.get(c, 'black') for c in lisa_gdf['lisa_cluster']]
        )
        axes[1].set_title(f"LISA Cluster Map ({year})")
        axes[1].axis('off')

        plt.tight_layout()
        plt.show()
    else:
        print("No data to plot the maps.")