In [None]:
import pandas as pd
from constants import *
from sklearn.cluster import KMeans
from flows.specific.custom_flow_from_shapefile import  CustomFlowFromShapefile
import os
from data_sources.specific.coca import Coca
from data_sources.specific.gold_stock_price import GoldStockPrice
from data_sources.specific.malaria_cases import Malaria
from embedders.specific.identity_embedder import IdentityEmbbeder
from embedders.specific.aggregation_embedder import AggregationEmbedder
from embedders.specific.linear_regression_coefficient_embedder import LinearRegressionCoefficientEmbedder

from clusterers.specific.identity_clusterer import IdentityClusterer
from clusterers.specific.sklearn_vector_clusterer import SklearnVectorClusterer
from clusterers.specific.quantile_clusterer import QuantileClusterer


In [None]:
# Data-source classes handed to the flow as its vector_data_sources
# ("fuentes incluidas" = "included sources").
fuentes_incluidas = [Malaria]
# NOTE(review): presumably the column holding the malaria case total — confirm.
# It is not referenced by any other cell visible in this notebook.
target_col = 'malaria_num_cases_total'

In [None]:
# Location of the municipalities shapefile inside the geo data folder.
municipalities_shapefile = os.path.join(GEO_DATA_FOLDER, "municipalities/municipalities.shp")

# Assemble the flow: monthly resolution, the selected vector sources, no
# matrix sources, an AggregationEmbedder and a QuantileClusterer.
flow = CustomFlowFromShapefile(
    ID='basic_analysis',
    name="Basic Analysis",
    time_resolution=MONTH,
    shapefile_location=municipalities_shapefile,
    id_column="muni_id",
    vector_data_sources=fuentes_incluidas,
    matrix_data_sources=[],
    embedder=AggregationEmbedder(),
    clusterer=QuantileClusterer(ID="quantile", name="Quantile"),
)

In [None]:
# Initial geography
# ----------------------
# Ask the flow for its starting geography and store it back on the flow.
flow.initial_geography = flow.get_initial_geography()

# Data loading
# ----------------------
# loadData returns a pair that is unpacked into the flow's vector and matrix
# frames; the flag records that the load stage has been run by hand.
loaded_frames = flow.loadData(flow.initial_geography)
flow.df_vector, flow.df_matrix = loaded_frames
flow.data_loaded = True

In [None]:
# Inspect the vector frame that the load step stored on the flow.
flow.df_vector

In [None]:
# Embeds Data
# ----------------------
# Run the flow's embedding step, then record on the flow that it has been run.
flow.embeddData()
flow.data_embedded = True

# Inspect the embedded vector frame.
# NOTE(review): df_embedded_vector is never assigned in this notebook;
# presumably embeddData() sets it — confirm.
flow.df_embedded_vector

In [None]:
# Clusters Data
# ----------------------
# Run the flow's clustering step, then record on the flow that it has been run.
flow.clusterData()
flow.data_clustered = True
# Inspect the clustering result.
# NOTE(review): clustered_ids is never assigned in this notebook;
# presumably clusterData() sets it — confirm.
flow.clustered_ids

In [None]:
# Builds final geography
# ----------------------
# NOTE(review): presumably populates flow.final_geography, which the
# following cells read — confirm.
flow.buildFinalGeography()

In [None]:
# Inspect the final geography held by the flow.
flow.final_geography

In [None]:
# Build a readable "Cluster <id>" label from each row's ID and store it in
# the CLUSTER_ID column of the final geography.
cluster_labels = flow.final_geography[ID].apply(lambda row_id: f"Cluster {row_id}")
flow.final_geography[CLUSTER_ID] = cluster_labels

# Plot the final geography using the CLUSTER_ID column, with a legend.
flow.final_geography.plot(
    column=CLUSTER_ID,
    figsize=(8, 8),
    legend=True,
)