# Análise Exploratória – Município 1100148 (Theobroma – RO)

In [None]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display
import requests
from io import StringIO

# Raw GitHub URL of the CSV file that is loaded into `df` below.
GITHUB_RAW_URL = "https://raw.githubusercontent.com/TollerSPC/analise-eda-ibge/main/br_ibge_censo_agropecuario_municipio.csv"
# Name of the municipality-code column; None means "auto-detect from the column names".
CODIGO_COL = None
# Name of the population column; None means "auto-detect from the column names".
POP_COL = None
# NOTE(review): not referenced in the visible code; presumably a minimum
# number of inhabitants used as a filter threshold. Confirm or remove.
MIN_HAB = 100000
# Municipality code compared against CODIGO_COL to pick the rows to display.
TARGET_CODE = 1100148
# NOTE(review): human-readable label for TARGET_CODE; not referenced in the visible code.
TARGET_NAME = "Theobroma - RO"

# Load the CSV into `df`. Preferred path: let pandas fetch and parse the URL.
try:
    df = pd.read_csv(GITHUB_RAW_URL, low_memory=False)
except Exception:
    # Fallback: download the raw bytes ourselves, decode them leniently and
    # guess the delimiter. `except Exception` (instead of a bare `except`)
    # lets KeyboardInterrupt / SystemExit propagate.
    r = requests.get(GITHUB_RAW_URL, timeout=60)
    # Fail loudly on HTTP errors instead of parsing an error page as CSV.
    r.raise_for_status()
    text = r.content.decode("utf-8", errors="replace")
    # Guess the delimiter from the header line only: data rows may contain
    # decimal commas, which would skew a count over the whole file.
    header_line = text.split("\n", 1)[0]
    sep = "," if header_line.count(",") >= header_line.count(";") else ";"
    df = pd.read_csv(StringIO(text), sep=sep, low_memory=False)

def percent_present(s):
    """Return the percentage (0-100) of non-null entries in ``s``."""
    total = len(s)
    filled = s.notna().sum()
    return 100 * filled / total

def _columns_containing(frame, keywords):
    """Return the column names of ``frame`` whose lowercase form contains any keyword."""
    matches = []
    for name in frame.columns:
        lowered = name.lower()
        if any(key in lowered for key in keywords):
            matches.append(name)
    return matches


# When no municipality-code column was configured, take the first column
# whose name mentions "cod", "ibge" or "municip" (None if there is none).
if CODIGO_COL is None:
    cands = _columns_containing(df, ["cod", "ibge", "municip"])
    CODIGO_COL = cands[0] if cands else None

# Same idea for the population column, keyed on "pop" / "habit".
if POP_COL is None:
    pops = _columns_containing(df, ["pop", "habit"])
    POP_COL = pops[0] if pops else None

def clean_numeric_column(s):
    """Convert a text column holding formatted numbers into floats.

    Numeric-dtype input is returned unchanged. Otherwise every value is
    converted to text, stripped, and parsed:

    * empty strings and the tokens "nan", "none", "null", "<na>"
      (case-insensitive) become NaN;
    * when both "," and "." are present, the right-most one is taken as the
      decimal mark and the other as a thousands separator, so both
      "1.234,56" and "1,234.56" yield 1234.56;
    * a separator that occurs more than once on its own ("1.234.567") can
      only be a thousands separator and is removed;
    * a single lone comma is treated as a decimal comma ("12,5" -> 12.5);
    * any remaining character other than digits, "." and "-" (currency
      symbols, spaces, "%", ...) is dropped before calling ``float``.

    Values that still cannot be parsed become NaN.

    Args:
        s: pandas Series to convert.

    Returns:
        ``s`` itself when it is already numeric, otherwise a new float
        Series with the same index.
    """
    if pd.api.types.is_numeric_dtype(s):
        return s

    import re  # local import: `re` is not among the file-level imports

    # Compiled once per call instead of once per value.
    junk = re.compile(r"[^0-9.\-]")
    missing_tokens = {"", "nan", "none", "null", "<na>"}

    def _parse(v):
        # Missing values may arrive as real NaN/None instead of text,
        # depending on the string dtype in use.
        if not isinstance(v, str):
            return np.nan
        v = v.strip()
        if v.lower() in missing_tokens:
            return np.nan
        last_comma = v.rfind(",")
        last_dot = v.rfind(".")
        if last_comma >= 0 and last_dot >= 0:
            if last_comma > last_dot:
                # "1.234,56": dots group thousands, comma is the decimal mark.
                v = v.replace(".", "").replace(",", ".")
            else:
                # "1,234.56": commas group thousands, dot is the decimal mark.
                v = v.replace(",", "")
        elif v.count(",") > 1:
            v = v.replace(",", "")
        elif v.count(".") > 1:
            v = v.replace(".", "")
        else:
            v = v.replace(",", ".")
        v = junk.sub("", v)
        try:
            return float(v)
        except ValueError:
            return np.nan

    return s.astype(str).str.strip().map(_parse).astype(float)

# Work on a copy so the raw frame `df` stays untouched.
df_clean = df.copy()

# An object-dtype column counts as "numeric-like" when more than 60% of its
# non-null values contain at least one digit.
numeric_candidates = []
for col_name in df.columns:
    if df[col_name].dtype != "object":
        continue
    non_null = df[col_name].dropna().astype(str)
    if len(non_null) == 0:
        continue
    if non_null.map(lambda text: any(ch.isdigit() for ch in text)).mean() > 0.6:
        numeric_candidates.append(col_name)

# Keep the original text column and add a parsed "<name>_num" companion.
for col_name in numeric_candidates:
    df_clean[col_name + "_num"] = clean_numeric_column(df_clean[col_name])

# If the population column is not numeric, parse it too and point POP_COL
# at the parsed "<name>_num" column.
if POP_COL and POP_COL in df_clean.columns and not pd.api.types.is_numeric_dtype(df_clean[POP_COL]):
    df_clean[POP_COL + "_num"] = clean_numeric_column(df_clean[POP_COL])
    POP_COL = POP_COL + "_num"

# Select the rows of the target municipality into `city_df`.
city_df = None
if CODIGO_COL and CODIGO_COL in df_clean.columns:
    try:
        codes = df_clean[CODIGO_COL]
        # Numeric comparison also matches codes stored as floats
        # ("1100148.0") or as padded text (" 1100148").
        matches = pd.to_numeric(codes, errors="coerce") == TARGET_CODE
        # Plain text comparison covers a non-numeric TARGET_CODE.
        matches = matches | (codes.astype(str).str.strip() == str(TARGET_CODE))
        city_df = df_clean[matches]
    except Exception as exc:
        # Stay best-effort, but say why the lookup failed.
        print(f"Warning: could not filter by column {CODIGO_COL!r}: {exc}")
        city_df = None

if city_df is None or city_df.empty:
    # Fall back to the first row as before, but state it explicitly so the
    # output is not mistaken for the target municipality.
    print(f"Warning: municipality code {TARGET_CODE} not found; showing the first row instead.")
    # head(1) equals iloc[[0]] on a non-empty frame and does not raise on an empty one.
    city_df = df_clean.head(1)

# Show the selected record(s) with one line per column.
display(city_df.transpose())

# Descriptive statistics for every numeric column, one line per column.
num_cols = list(filter(lambda name: pd.api.types.is_numeric_dtype(df_clean[name]), df_clean.columns))
display(df_clean[num_cols].describe().transpose())

# Percentage of non-null values per column; show the five emptiest columns.
presence = pd.Series({name: percent_present(df_clean[name]) for name in df_clean.columns})
display(presence.sort_values().head(5))

# Default size for every figure created from here on.
plt.rcParams.update({"figure.figsize": (10, 6)})

# One 30-bin histogram of the non-null values for each of the first six
# numeric columns.
for col_name in num_cols[:6]:
    plt.figure()
    values = df_clean[col_name].dropna()
    values.hist(bins=30)
    plt.title(col_name)
    plt.show()

