In [7]:
import pandas as pd
from pathlib import Path

BASE_DIR = Path("../data")
PROC_MAIN = BASE_DIR / "processed_csv/processed_csv_concat_with_iso3.csv"
PROC_OTHER = BASE_DIR / "processed_excel"

# --- 1) Cargar ---
df_main = pd.read_csv(PROC_MAIN)
df_overall = pd.read_csv(PROC_OTHER / "overall_data_clean.csv")
df_eee = pd.read_csv(PROC_OTHER / "eee_pom_per_year_long.csv")
df_ewaste = pd.read_csv(PROC_OTHER / "ewaste_generated_per_year_long.csv")
df_share_ewaste = pd.read_csv(PROC_OTHER / "share_categories_ewaste_generat_categories_long.csv")
df_share_eee = pd.read_csv(PROC_OTHER / "share_eee_categories_categories_long.csv")

# --- 2) Normalizar nombres ---
for df in [df_main, df_eee, df_ewaste, df_overall, df_share_ewaste, df_share_eee]:
    df.columns = df.columns.str.strip().str.lower()

# --- 3) Eliminar columnas duplicables ---
for df in [df_overall, df_eee, df_ewaste, df_share_ewaste, df_share_eee]:
    df.drop(columns=[c for c in ["country", "country_clean"] if c in df.columns], inplace=True)

# --- 4) Hallar intersección de países ---
sets = [
    set(df_main["iso3"].dropna().unique()),
    set(df_overall["iso3"].dropna().unique()),
    set(df_eee["iso3"].dropna().unique()),
    set(df_ewaste["iso3"].dropna().unique()),
    set(df_share_ewaste["iso3"].dropna().unique()),
    set(df_share_eee["iso3"].dropna().unique())
]
common_iso3 = set.intersection(*sets)
print("Países comunes:", common_iso3)

# --- 5) Filtrar a países comunes ---
df_main = df_main[df_main["iso3"].isin(common_iso3)]
df_overall = df_overall[df_overall["iso3"].isin(common_iso3)]
df_eee = df_eee[df_eee["iso3"].isin(common_iso3)]
df_ewaste = df_ewaste[df_ewaste["iso3"].isin(common_iso3)]
df_share_ewaste = df_share_ewaste[df_share_ewaste["iso3"].isin(common_iso3)]
df_share_eee = df_share_eee[df_share_eee["iso3"].isin(common_iso3)]

# --- 6) Merge final limpio ---
master = df_main.merge(df_overall, on="iso3", how="left")
master = master.merge(df_eee, on=["iso3", "year"], how="left")
master = master.merge(df_ewaste, on=["iso3", "year"], how="left")
master = master.merge(df_share_ewaste, on="iso3", how="left")
master = master.merge(df_share_eee, on="iso3", how="left")

print("Filas:", len(master))
print("Columnas:", len(master.columns))

OUTPUT = BASE_DIR / "master_final_dataset.csv"
master.to_csv(OUTPUT, index=False)
print("✅ Master final creado en:", OUTPUT)


Países comunes: {'BOL', 'ARG', 'PER', 'VEN', 'PAN', 'URY', 'NIC', 'CHL', 'GTM', 'ECU', 'HND'}
Filas: 1980
Columnas: 34
✅ Master final creado en: ..\data\master_final_dataset.csv


In [5]:
print(df_eee.columns)
print(df_ewaste.columns)
print(df_share_ewaste.columns)
print(df_share_eee.columns)
print(df_main.columns)


Index(['country', 'year', 'value', 'country_clean', 'iso3'], dtype='object')
Index(['country', 'year', 'value', 'country_clean', 'iso3'], dtype='object')
Index(['country', 'category', 'share_value', 'country_clean', 'iso3'], dtype='object')
Index(['country', 'category', 'share_value', 'country_clean', 'iso3'], dtype='object')
Index(['Country', 'Year', 'Population', 'E_waste_generated_kt',
       'EEE_put_on_market_kt', 'E_waste_collection_rate',
       'E_waste_formally_collected_kt', 'E_waste_imported_kt',
       'E_waste_exported_kt', 'E_waste_generated_per_capita',
       'EEE_put_on_market_per_capita', 'Temperature_Exchange_Equipment_kt',
       'Screens_kt', 'Lamps_kt', 'Large_Equipment_kt', 'Small_Equipment_kt',
       'Small_IT_kt', 'source_file', 'country_clean', 'iso3'],
      dtype='object')
