In [12]:
import pandas as pd

# Load the Excel file
file_path = '../data/input/clasificacion_variables.xlsx'
df = pd.read_excel(file_path)

# Display the columns to verify the column names
print(df.columns)

# Define the dictionary for renaming observations in the appropriate column
variable_names = { 
    "year": "year",
    "Central government debt, total (% of GDP)": "gov_debt_gdp",
    "Domestic credit to private sector (% of GDP)": "priv_cred_gdp",
    "Educational attainment, at least completed upper secondary, population 25+, total (%) (cumulative)": "edu_att_upper",
    "GDP per person employed (constant 2021 PPP $)": "gdp_per_empl",
    "Intentional homicides (per 100,000 people)": "intent_hom",
    "Interest rate spread (lending rate minus deposit rate, %)": "int_rate_spread",
    "Internally displaced persons, new displacement associated with disasters (number of cases)": "displ_pax_disaster",
    "Logistics performance index: Ease of arranging competitively priced shipments (1=low to 5=high)": "log_ease_ships",
    "Logistics performance index: Overall (1=low to 5=high)": "log_overall",
    "Logistics performance index: Quality of trade and transport-related infrastructure (1=low to 5=high)": "log_infra",
    "People using at least basic drinking water services (% of population)": "basic_drink_water",
    "People using at least basic sanitation services (% of population)": "basic_san",
    "Public private partnerships investment in transport (current US$)": "ppp_transp_invest",
    "Renewable energy consumption (% of total final energy consumption)": "renew_energy_use",
    "Research and development expenditure (% of GDP)": "rnd_exp_gdp",
    "Researchers in R&D (per million people)": "research_per_m",
    "School enrollment, preprimary (% gross)": "preprimary_enroll",
    "School enrollment, primary (% net)": "primary_enroll",
    "School enrollment, secondary (% net)": "secondary_enroll",
    "Access to electricity (% of rural population with access": "rural_ele_access",
    "Access to electricity (% of urban population with access": "urban_ele_access",
    "Unnamed: 0": "unnamed_0",
    "Age-standardized suicide rates (per 100 000 population)": "suicide_rates",
    "Current health expenditure (CHE) as percentage of gross domestic product (GDP) (%)": "health_exp_gdp",
    "Deaths per 1 000 live births": "deaths_per_births",
    "Maternal mortality ratio (per 100 000 live births)": "maternal_mort_rat",
    "Medical doctors (per 10,000)": "medical_doctors_per_10k",
    "Premature deaths due to noncommunicable diseases (NCD) as a proportion of all NCD deaths": "premature_deaths",
    "UHC Service Coverage sub-index on noncommunicable diseases": "uhc_ncd_coverage",
    "Borrowed from a formal financial institution (% age 15+)": "borrowed_formal_fin",
    "Financial institution account (% age 15+)": "fin_account_age",
    "Made or received a digital payment (% age 15+)": "dig_payment_age",
    "Acidification": "acidification",
    "Agriculture": "agriculture",
    "Air Quality": "air_quality",
    "Biodiversity & Habitat": "bio_habitat",
    "Climate Change": "climate_change",
    "Ecosystem Services": "eco_services",
    "Environmental Performance Index": "environ_perf",
    "Fisheries": "fisheries",
    "Heavy Metals": "heavy_metals",
    "Sanitation & Drinking Water": "san_drink_water",
    "Sustainable Pesticide Use": "sust_pest_use",
    "UnSafe sanitation": "unsafe_san",
    "Unsafe Drinking Water": "unsafe_drink_w",
    "Waste Management": "waste_mgmt",
    "Wastewater Treatment": "wastew_treat",
    "Water Resources": "water_res",
    "Completion rate, primary education, both sexes (%)": "primary_compl_rate",
    "Proportion of students at the end of lower secondary education achieving at least a minimum proficiency level in mathematics, both sexes (%)": "prof_math",
    "Net enrolment rate, early childhood education, both sexes (%)": "early_enroll",
    "Percentage of pupils enrolled in primary education who are at least 2 years over-age for their current grade, both sexes (%)": "over_age_primary",
    "Percentage of pupils enrolled in lower secondary general education who are at least 2 years over-age for their current grade, both sexes (%)": "over_age_sec",
    "Participation rate of youth and adults in formal and non-formal education and training in the previous 12 months, 15-24 years old, both sexes (%)": "part_formal_nonf",
    "Proportion of students at the end of lower secondary education achieving at least a minimum proficiency level in reading, both sexes (%)": "prof_reading",
    "Population using safely managed sanitation services (%)": "safely_mg_san",
    "Agricultural water withdrawal as % of total renewable water resources": "agri_water_withd",
    "Agricultural R&D spending (constant 2011 PPP$)": "agri_rnd_sp",
    "Agricultural researchers (FTE)": "agri_res_fte",
    "Total Agro Emissions": "total_agro_emi",
    "rtfpna": "rtfpna",
    "rgdpna": "rgdpna",
    "pop": "pop",
    "gdppcppp_const": "gdppcppp_const",
    "Electric power transmission and distribution losses (% of output)": "ele_power_trans",
    "CO2 emissions from transport (% of total fuel combustion)": "co2_trans",
    "Number_of_products_Value": "products_value",
    "Concentration_Index_Value": "concent_index",
    "E-Government_Index":"egov_index",
    "Prison Population Rate": "pr_pop_rate",
    "Water Use Efficiency Rate": "water_use_ef",
    "Control of Corruption": "control_corrupt", 
    "Economic Complexity Index": "eci_rank",
    "country": "country",
    "country_code":"country_code"
}
df["variable"] = ""
for var in variable_names:
    df.loc[df["variable_name"] == var, "variable"] = variable_names[var]
df = df.drop(columns="variable_name")

df["clasificacion"] = df["sector"]
# Save the DataFrame as a .dta file
output_file_path = '../data/output/clasificacion_variables.dta'
df.to_stata(output_file_path)

print(output_file_path)


Index(['variable_name', 'sector_area', 'sector', 'area_aux', 'sector_aux'], dtype='object')
../data/output/clasificacion_variables.dta


In [13]:
import pandas as pd

# Load the Excel file
excel_path = '../data/input/pwt1001.xlsx'  # replace with your actual file path
excel_data = pd.read_excel(excel_path, sheet_name='Data')

# Transform the dataset to panel data structure for the variable 'rtfpna'
pwt_df = excel_data[['countrycode', 'country', 'year', 'rtfpna',"rgdpna","pop"]]

pwt_df["gdppcppp_const"] = pwt_df["rgdpna"]/pwt_df["pop"]

# Save the transformed dataframe as a CSV file
output_pwt_path = '../data/output/pwt_df.csv'  # replace with your desired output file path

pwt_df = pwt_df.rename(columns = {"countrycode":"country_code"})
pwt_df = pwt_df.loc[pwt_df["year"]>2009,]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pwt_df["gdppcppp_const"] = pwt_df["rgdpna"]/pwt_df["pop"]


In [16]:
income_group_new = pd.read_stata("../data/input/income_group_new.dta")

income_group_new = pd.merge(income_group_new, pwt_df, how="left",on=["country_code","year"])

In [22]:
main_df = pd.read_csv("../data/output/main_df13.csv")
main_df = main_df.drop(columns="gdppcppp_const")
income_group_new = pd.read_stata("../data/input/income_group_new.dta")
income_group_new = income_group_new[["gdppcppp_const","country_code","year"]]
main_df = pd.merge(main_df, income_group_new, how="left", on=["country_code","year"])
main_df.to_csv("../data/output/main_df14.csv")


In [2]:
import pandas as pd
main_df = pd.read_csv("../data/output/main_df13.csv")

In [11]:
import plotly.graph_objects as go

# Categories (renamed as (1), (2), ... (10))
categories = ['(1)', '(2)', '(3)', '(4)', '(5)', '(6)', '(7)', '(8)', '(9)', '(10)']

# Old OCDE values (used for normalization)
ocde_values_raw = [1.00, 1.06, 7.49, 0.98, 0.87, 82.54, 18.57, 3.55, 0.33, 0.88]

# Values for Uruguay and LAC before normalization
uruguay_values_raw = [1.06, 1.37, 17.50, 0.95, 0.97, 77.38, 18.49, 3.39, 1.15, 2.74]
lac_values_raw = [1.02, 1.14, 82.26, 0.84, 0.83, 68.47, 25.52, 6.80, 0.64, 4.11]

# Assign 100 to OCDE values (as they represent the full polygon)
ocde_values = [100] * len(ocde_values_raw)

# Normalize Uruguay and LAC values as a percentage of old OCDE values
uruguay_values = [(u / o) * 100 for u, o in zip(uruguay_values_raw, ocde_values_raw)]
lac_values = [(l / o) * 100 for l, o in zip(lac_values_raw, ocde_values_raw)]

# Create radar chart
fig = go.Figure()

# Add trace for Uruguay (blue color, solid background)
fig.add_trace(go.Scatterpolar(
    r=uruguay_values,
    theta=categories,
    fill='toself',
    name='Uruguay',
    line=dict(color='blue'),
    opacity=1
))

# Add trace for LAC (green color, only lines)
fig.add_trace(go.Scatterpolar(
    r=lac_values,
    theta=categories,
    fill=None,
    name='LAC',
    line=dict(color='green')
))

# Add trace for OCDE (light blue color, full polygon)
fig.add_trace(go.Scatterpolar(
    r=ocde_values,
    theta=categories,
    fill='toself',
    name='OCDE (100%)',
    line=dict(color='lightblue')
))

# Update layout for background, Montserrat font, and legend
fig.update_layout(
    polar=dict(
        bgcolor='white',
        radialaxis=dict(
            visible=False,  # Hide 0, 20, 40... markers
        ),
        angularaxis=dict(
            linewidth=2,
            linecolor='darkgray'
        )
    ),
    font=dict(
        family="Montserrat"
    ),
    showlegend=True,
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.2,
        xanchor="center",
        x=0.5
    )
)

# Show the plot
fig.show()