In [49]:
from pathlib import Path
import sys
import pandas as pd
from sqlalchemy import create_engine
import os

In [50]:
current_dir = os.getcwd()
data_dir = os.path.join(current_dir, "data")
dta_path = os.path.join(data_dir, "DDCGdata_final.dta")

### For map graphic

In [51]:
df = pd.read_stata(dta_path)

One or more strings in the dta file could not be decoded using utf-8, and
so the fallback encoding of latin-1 is being used.  This can happen when a file
has been incorrectly encoded by Stata or some other software. You should verify
the string values returned are correct.
  df = pd.read_stata(dta_path)


In [52]:
trial1 = df[['country_name', 'wbcode', 'year', 'yeardem', 'dem', 'yearrev', 'region']]
trial1 = trial1.rename(columns={
    'country_name': 'name',
    'wbcode': 'code'
})

In [53]:
vis1_dir=os.path.join(current_dir, "final_html")

In [54]:
csv_path = os.path.join(vis1_dir, "trial1.csv")
#trial1.to_csv(csv_path, index=False)
trial1.loc[trial1["code"] == "ZAR", "code"] = "COD"

#Correctiong of the map
non_afr = ["AFG", "YEM", "OMN", "SAU", "QAT", "BHR", "KWT", "UAE",
           "IRQ", "IRN", "JOR", "SYR", "LBN", "PAK"]

trial1.loc[trial1["code"].isin(non_afr), "region"] = "NO-AFR"


In [55]:
#getting the information for LAC
LAC = trial1[trial1["region"].isin(["LAC"])]
LAC=LAC.rename(columns={'code': 'id'})
lac_path = os.path.join(vis1_dir, "lac.csv")
LAC.to_csv(lac_path, index=False)

In [56]:
lac_2010 = LAC[LAC["year"] == 2010]
lac_2010_path = os.path.join(vis1_dir, "lac_2010.csv")
lac_2010.to_csv(lac_2010_path, index=False)

In [57]:
# Getting the information for Africa
AFR = trial1[trial1["region"].isin(["AFR", "MNA"])]
AFR=AFR.rename(columns={'code': 'id'})
afr_path = os.path.join(vis1_dir, "afr.csv")
AFR.to_csv(afr_path, index=False)

In [58]:
trial1 = trial1.drop(columns=['yeardem'])
trial1 = trial1.rename(columns={'dem': 'yeardem'})

### For inequality/Repression graphic


In [59]:
df=pd.read_csv("data/V-Dem-CY-Core-v15.csv")

In [60]:
repression=df[['country_name', 'country_text_id', 'year', 'v2csreprss', 'v2x_libdem']]

In [61]:
df = pd.read_stata("data/swiid9_9.dta")
df.loc[df["country"] == "United States", "country"] = "United States of America"


In [62]:
#IA: how to gete the average of multiple columns that start with the same name like gini_disp1, gin_disp2, etc. in pandas
gini_cols = df.filter(regex=r"^_\d+_gini_disp$").columns

df["gini_disp_mean"] = df[gini_cols].mean(axis=1)

df_clean = df[["country", "year", "gini_disp_mean"]]

  df["gini_disp_mean"] = df[gini_cols].mean(axis=1)


In [63]:
#merging the datasets
merged_df = pd.merge(repression, df_clean, left_on=['country_name', 'year'], right_on=['country', 'year'], how='inner')

#IA: How to create a category variable in pandas based on the values of another continues variable
def classify_libdem(value):
    if value < 0.25:
        return 'Autocratic'
    elif 0.25 <= value < 0.5:
        return 'Electoral Authoritarian'
    elif 0.5 <= value < 0.75:
        return 'Minimally Democratic'
    else:
        return 'Democratic'
    
merged_df['class'] = merged_df['v2x_libdem'].apply(classify_libdem) 
#Only keep 1990 onwards
inequality_repression = merged_df[merged_df['year'] >= 1980]
inequality_repression = inequality_repression[inequality_repression['year'] < 2021]

In [64]:
csv_path = os.path.join(vis1_dir, "scatter.csv")
inequality_repression.to_csv(csv_path, index=False)


## Creating new GeoJson for Africa and LATAM   

In [65]:
import geopandas as gpd

In [66]:
data_dir = Path(data_dir)
vis1_dir=Path(vis1_dir)
fp = data_dir/"world.geojson"
gdf = gpd.read_file(fp)
print(gdf.head())
print("CRS:", gdf.crs)

    id                  name  \
0  AFG           Afghanistan   
1  AGO                Angola   
2  ALB               Albania   
3  ARE  United Arab Emirates   
4  ARG             Argentina   

                                            geometry  
0  POLYGON ((61.21082 35.65007, 62.23065 35.27066...  
1  MULTIPOLYGON (((16.32653 -5.87747, 16.57318 -6...  
2  POLYGON ((20.59025 41.8554, 20.46318 41.51509,...  
3  POLYGON ((51.57952 24.2455, 51.75744 24.29407,...  
4  MULTIPOLYGON (((-65.5 -55.2, -66.45 -55.25, -6...  
CRS: EPSG:4326


In [67]:
gdf["id_key"] = gdf["id"].astype(str).str.zfill(3)  
trial1["code_key"] = trial1["code"].astype(str).str.zfill(3)
gdf = gdf.merge(trial1[["code_key", "region"]], left_on="id_key", right_on="code_key", how="left")


In [68]:
LAC = gdf[gdf["region"].isin(["LAC"])]
out_path = vis1_dir / "lac.geojson"
LAC.to_file(out_path, driver="GeoJSON")

In [69]:
AFR = gdf[gdf["region"].isin(["AFR", "MNA"])]
out_path = vis1_dir / "afr.geojson"
AFR.to_file(out_path, driver="GeoJSON")