In [8]:
import pandas as pd
import matplotlib as plt
import numpy as np

# Start from an empty frame so `full_anemia_df` is always bound,
# even before the CSV load has been executed.
full_anemia_df = pd.DataFrame()

# Label of the indicator analysed in this notebook; echoed for the reader.
INDICATOR_LABEL = "Prevalence of anemia in pregnant women (aged 15-49)"
print(INDICATOR_LABEL)

Prevalence of anemia in pregnant women (aged 15-49)


In [9]:
# Load the raw CSV into `full_anemia_df`.
# Each handler prints a readable hint and then re-raises. Without the
# re-raise a failed load is silently ignored and the notebook only breaks
# later, with a misleading KeyError when columns are selected.
try:
    full_anemia_df = pd.read_csv('DataSources/WHO_GHO_NUTRITION_ANAEMIA_PREGNANT_PREV.csv')
    print("Data Source Found.")
except FileNotFoundError:
    print("Error: The data source file was not found. Check the path and filename.")
    raise  # stop here: nothing downstream can work without the data
except pd.errors.EmptyDataError:
    print("Error: The file exists but is empty.")
    raise
except Exception as e:
    print(f"An unexpected error occurred while reading the file: {e}")
    raise


# Print out column names
print(full_anemia_df.columns)

Data Source Found.
Index(['FREQ_LABEL', 'REF_AREA', 'REF_AREA_LABEL', 'INDICATOR_LABEL',
       'TIME_PERIOD', 'OBS_VALUE'],
      dtype='object')


In [10]:
# Visual separator printed between the three previews below.
section_rule = '*' * 50

# Preview of the frame as loaded.
print(full_anemia_df.head())
print(section_rule)

# Keep only REF_AREA, REF_AREA_LABEL, TIME_PERIOD and OBS_VALUE.
anemia_keep_cols = ["REF_AREA", "REF_AREA_LABEL", "TIME_PERIOD", "OBS_VALUE"]
full_anemia_df = full_anemia_df.loc[:, anemia_keep_cols]
print(full_anemia_df.head())
print(section_rule)

# Order rows by REF_AREA, then by TIME_PERIOD within each area, and
# renumber the index from 0 so it follows the new row order.
full_anemia_df = full_anemia_df.sort_values(
    ["REF_AREA", "TIME_PERIOD"],
    ignore_index=True,
)
print(full_anemia_df.head())
print(section_rule)


  FREQ_LABEL REF_AREA REF_AREA_LABEL  \
0     Annual      AFG    Afghanistan   
1     Annual      AFG    Afghanistan   
2     Annual      AFG    Afghanistan   
3     Annual      AFG    Afghanistan   
4     Annual      AFG    Afghanistan   

                                     INDICATOR_LABEL  TIME_PERIOD  OBS_VALUE  
0  Prevalence of anemia in pregnant women (aged 1...         2006       41.4  
1  Prevalence of anemia in pregnant women (aged 1...         2001       43.9  
2  Prevalence of anemia in pregnant women (aged 1...         2018       36.7  
3  Prevalence of anemia in pregnant women (aged 1...         2008       40.3  
4  Prevalence of anemia in pregnant women (aged 1...         2004       42.4  
**************************************************
  REF_AREA REF_AREA_LABEL  TIME_PERIOD  OBS_VALUE
0      AFG    Afghanistan         2006       41.4
1      AFG    Afghanistan         2001       43.9
2      AFG    Afghanistan         2018       36.7
3      AFG    Afghanistan         

In [12]:
# Saving clean data to new file in DataSources directory
output_path = "DataSources/Clean_Data_Anemia.csv"

# Write the cleaned frame without the index column. On failure the message
# is printed and the exception is re-raised, so a Run-All (or an automated
# execution) cannot finish "successfully" while the output file is missing
# or stale.
try:
    full_anemia_df.to_csv(output_path, index=False)
    print(f"File saved to {output_path}")
except Exception as e:
    print(f"Failed to save CSV: {e}")
    raise


File saved to DataSources/Clean_Data_Anemia.csv
