In [1]:
import pandas as pd
import os

# Load the dataset
df = pd.read_csv("faodata.csv")

# Create a directory to store the CSV files
output_dir = "features_csv_files"
os.makedirs(output_dir, exist_ok=True)

# Group the dataset by the 'Item' column
grouped = df.groupby('Item')

# Save each group as a separate CSV file
for item, group in grouped:
    # Replace spaces and special characters in the filename
    filename = f"{item}.csv".replace(" ", "_").replace("/", "_").replace("(", "").replace(")", "")
    
    # Save the group to a CSV file
    group.to_csv(os.path.join(output_dir, filename), index=False)
    
    print(f"Saved: {filename}")

print(f"\nAll CSV files have been saved in the '{output_dir}' directory.")

Saved: Average_dietary_energy_requirement_kcal_cap_day.csv
Saved: Average_dietary_energy_supply_adequacy_percent_3-year_average.csv
Saved: Average_fat_supply_g_cap_day_3-year_average.csv
Saved: Average_protein_supply_g_cap_day_3-year_average.csv
Saved: Average_supply_of_protein_of_animal_origin_g_cap_day_3-year_average.csv
Saved: Cereal_import_dependency_ratio_percent_3-year_average.csv
Saved: Coefficient_of_variation_of_habitual_caloric_consumption_distribution_real_number.csv
Saved: Dietary_energy_supply_used_in_the_estimation_of_prevalence_of_undernourishment_kcal_cap_day.csv
Saved: Dietary_energy_supply_used_in_the_estimation_of_prevalence_of_undernourishment_kcal_cap_day_3-year_average.csv
Saved: Gross_domestic_product_per_capita,_PPP,_constant_2017_international_$.csv
Saved: Incidence_of_caloric_losses_at_retail_distribution_level_percent.csv
Saved: Minimum_dietary_energy_requirement__kcal_cap_day.csv
Saved: Number_of_children_under_5_years_affected_by_wasting_million.csv
Saved: 

In [2]:
df.columns

Index(['Domain', 'Area', 'Element', 'Item', 'Year', 'Unit', 'Value'], dtype='object')

In [3]:
columns_to_drop = ['Domain', 'Element', 'Unit']
df = df.drop(columns=columns_to_drop, errors='ignore')

In [4]:
df['Item'].unique()

array(['Average dietary energy supply adequacy (percent) (3-year average)',
       'Dietary energy supply used in the estimation of prevalence of undernourishment (kcal/cap/day)',
       'Dietary energy supply used in the estimation of prevalence of undernourishment (kcal/cap/day) (3-year average)',
       'Share of dietary energy supply derived from cereals, roots and tubers (percent) (3-year average)',
       'Average protein supply (g/cap/day) (3-year average)',
       'Average supply of protein of animal origin (g/cap/day) (3-year average)',
       'Gross domestic product per capita, PPP, (constant 2017 international $)',
       'Prevalence of undernourishment (percent) (3-year average)',
       'Number of people undernourished (million) (3-year average)',
       'Prevalence of severe food insecurity in the total population (percent) (3-year average)',
       'Prevalence of severe food insecurity in the male adult population (percent) (3-year average)',
       'Prevalence of severe

In [5]:
# Liste des valeurs spécifiques dans la colonne 'Item'
items_to_select = [
    'Average dietary energy supply adequacy (percent) (3-year average)',
    'Dietary energy supply used in the estimation of prevalence of undernourishment (kcal/cap/day) (3-year average)',
    'Share of dietary energy supply derived from cereals, roots and tubers (percent) (3-year average)',
    'Prevalence of anemia among women of reproductive age (15-49 years) (percent)',
    'Prevalence of severe food insecurity in the total population (percent) (3-year average)',
    'Gross domestic product per capita, PPP, (constant 2017 international $)',
    'Average protein supply (g/cap/day) (3-year average)',
    'Percentage of children under 5 years affected by wasting (percent)',
    'Percentage of population using safely managed drinking water services (percent)',
    'Cereal import dependency ratio (percent) (3-year average)',
    'Minimum dietary energy requirement  (kcal/cap/day)',
    'Average supply of protein of animal origin (g/cap/day) (3-year average)',
    'Prevalence of undernourishment (percent) (3-year average)'
]

# Filtrer les lignes où la colonne 'Item' contient ces valeurs
df_undernourishment = df[df['Item'].isin(items_to_select)]

# Afficher les premières lignes du DataFrame filtré
df_undernourishment.head()

Unnamed: 0,Area,Item,Year,Value
0,Afghanistan,Average dietary energy supply adequacy (percen...,2000-2002,87
1,Afghanistan,Average dietary energy supply adequacy (percen...,2001-2003,88
2,Afghanistan,Average dietary energy supply adequacy (percen...,2002-2004,91
3,Afghanistan,Average dietary energy supply adequacy (percen...,2003-2005,92
4,Afghanistan,Average dietary energy supply adequacy (percen...,2004-2006,93


In [6]:
df_undernourishment.tail()

Unnamed: 0,Area,Item,Year,Value
173353,Zimbabwe,Minimum dietary energy requirement (kcal/cap/...,2019,1740
173354,Zimbabwe,Minimum dietary energy requirement (kcal/cap/...,2020,1743
173355,Zimbabwe,Minimum dietary energy requirement (kcal/cap/...,2021,1747
173356,Zimbabwe,Minimum dietary energy requirement (kcal/cap/...,2022,1752
173357,Zimbabwe,Minimum dietary energy requirement (kcal/cap/...,2023,1757


In [7]:
df_undernourishment.shape

(48523, 4)

In [8]:
df_undernourishment.isna().sum()

Area        0
Item        0
Year        0
Value    5518
dtype: int64

In [9]:
df_undernourishment['Item'].value_counts()

Item
Percentage of children under 5 years affected by wasting (percent)                                                4693
Minimum dietary energy requirement  (kcal/cap/day)                                                                4632
Prevalence of undernourishment (percent) (3-year average)                                                         4488
Gross domestic product per capita, PPP, (constant 2017 international $)                                           4330
Prevalence of anemia among women of reproductive age (15-49 years) (percent)                                      4080
Dietary energy supply used in the estimation of prevalence of undernourishment (kcal/cap/day) (3-year average)    3828
Average dietary energy supply adequacy (percent) (3-year average)                                                 3809
Share of dietary energy supply derived from cereals, roots and tubers (percent) (3-year average)                  3578
Average protein supply (g/cap/day) (3-year 

In [10]:
df_undernourishment.to_csv('undernourishment.csv')

In [13]:
data = pd.read_csv('undernourishment.csv')
data.columns

Index(['Unnamed: 0', 'Area', 'Item', 'Year', 'Value'], dtype='object')

In [14]:
data.head()

Unnamed: 0.1,Unnamed: 0,Area,Item,Year,Value
0,0,Afghanistan,Average dietary energy supply adequacy (percen...,2000-2002,87
1,1,Afghanistan,Average dietary energy supply adequacy (percen...,2001-2003,88
2,2,Afghanistan,Average dietary energy supply adequacy (percen...,2002-2004,91
3,3,Afghanistan,Average dietary energy supply adequacy (percen...,2003-2005,92
4,4,Afghanistan,Average dietary energy supply adequacy (percen...,2004-2006,93
