# Anorexia & Bulimia Preprocessor

## Imports

In [1]:
import pandas as pd

## Load Data

In [2]:
# Assemble the path of the raw CSV from its base name and read it into `df`.
file_name = "number-with-anorexia-and-bulimia-nervosa"
csv_path = "../data/" + file_name + ".csv"
df = pd.read_csv(csv_path)
# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,Entity,Code,Year,Prevalence - Anorexia nervosa - Sex: Both - Age: All Ages (Number),Prevalence - Bulimia nervosa - Sex: Both - Age: All Ages (Number)
0,Afghanistan,AFG,1990,1869.815372,6671.754883
1,Afghanistan,AFG,1991,1940.434799,6880.802091
2,Afghanistan,AFG,1992,2379.431771,8447.597072
3,Afghanistan,AFG,1993,2815.379527,10093.391198
4,Afghanistan,AFG,1994,2868.647733,10426.222061


## Analysis

In [3]:
# Count the missing values in each column.
df.isna().sum()

Entity                                                                  0
Code                                                                  980
Year                                                                    0
Prevalence - Anorexia nervosa - Sex: Both - Age: All Ages (Number)      0
Prevalence - Bulimia nervosa - Sex: Both - Age: All Ages (Number)       0
dtype: int64

In [4]:
# Summary statistics for the Year column (count, mean, spread, min/max).
df.Year.describe()

count    6468.000000
mean     2003.500000
std         8.078372
min      1990.000000
25%      1996.750000
50%      2003.500000
75%      2010.250000
max      2017.000000
Name: Year, dtype: float64

## Data Cleaning

In [5]:
# Map the verbose prevalence headers onto short disorder names.
short_names = {
    'Prevalence - Anorexia nervosa - Sex: Both - Age: All Ages (Number)': "Anorexia",
    'Prevalence - Bulimia nervosa - Sex: Both - Age: All Ages (Number)': "Bulimia",
}
# Rename, then cast both count columns to int. The cast discards the
# fractional part rather than rounding (e.g. 1869.815372 becomes 1869).
df = (
    df
    .rename(columns=short_names)
    .astype({"Anorexia": int, "Bulimia": int})
)
df.head()

Unnamed: 0,Entity,Code,Year,Anorexia,Bulimia
0,Afghanistan,AFG,1990,1869,6671
1,Afghanistan,AFG,1991,1940,6880
2,Afghanistan,AFG,1992,2379,8447
3,Afghanistan,AFG,1993,2815,10093
4,Afghanistan,AFG,1994,2868,10426


In [6]:
# Remove the `Code` column (values such as "AFG") from the working frame.
# errors="ignore" keeps this cell re-runnable: because `df` is reassigned,
# a second run would otherwise raise KeyError as `Code` is already gone.
df = df.drop(columns=['Code'], errors="ignore")
df.head()

Unnamed: 0,Entity,Year,Anorexia,Bulimia
0,Afghanistan,1990,1869,6671
1,Afghanistan,1991,1940,6880
2,Afghanistan,1992,2379,8447
3,Afghanistan,1993,2815,10093
4,Afghanistan,1994,2868,10426


In [7]:
# Keep only the rows recorded for 1990; the original row labels are preserved.
is_1990 = df["Year"].eq(1990)
df_1990 = df.loc[is_1990]
df_1990.head()

Unnamed: 0,Entity,Year,Anorexia,Bulimia
0,Afghanistan,1990,1869,6671
28,Albania,1990,932,3529
56,Algeria,1990,8745,36943
84,American Samoa,1990,24,68
112,Andean Latin America,1990,14020,95559


In [8]:
# Keep only the rows recorded for 2017; the original row labels are preserved.
is_2017 = df["Year"].eq(2017)
df_2017 = df.loc[is_2017]
df_2017.head()

Unnamed: 0,Entity,Year,Anorexia,Bulimia
27,Afghanistan,2017,7050,27198
55,Albania,2017,844,3497
83,Algeria,2017,14588,75117
111,American Samoa,2017,24,68
139,Andean Latin America,2017,24809,194054


## Save Data

In [9]:
# Write each single-year frame to its own CSV (the row index is written too,
# since to_csv is called with its defaults).
outputs = {
    "../data/anorexia-bulimia-1990.csv": df_1990,
    "../data/anorexia-bulimia-2017.csv": df_2017,
}
for out_path, frame in outputs.items():
    frame.to_csv(out_path)