## MEDICAL DATA VISUALIZER

### TASK

In [169]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [170]:
mainpath = "/Users/ezequielpolacco/Downloads/"
filename = "medical_examination.csv"
fullpath = mainpath + "/" + filename

In [171]:
data = pd.read_csv(fullpath)

In [172]:
data.head()

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,18393,2,168,62.0,110,80,1,1,0,0,1,0
1,1,20228,1,156,85.0,140,90,3,1,0,0,1,1
2,2,18857,1,165,64.0,130,70,3,1,0,0,0,1
3,3,17623,2,169,82.0,150,100,1,1,0,0,1,1
4,4,17474,1,156,56.0,100,60,1,1,0,0,0,0


In [173]:
data.shape

(70000, 13)

In [174]:
data.dtypes

id               int64
age              int64
gender           int64
height           int64
weight         float64
ap_hi            int64
ap_lo            int64
cholesterol      int64
gluc             int64
smoke            int64
alco             int64
active           int64
cardio           int64
dtype: object

In [175]:
data.tail()

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
69995,99993,19240,2,168,76.0,120,80,1,1,1,0,1,0
69996,99995,22601,1,158,126.0,140,90,2,2,0,0,1,1
69997,99996,19066,2,183,105.0,180,90,3,1,0,1,0,1
69998,99998,22431,1,163,72.0,135,80,1,2,0,0,0,1
69999,99999,20540,1,170,72.0,120,80,2,1,0,0,1,0


In [176]:
# "age" column value : turn days in years value

In [177]:
data["age"] = data["age"] // 365.25

In [178]:
# "height" column value : turn centimeters in meters value

In [179]:
data["height"] = data["height"] / 100

In [180]:
data.head()

Unnamed: 0,id,age,gender,height,weight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,50,2,1.68,62.0,110,80,1,1,0,0,1,0
1,1,55,1,1.56,85.0,140,90,3,1,0,0,1,1
2,2,51,1,1.65,64.0,130,70,3,1,0,0,0,1
3,3,48,2,1.69,82.0,150,100,1,1,0,0,1,1
4,4,47,1,1.56,56.0,100,60,1,1,0,0,0,0


In [181]:
# Insert a "overweight" column

In [182]:
data.insert(5, "overweight", data["weight"] / np.square((data["height"])))

In [183]:
data.head()

Unnamed: 0,id,age,gender,height,weight,overweight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,50,2,1.68,62.0,21.96712,110,80,1,1,0,0,1,0
1,1,55,1,1.56,85.0,34.927679,140,90,3,1,0,0,1,1
2,2,51,1,1.65,64.0,23.507805,130,70,3,1,0,0,0,1
3,3,48,2,1.69,82.0,28.710479,150,100,1,1,0,0,1,1
4,4,47,1,1.56,56.0,23.011177,100,60,1,1,0,0,0,0


In [184]:
data["overweight"] = [1 if o > 25 else 0 for o in data["overweight"]]

In [185]:
data.head()

Unnamed: 0,id,age,gender,height,weight,overweight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,50,2,1.68,62.0,0,110,80,1,1,0,0,1,0
1,1,55,1,1.56,85.0,1,140,90,3,1,0,0,1,1
2,2,51,1,1.65,64.0,0,130,70,3,1,0,0,0,1
3,3,48,2,1.69,82.0,1,150,100,1,1,0,0,1,1
4,4,47,1,1.56,56.0,0,100,60,1,1,0,0,0,0


In [186]:
data.tail()

Unnamed: 0,id,age,gender,height,weight,overweight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
69995,99993,52,2,1.68,76.0,1,120,80,1,1,1,0,1,0
69996,99995,61,1,1.58,126.0,1,140,90,2,2,0,0,1,1
69997,99996,52,2,1.83,105.0,1,180,90,3,1,0,1,0,1
69998,99998,61,1,1.63,72.0,1,135,80,1,2,0,0,0,1
69999,99999,56,1,1.7,72.0,0,120,80,2,1,0,0,1,0


In [187]:
# Normalize the data by making 0 always good and 1 always bad. 
#If the value of cholesterol or gluc is 1, make the value 0. If the value is more than 1, make the value 1.

In [188]:
data["cholesterol"] = [0 if c <= 1 else 1 for c in data["cholesterol"]]


In [189]:
data.head()

Unnamed: 0,id,age,gender,height,weight,overweight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,50,2,1.68,62.0,0,110,80,0,1,0,0,1,0
1,1,55,1,1.56,85.0,1,140,90,1,1,0,0,1,1
2,2,51,1,1.65,64.0,0,130,70,1,1,0,0,0,1
3,3,48,2,1.69,82.0,1,150,100,0,1,0,0,1,1
4,4,47,1,1.56,56.0,0,100,60,0,1,0,0,0,0


In [190]:
data["gluc"] = [0 if g <= 1 else 1 for g in data["gluc"]]

In [191]:
data.head()

Unnamed: 0,id,age,gender,height,weight,overweight,ap_hi,ap_lo,cholesterol,gluc,smoke,alco,active,cardio
0,0,50,2,1.68,62.0,0,110,80,0,0,0,0,1,0
1,1,55,1,1.56,85.0,1,140,90,1,0,0,0,1,1
2,2,51,1,1.65,64.0,0,130,70,1,0,0,0,0,1
3,3,48,2,1.69,82.0,1,150,100,0,0,0,0,1,1
4,4,47,1,1.56,56.0,0,100,60,0,0,0,0,0,0


In [192]:
# Convert the data into long format and create a chart that shows the value counts of the categorical features 
#using seaborn's catplot(). 
#The dataset should be split by 'Cardio' so there is one chart for each cardio value. 

In [193]:
data2 = pd.melt(data, id_vars=["cardio"], value_vars = ['cholesterol', 'gluc', 'smoke', 'alco', 'active',
                         'overweight'])

In [194]:
data2.head()

Unnamed: 0,cardio,variable,value
0,0,cholesterol,0
1,1,cholesterol,1
2,1,cholesterol,1
3,1,cholesterol,0
4,0,cholesterol,0


In [162]:
data2 = pd.DataFrame(data2.groupby(['cardio', 'variable','value'])['value'].count()).rename(columns={'value': 'total'})

In [163]:
data2.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,total
cardio,variable,value,Unnamed: 3_level_1
0,active,0,6378
0,active,1,28643
0,alco,0,33080
0,alco,1,1941
0,cholesterol,0,29330


In [200]:
cardio_plot = sns.catplot(*, x = "variable", y = "total", hue = "value", col = "cardio", data = data2, kind = "bar")
#fig = cardio_plot.fig

SyntaxError: invalid syntax (<ipython-input-200-bfb71480252d>, line 1)