# Capstone Project 1
Working with NumPy Matrices (NHANES Dataset)

## Import Libraries

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from scipy import stats
import seaborn as sns

## Load Data

In [None]:
# Read both body-measurement CSVs into float matrices with identical parser settings.
# NOTE(review): skip_header=1 discards exactly one leading line per file; confirm
# each CSV really has a single header row, otherwise a row of NaNs will appear.
male, female = (
    np.genfromtxt(csv_path, delimiter=',', skip_header=1)
    for csv_path in (
        'nhanes_adult_male_bmx_2020.csv',
        'nhanes_adult_female_bmx_2020.csv',
    )
)
# Last expression of the cell: show (rows, columns) of each matrix.
male.shape, female.shape

## Histograms of Weights

In [None]:
# Column 0 of each matrix holds the weights that are plotted below.
male_weights = male[:, 0]
female_weights = female[:, 0]

# Shared x-axis limits so the two histograms can be compared by eye.
xmin = min(male_weights.min(), female_weights.min())
xmax = max(male_weights.max(), female_weights.max())

plt.figure(figsize=(8, 8))
# One stacked panel per group: female on top, male underneath.
panels = (
    (female_weights, 'Female Weight Distribution'),
    (male_weights, 'Male Weight Distribution'),
)
for position, (weights, title) in enumerate(panels, start=1):
    plt.subplot(2, 1, position)
    plt.hist(weights, bins=20)
    plt.xlim(xmin, xmax)
    plt.title(title)
plt.tight_layout()
plt.show()

## Boxplot Comparison

In [None]:
# Side-by-side box-and-whisker plots of the two weight samples.
# NOTE(review): newer Matplotlib releases (3.9+) deprecate `labels` in favour of
# `tick_labels` -- confirm against the installed version before changing it.
weight_groups = [female_weights, male_weights]
group_names = ['Female', 'Male']
plt.boxplot(weight_groups, labels=group_names)
plt.ylabel('Weight (kg)')
plt.show()

## Numerical Aggregates

In [None]:
def describe(data):
    """Return basic numerical aggregates of a sample as a dict.

    Each statistic is computed with the library defaults: ``np.std`` is the
    population standard deviation (``ddof=0``), ``stats.skew`` is the biased
    sample skewness and ``stats.kurtosis`` is the excess (Fisher) kurtosis.

    Parameters
    ----------
    data : array-like
        One-dimensional numeric sample.

    Returns
    -------
    dict
        Keys ``'Mean'``, ``'Median'``, ``'Std Dev'``, ``'Skewness'`` and
        ``'Kurtosis'``, in that order.
    """
    # (label, estimator) pairs; dict insertion order follows this table.
    estimators = (
        ('Mean', np.mean),
        ('Median', np.median),
        ('Std Dev', np.std),
        ('Skewness', stats.skew),
        ('Kurtosis', stats.kurtosis),
    )
    return {label: estimate(data) for label, estimate in estimators}
# Display the aggregates as a (male, female) tuple of dicts.
tuple(describe(weights) for weights in (male_weights, female_weights))

## BMI Calculation (Females)

In [None]:
# BMI = weight / height^2, with height converted to metres first.
# NOTE(review): assumes column 0 is weight in kg and column 1 is height in cm
# (hence the division by 100) -- confirm against the dataset description.
height_m = female[:, 1] / 100
bmi_female = female[:, 0] / (height_m**2)
# Append BMI as column index 7, which is where the later cells read it from.
# Slicing to the first 7 (measurement) columns makes this cell idempotent:
# re-running it replaces the BMI column instead of stacking a duplicate on
# every execution, so `female` keeps a stable 8-column shape.
female = np.column_stack((female[:, :7], bmi_female))

## Standardization

In [None]:
# Column-wise z-scores: subtract each column's mean and divide by its
# population standard deviation (NumPy's default, ddof=0).
column_means = female.mean(axis=0)
column_stds = female.std(axis=0)
zfemale = (female - column_means) / column_stds

## Pairplot & Correlation

In [None]:
# Map each display label to the zfemale column it is taken from.
# NOTE(review): the label-to-column assignment is carried over unchanged;
# verify the column order against the dataset description.
column_of = {'Weight': 0, 'Height': 1, 'Waist': 6, 'Hip': 5, 'BMI': 7}
labels = list(column_of)
cols = list(column_of.values())

# Standardized values of the selected columns, one named column per label.
df = pd.DataFrame(zfemale[:, cols], columns=labels)

# Scatterplot matrix of every pair of selected variables.
sns.pairplot(df)
plt.show()

# Last expression of the cell: default (Pearson) and Spearman correlation matrices.
(
    df.corr(),
    df.corr(method='spearman'),
)

## Lowest & Highest BMI

In [None]:
# Rank rows by the unstandardized BMI column (index 7), ascending.
bmi = female[:, 7]
idx = bmi.argsort()

# Row indices of the five smallest and the five largest BMI values.
lowest_five = idx[:5]
highest_five = idx[-5:]

# Display the standardized measurements for those rows.
zfemale[lowest_five], zfemale[highest_five]