### IMPORTS

In [1]:
# load, preprocess, scale, baseline
from wrangle import final_dataset

# manipulate data
import pandas as pd
import numpy as np

# visualize data
import matplotlib.pyplot as plt
from dimensionality_reduction_plots import  (proportion_variance_explained,
                                            scree_plot,
                                            variance_explained,
                                            pca_plot)

# put it all together
from models import put_it_all_together

# Plotting
from sklearn.model_selection import (learning_curve, LearningCurveDisplay,
                                     validation_curve, ValidationCurveDisplay)

# Seed NumPy's legacy global RNG so anything drawing from np.random is repeatable
# across runs. NOTE(review): the imported project helpers may rely on their own
# random_state arguments instead — confirm this seed is actually what they use.
np.random.seed(123)

$$
\textbf{Cardiovascular Disease}\\~\\
\textbf{Load, Clean, Preprocess, Scale, Baseline: Cardiovascular Disease}
$$

In [2]:
%%time
X_train_scaled_cd, X_test_scaled_cd, y_train_cd, y_test_cd = final_dataset(dataset='cvd')

CVD Loading and Cleaning...
CVD Loaded and Cleaned...

CVD Splitting...
CVD Split...

CVD Scaling...
CVD Scaled...

Baseline Accuracy Score: 0.51%

CPU times: user 130 ms, sys: 23.6 ms, total: 154 ms
Wall time: 153 ms


$$
\textbf{Cardiovascular Disease}\\~\\
\textbf{Perform All Steps, Run All Models}
$$

In [3]:
%%time
# Run CVD Model
results_cv = put_it_all_together(X_train=X_train_scaled_cd,
                                 y_train=y_train_cd,
                                 X_test=X_test_scaled_cd,
                                 y_test=y_test_cd,
                                 dset='cvd')


Running All Steps
Step: 1
Fitting and Predicting Expectation Maximization
Fitting and Predicting Gaussian Mixture
Done with Gaussian Mixture
Done with Expectation Maximization

Fitting and Predicting Clustering
Fitting and Predicting KMeans
Done with KMeans
Done with Clustering
Step: 1 Complete

Step: 2
Fitting PCA
Fitting General PCA
Done with General PCA
Done with PCA

Fitting ICA
Fitting and Transforming FastICA




Done with FastICA
Done with ICA

Fitting and Transforming Randomized Projections
Fitting and Transforing with Sparse Random Projection
Done with Sparse Random Projection
Done with Randomized Projections

Fitting and Transforming Manifold Learning
Fitting and Transforming with Locally Linear Embedding: Heissan Mapping
Done with Locally Linear Embedding: Heissan Mapping
Done with Manifold Learning
Step: 2 Complete

Step: 3
Fitting and Predicting Expectation Maximization
Fitting and Predicting Gaussian Mixture
Done with Gaussian Mixture
Done with Expectation Maximization

Fitting and Predicting Clustering
Fitting and Predicting KMeans
Done with KMeans
Done with Clustering
Fitting and Predicting Expectation Maximization
Fitting and Predicting Gaussian Mixture
Done with Gaussian Mixture
Done with Expectation Maximization

Fitting and Predicting Clustering
Fitting and Predicting KMeans
Done with KMeans
Done with Clustering
Fitting and Predicting Expectation Maximization
Fitting and Predictin

  return fit_method(estimator, *args, **kwargs)


In [4]:
# Pull each step's results for the CVD run out of the nested results mapping.
step1_cv, step2_cv, step3_cv, step4_cv, step5_cv = (
    results_cv['cvd'][step_key]
    for step_key in ('step1', 'step2', 'step3', 'step4', 'step5')
)

In [5]:
# Display the step-1 results for the CVD run. As the output shows, this is a dict
# whose 'gm' entry pairs the fitted GaussianMixture with a DataFrame that carries
# a `mixture_clusters` column alongside the scaled features.
step1_cv

{'gm': (GaussianMixture(n_components=2, random_state=123),
           height    weight     ap_hi     ap_lo  age_years  mixture_clusters  \
  263   -0.524043  1.037721  0.865226  0.945336   0.174782                 0   
  43787 -0.161536 -0.222075 -0.387919 -0.134438   1.512592                 1   
  9817   1.771836 -0.851972  0.865226  0.945336   1.066655                 0   
  52704  0.925986  1.807596 -0.387919 -0.134438   1.512592                 1   
  52589  1.771836 -0.502029 -1.014491 -1.214211  -2.054902                 1   
  ...         ...       ...       ...       ...        ...               ...   
  67520 -0.765714 -1.481870 -0.387919 -0.134438  -0.568446                 1   
  39233 -1.732400 -0.082097  0.865226  0.945336   1.066655                 0   
  52329  0.080136 -0.502029 -0.387919 -0.134438  -1.460320                 1   
  25952  0.080136 -0.642006 -0.387919 -0.134438   1.661237                 1   
  10452 -0.282372  0.547800  0.865226  0.945336   1.661237   

$$
\textbf{Nutrition Facts}\\~\\
\textbf{Load, Clean, Preprocess, Scale, Baseline: Nutrition Facts}
$$

In [6]:
%%time
X_train_scaled_nf, X_test_scaled_nf, y_train_nf, y_test_nf = final_dataset(dataset='nf')

NF Loading and Cleaning...
NF Loaded and Cleaned...

NF Splitting...
NF Split...

NF Scaling...
NF Scaled...

Baseline Accuracy Score: 0.44%

CPU times: user 15.8 s, sys: 82.2 ms, total: 15.9 s
Wall time: 15.9 s


$$
\textbf{Nutrition Facts}\\~\\
\textbf{Perform All Steps, Run All Models}
$$

In [7]:
%%time
# Run NF Model
results_nf = put_it_all_together(X_train=X_train_scaled_nf,
                                 y_train=y_train_nf,
                                 X_test=X_test_scaled_nf, 
                                 y_test=y_test_nf,
                                 dset='nf')


Running All Steps
Step: 1
Fitting and Predicting Expectation Maximization
Fitting and Predicting Gaussian Mixture
Done with Gaussian Mixture
Done with Expectation Maximization

Fitting and Predicting Clustering
Fitting and Predicting KMeans
Done with KMeans
Done with Clustering
Step: 1 Complete

Step: 2
Fitting PCA
Fitting General PCA
Done with General PCA
Done with PCA

Fitting ICA
Fitting and Transforming FastICA




Done with FastICA
Done with ICA

Fitting and Transforming Randomized Projections
Fitting and Transforing with Sparse Random Projection
Done with Sparse Random Projection
Done with Randomized Projections

Fitting and Transforming Manifold Learning
Fitting and Transforming with Locally Linear Embedding: Heissan Mapping
Done with Locally Linear Embedding: Heissan Mapping
Done with Manifold Learning
Step: 2 Complete

Step: 3
Fitting and Predicting Expectation Maximization
Fitting and Predicting Gaussian Mixture
Done with Gaussian Mixture
Done with Expectation Maximization

Fitting and Predicting Clustering
Fitting and Predicting KMeans
Done with KMeans
Done with Clustering
Fitting and Predicting Expectation Maximization
Fitting and Predicting Gaussian Mixture
Done with Gaussian Mixture
Done with Expectation Maximization

Fitting and Predicting Clustering
Fitting and Predicting KMeans
Done with KMeans
Done with Clustering
Fitting and Predicting Expectation Maximization
Fitting and Predictin

In [8]:
# Pull each step's results for the NF run out of the nested results mapping.
step1_nf, step2_nf, step3_nf, step4_nf, step5_nf = (
    results_nf['nf'][step_key]
    for step_key in ('step1', 'step2', 'step3', 'step4', 'step5')
)

In [9]:
# Display the step-1 results for the NF run. As the output shows, this is a dict
# whose 'gm' entry pairs the fitted GaussianMixture with a DataFrame that carries
# `mixture_clusters` and `feat_clusters` columns alongside the scaled features.
step1_nf

{'gm': (GaussianMixture(n_components=2, random_state=123),
          protein  carbohydrate  cholesterol     water    sodium  \
  4222   0.846284     -0.650600     0.006982  0.412110 -0.475456   
  12765 -0.718910     -0.044696    -0.284982 -0.333440  0.079988   
  5551   1.382643     -0.616239     0.290098 -0.434772 -0.410226   
  7486  -0.538499      0.781121     0.555520 -0.537572  0.352769   
  13215 -1.111916     -0.400908    -0.435388  1.193884 -0.084076   
  ...         ...           ...          ...       ...       ...   
  13669 -0.909074      0.037198    -0.470778  0.818907 -0.139422   
  7511  -0.636018      2.235749    -0.470778 -2.696386  0.846937   
  5270  -0.714034      1.078919    -0.161119 -0.806812  0.068128   
  7518  -0.450731      1.983766    -0.470778 -1.404035  0.441719   
  958   -1.177254     -0.446723    -0.470778  1.422493 -0.590103   
  
         mixture_clusters  feat_clusters  
  4222                  0              4  
  12765                 1           