# Dimensionality reduction with PCA

In [4]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn as sklearn
import seaborn as sns
from similarity_functions import asymmetric_cosine, jaccard 
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import PCA

%matplotlib inline

In [14]:
# Load the training table a single time and split it into features and labels.
# The file is tab-separated; its first column is used as the row index and the
# 'cuisine' column carries the label of each row.
train_raw = pd.read_csv('train_dataset.csv', sep = '\t', index_col = 0)

# Feature matrix: every column except the label.
X_train = train_raw.drop('cuisine', axis = 1)

# Labels as a one-column DataFrame. Selecting from the same parsed table means
# it already shares X_train's index, so no separate read or in-place re-index
# is needed.
cuisines = train_raw[['cuisine']]

In [15]:
# Preview the first five rows of the feature matrix (one row per recipe id,
# one 0/1 column per ingredient).
X_train.head()

Unnamed: 0,active_dry_yeast,allpurpose_flour,avocado,bacon,baguette,baking_soda,basil,beans,beansprouts,beef,...,mozzarella_shredded_cheese,onions_sliced_green,vegetable,potatoes_sweet,vegetable_cooking_spray,vinegar_white,onion_white,pepper_white,vinegar_white_wine,pepper_yellow_bell
10259,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
25693,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
20130,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
13162,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6602,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# Flip the matrix so that ingredients become the rows and recipes the columns.
X_T = X_train.transpose()

# Every column of the transposed matrix is one recipe, so the recipe count is
# the number of columns (not rows) of X_T.
n = len(X_T.columns)
print("number of recipes:", n)

X_T.head()

number of recipes: 37340


Unnamed: 0,10259,25693,20130,13162,6602,42779,3735,16903,12734,5875,...,8089,6153,25557,24348,7377,29109,11462,2238,41882,2362
active_dry_yeast,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
allpurpose_flour,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
avocado,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
bacon,0,0,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,0
baguette,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [17]:
# PCA cannot extract more components than min(n_samples, n_features) of the
# matrix it is fitted on. X_T has far fewer rows (ingredients) than columns
# (recipes), so requesting one component per recipe (n) is out of range and
# current scikit-learn releases raise a ValueError for it. Ask for the largest
# valid number of components instead.
n_components = min(X_T.shape)

# PCA object
pca = PCA(n_components = n_components)

# Fit on the transposed matrix (rows = ingredients, columns = recipes) and
# project it; the result has one row per ingredient and one column per
# principal component.
X_train_pca_T = pca.fit_transform(X_T)

# Transpose so that rows are principal components and columns are ingredients.
X_train_pca = X_train_pca_T.T

In [18]:
# Wrap the PCA result in a DataFrame whose columns carry the same labels as
# the columns of X_train; the rows keep a default integer index.
X_train_pca = pd.DataFrame(X_train_pca, columns = X_train.columns)
X_train_pca.head()

Unnamed: 0,active_dry_yeast,allpurpose_flour,avocado,bacon,baguette,baking_soda,basil,beans,beansprouts,beef,...,mozzarella_shredded_cheese,onions_sliced_green,vegetable,potatoes_sweet,vegetable_cooking_spray,vinegar_white,onion_white,pepper_white,vinegar_white_wine,pepper_yellow_bell
0,-4.226202,12.320884,-0.005376,-0.222035,-4.211263,-3.34427,-3.221364,2.385825,-3.70325,-1.196846,...,-3.489603,-3.857968,14.617534,-3.555858,-4.629896,-3.005936,-2.662959,-3.90087,-3.902531,-3.765
1,1.87961,37.647692,-4.083548,-0.268213,-1.204777,9.577921,-1.012628,-3.933304,0.0089,-0.531252,...,-1.425662,-0.933354,11.970045,-0.059157,-0.764868,0.038476,-1.807476,0.106766,-1.259017,-1.698306
2,-0.836278,-13.026019,0.004456,-0.323731,-0.902204,-1.574164,0.730311,-0.150121,2.4662,2.194721,...,-0.884236,-0.439898,14.071268,-0.061459,-0.43266,1.028404,-0.251001,1.730022,-0.808038,-0.779638
3,-1.355229,-14.003337,0.850557,-2.797684,-0.270012,-2.855993,0.417839,-0.772119,2.246197,0.345336,...,-2.740197,0.481975,18.431779,-1.040781,-0.999895,0.465059,0.811773,0.277232,-0.209404,-0.291876
4,-0.002948,6.994738,-2.078868,2.258141,-0.139643,0.757194,-0.831923,1.576642,-0.802017,1.329311,...,-0.17276,0.053787,9.387959,0.657032,0.283215,-0.246152,-1.013676,-0.764933,-0.351068,-0.194087


In [32]:
# Sanity check: show the (rows, columns) dimensions of the PCA table.
X_train_pca.shape

(267, 267)

In [33]:
# Persist the PCA table as a tab-separated file (the row index is written too).
# NOTE(review): the table has principal components as rows and ingredients as
# columns, not one row per recipe, despite "recipe" in the file name - confirm
# this is the intended artifact.
X_train_pca.to_csv(path_or_buf = 'train_dataset_recipe_pca.csv', sep = '\t')