### Importing Dependencies

In [1]:
import pandas as pd
from sqlalchemy import create_engine
import requests
import time
from pprint import pprint
import numpy as np
import json


### Extract CSV into DataFrames

In [2]:
# Importing Food Information dataset from Kaggle
csv_file = "kaggle_food.csv"
nutrients_df = pd.read_csv(csv_file)
nutrients_df.head()

Unnamed: 0,Category,Description,Nutrient Data Bank Number,Data.Alpha Carotene,Data.Ash,Data.Beta Carotene,Data.Beta Cryptoxanthin,Data.Carbohydrate,Data.Cholesterol,Data.Choline,...,Data.Major Minerals.Potassium,Data.Major Minerals.Sodium,Data.Major Minerals.Zinc,Data.Vitamins.Vitamin A - IU,Data.Vitamins.Vitamin A - RAE,Data.Vitamins.Vitamin B12,Data.Vitamins.Vitamin B6,Data.Vitamins.Vitamin C,Data.Vitamins.Vitamin E,Data.Vitamins.Vitamin K
0,BUTTER,"BUTTER,WITH SALT",1001,0,2.11,158,0,0.06,215,19,...,24,576,0.09,2499,684,0.17,0.003,0.0,2.32,7.0
1,BUTTER,"BUTTER,WHIPPED,WITH SALT",1002,0,2.11,158,0,0.06,219,19,...,26,827,0.05,2499,684,0.13,0.003,0.0,2.32,7.0
2,BUTTER OIL,"BUTTER OIL,ANHYDROUS",1003,0,0.0,193,0,0.0,256,22,...,5,2,0.01,3069,840,0.01,0.001,0.0,2.8,8.6
3,CHEESE,"CHEESE,BLUE",1004,0,5.11,74,0,2.34,75,15,...,256,1395,2.66,763,198,1.22,0.166,0.0,0.25,2.4
4,CHEESE,"CHEESE,BRICK",1005,0,3.18,76,0,2.79,94,15,...,136,560,2.6,1080,292,1.26,0.065,0.0,0.26,2.5


In [3]:
# Importing Recipes by Ingredients dataset from Kaggle
csv_file = "master_recipe.csv"
recipes_df = pd.read_csv(csv_file, index_col=False)
recipes_df.head()

Unnamed: 0,label,achiote paste,achiote powder,acini di pepe,acorn squash,active dry yeast,adobo sauce,adobo seasoning,adzuki beans,agave nectar,...,yellow rock sugar,yellow squash,yellow summer squash,yellow tomato,yellowfin tuna,yogurt cheese,yucca root,yukon gold potatoes,yuzu juice,zucchini blossoms
0,Infineon Raceway Baked Beans,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Sour Cream Noodle Bake,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Middle-Eastern Eggplant Rounds,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Saffron Jewel Rice,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Thai Sweet and Sour Wings,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Transform Nutrients DataFrame

In [4]:
#Retaining only the columns necessary for the database (Food Information dataset)
lean_nutrient = nutrients_df.drop(columns= [
 'Data.Alpha Carotene',
 'Data.Ash',
 'Data.Beta Carotene',
 'Data.Beta Cryptoxanthin',
 'Data.Choline',
 'Data.Lutein and Zeaxanthin',
 'Data.Lycopene',
 'Data.Manganese',
 'Data.Niacin',
 'Data.Pantothenic Acid',
 'Data.Refuse Percentage',
 'Data.Retinol',
 'Data.Riboflavin',
 'Data.Selenium',
 'Data.Thiamin',
 'Data.Water',
 'Data.Fat.Total Lipid',
 'Data.Household Weights.1st Household Weight',
 'Data.Household Weights.1st Household Weight Description',
 'Data.Household Weights.2nd Household Weight',
 'Data.Household Weights.2nd Household Weight Description',
 'Data.Major Minerals.Calcium',
 'Data.Major Minerals.Copper',
 'Data.Major Minerals.Iron',
 'Data.Major Minerals.Magnesium',
 'Data.Major Minerals.Phosphorus',
 'Data.Major Minerals.Potassium',
 'Data.Major Minerals.Zinc',
 'Data.Vitamins.Vitamin A - IU',
 'Data.Vitamins.Vitamin A - RAE',
 'Data.Vitamins.Vitamin B12',
 'Data.Vitamins.Vitamin B6',
 'Data.Vitamins.Vitamin C',
 'Data.Vitamins.Vitamin E',
 'Data.Vitamins.Vitamin K'])

lean_nutrient.head()

Unnamed: 0,Category,Description,Nutrient Data Bank Number,Data.Carbohydrate,Data.Cholesterol,Data.Fiber,Data.Kilocalories,Data.Protein,Data.Sugar Total,Data.Fat.Monosaturated Fat,Data.Fat.Polysaturated Fat,Data.Fat.Saturated Fat,Data.Major Minerals.Sodium
0,BUTTER,"BUTTER,WITH SALT",1001,0.06,215,0.0,717,0.85,0.06,21.021,3.043,51.368,576
1,BUTTER,"BUTTER,WHIPPED,WITH SALT",1002,0.06,219,0.0,717,0.85,0.06,23.426,3.012,50.489,827
2,BUTTER OIL,"BUTTER OIL,ANHYDROUS",1003,0.0,256,0.0,876,0.28,0.0,28.732,3.694,61.924,2
3,CHEESE,"CHEESE,BLUE",1004,2.34,75,0.0,353,21.4,0.5,7.778,0.8,18.669,1395
4,CHEESE,"CHEESE,BRICK",1005,2.79,94,0.0,371,23.24,0.51,8.598,0.784,18.764,560


In [5]:
#Renaming the columns in the Food Information (lean_nutrient dataframe)
rename_nutrients = lean_nutrient.rename(columns={"Category" : "ingredient",
                                                "Description" : "food_description",
                                                "Nutrient Data Bank Number" : "ingredient_id",
                                                "Data.Carbohydrate" : "carbs",
                                                 "Data.Protein" : "protein",
                                                "Data.Cholesterol" : "cholesterol",
                                                "Data.Fiber" : "fiber",
                                                "Data.Kilocalories" : "calories",
                                                "Data.Sugar Total" : "sugar",
                                                "Data.Fat.Monosaturated Fat": "monosaturated_fat",
                                                "Data.Fat.Polysaturated Fat" : "polysaturated_fat",
                                                "Data.Fat.Saturated Fat" : "saturated_fat",
                                                "Data.Major Minerals.Sodium" : "sodium"})

rename_nutrients

Unnamed: 0,ingredient,food_description,ingredient_id,carbs,cholesterol,fiber,calories,protein,sugar,monosaturated_fat,polysaturated_fat,saturated_fat,sodium
0,BUTTER,"BUTTER,WITH SALT",1001,0.06,215,0.0,717,0.85,0.06,21.021,3.043,51.368,576
1,BUTTER,"BUTTER,WHIPPED,WITH SALT",1002,0.06,219,0.0,717,0.85,0.06,23.426,3.012,50.489,827
2,BUTTER OIL,"BUTTER OIL,ANHYDROUS",1003,0.00,256,0.0,876,0.28,0.00,28.732,3.694,61.924,2
3,CHEESE,"CHEESE,BLUE",1004,2.34,75,0.0,353,21.40,0.50,7.778,0.800,18.669,1395
4,CHEESE,"CHEESE,BRICK",1005,2.79,94,0.0,371,23.24,0.51,8.598,0.784,18.764,560
...,...,...,...,...,...,...,...,...,...,...,...,...,...
7408,FROG LEGS,"FROG LEGS,RAW",80200,0.00,50,0.0,73,16.40,0.00,0.053,0.102,0.076,58
7409,MACKEREL,"MACKEREL,SALTED",83110,0.00,95,0.0,305,18.50,0.00,8.320,6.210,7.148,4450
7410,SCALLOP,"SCALLOP,(BAY&SEA),CKD,STMD",90240,0.00,53,0.0,112,23.20,0.00,0.068,0.481,0.146,265
7411,SNAIL,"SNAIL,RAW",90560,2.00,50,0.0,90,16.10,0.00,0.259,0.252,0.361,70


In [6]:
#checking for missing values
null_check = rename_nutrients.isnull().sum().sum()

null_check

0

In [7]:
#saving final dataset as a csv
rename_nutrients.to_csv("output/kaggle_nutrients.csv",index=False)


### Transform Recipes DataFrame

In [95]:
# replacing spaces with underscores in the column names
ingred_list = recipes_df.columns.str.replace(' ', '_').tolist()
recipes_df.columns = ingred_list

recipes_df

Unnamed: 0,label,achiote_paste,achiote_powder,acini_di_pepe,acorn_squash,active_dry_yeast,adobo_sauce,adobo_seasoning,adzuki_beans,agave_nectar,...,yellow_rock_sugar,yellow_squash,yellow_summer_squash,yellow_tomato,yellowfin_tuna,yogurt_cheese,yucca_root,yukon_gold_potatoes,yuzu_juice,zucchini_blossoms
0,Infineon Raceway Baked Beans,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Sour Cream Noodle Bake,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Middle-Eastern Eggplant Rounds,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Saffron Jewel Rice,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Thai Sweet and Sour Wings,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29178,Spicy Agave Wave,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
29179,Apple Chimichangas,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29180,Summer Corn Salad,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29181,Zucchini Stuffed Tomatoes,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [167]:
ingred_list = recipes_df.columns.str.replace('-', '_').tolist()
recipes_df.columns = ingred_list

recipes_df

Unnamed: 0,label,achiote_paste,achiote_powder,acini_di_pepe,acorn_squash,active_dry_yeast,adobo_sauce,adobo_seasoning,adzuki_beans,agave_nectar,...,yellow_rock_sugar,yellow_squash,yellow_summer_squash,yellow_tomato,yellowfin_tuna,yogurt_cheese,yucca_root,yukon_gold_potatoes,yuzu_juice,zucchini_blossoms
0,Infineon Raceway Baked Beans,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Sour Cream Noodle Bake,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Middle-Eastern Eggplant Rounds,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Saffron Jewel Rice,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Thai Sweet and Sour Wings,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29178,Spicy Agave Wave,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
29179,Apple Chimichangas,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29180,Summer Corn Salad,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29181,Zucchini Stuffed Tomatoes,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [182]:
ingred_list = recipes_df.columns.str.replace("'", '').tolist()
recipes_df.columns = ingred_list

recipes_df

Unnamed: 0,label,achiote_paste,achiote_powder,acini_di_pepe,acorn_squash,active_dry_yeast,adobo_sauce,adobo_seasoning,adzuki_beans,agave_nectar,...,yellow_rock_sugar,yellow_squash,yellow_summer_squash,yellow_tomato,yellowfin_tuna,yogurt_cheese,yucca_root,yukon_gold_potatoes,yuzu_juice,zucchini_blossoms
0,Infineon Raceway Baked Beans,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Sour Cream Noodle Bake,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Middle-Eastern Eggplant Rounds,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Saffron Jewel Rice,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Thai Sweet and Sour Wings,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29178,Spicy Agave Wave,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
29179,Apple Chimichangas,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29180,Summer Corn Salad,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29181,Zucchini Stuffed Tomatoes,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [184]:
recipes_df.rename(columns=str.lower)

Unnamed: 0,label,achiote_paste,achiote_powder,acini_di_pepe,acorn_squash,active_dry_yeast,adobo_sauce,adobo_seasoning,adzuki_beans,agave_nectar,...,yellow_rock_sugar,yellow_squash,yellow_summer_squash,yellow_tomato,yellowfin_tuna,yogurt_cheese,yucca_root,yukon_gold_potatoes,yuzu_juice,zucchini_blossoms
0,Infineon Raceway Baked Beans,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Sour Cream Noodle Bake,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Middle-Eastern Eggplant Rounds,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Saffron Jewel Rice,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Thai Sweet and Sour Wings,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29178,Spicy Agave Wave,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
29179,Apple Chimichangas,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29180,Summer Corn Salad,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29181,Zucchini Stuffed Tomatoes,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [185]:
#checking for missing values
null_check = recipes_df.isnull().sum().sum()

null_check

0

In [186]:
recipes_df.set_index('label')

Unnamed: 0_level_0,achiote_paste,achiote_powder,acini_di_pepe,acorn_squash,active_dry_yeast,adobo_sauce,adobo_seasoning,adzuki_beans,agave_nectar,agave_tequila,...,yellow_rock_sugar,yellow_squash,yellow_summer_squash,yellow_tomato,yellowfin_tuna,yogurt_cheese,yucca_root,yukon_gold_potatoes,yuzu_juice,zucchini_blossoms
label,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Infineon Raceway Baked Beans,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Sour Cream Noodle Bake,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Middle-Eastern Eggplant Rounds,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Saffron Jewel Rice,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Thai Sweet and Sour Wings,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Spicy Agave Wave,0,0,0,0,0,0,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
Apple Chimichangas,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Summer Corn Salad,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
Zucchini Stuffed Tomatoes,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [187]:
recipes_df1= recipes_df[recipes_df.columns[-1500:]]
recipes_df1["label"]= recipes_df['label']

recipes_df1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  recipes_df1["label"]= recipes_df['label']


Unnamed: 0,instant_tapioca,instant_white_rice,instant_yeast,iodized_salt,jack_cheese,jalapeno_chilies,jasmine_brown_rice,jasmine_rice,jerk_sauce,jerk_seasoning,...,yellow_squash,yellow_summer_squash,yellow_tomato,yellowfin_tuna,yogurt_cheese,yucca_root,yukon_gold_potatoes,yuzu_juice,zucchini_blossoms,label
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Infineon Raceway Baked Beans
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Sour Cream Noodle Bake
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Middle-Eastern Eggplant Rounds
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Saffron Jewel Rice
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Thai Sweet and Sour Wings
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29178,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Spicy Agave Wave
29179,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Apple Chimichangas
29180,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Summer Corn Salad
29181,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,Zucchini Stuffed Tomatoes


In [188]:
recipes_df2= recipes_df[recipes_df.columns[0:1425]]

recipes_df2

Unnamed: 0,label,achiote_paste,achiote_powder,acini_di_pepe,acorn_squash,active_dry_yeast,adobo_sauce,adobo_seasoning,adzuki_beans,agave_nectar,...,iced_tea,instant_coffee,instant_couscous,instant_espresso,instant_espresso_granules,instant_espresso_powder,instant_oats,instant_potato_flakes,instant_pudding_mix,instant_rice
0,Infineon Raceway Baked Beans,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,Sour Cream Noodle Bake,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,Middle-Eastern Eggplant Rounds,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,Saffron Jewel Rice,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,Thai Sweet and Sour Wings,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
29178,Spicy Agave Wave,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
29179,Apple Chimichangas,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29180,Summer Corn Salad,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
29181,Zucchini Stuffed Tomatoes,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [189]:
#final datasets to csv
recipes_df1.to_csv("output/kaggle_ingredients1.csv",index=False)
recipes_df2.to_csv("output/kaggle_ingredients2.csv",index=False)

In [190]:
ingred_list1=recipes_df1.columns.to_list()
ingred_list1

['instant_tapioca',
 'instant_white_rice',
 'instant_yeast',
 'iodized_salt',
 'jack_cheese',
 'jalapeno_chilies',
 'jasmine_brown_rice',
 'jasmine_rice',
 'jerk_sauce',
 'jerk_seasoning',
 'juice_concentrate',
 'jumbo_pasta_shells',
 'jumbo_shells',
 'jumbo_shrimp',
 'juniper_berries',
 'kabocha_squash',
 'kaffir_lime',
 'kaffir_lime_leaves',
 'kaiser_rolls',
 'kale_leaves',
 'kecap_manis',
 'key_lime',
 'key_lime_juice',
 'kidney_beans',
 'king_crab',
 'king_crab_legs',
 'king_salmon',
 'kirby_cucumbers',
 'kiwi_fruits',
 'kosher_salt',
 'lacinato_kale',
 'lady_apples',
 'lady_fingers',
 'lager_beer',
 'lamb_chops',
 'lamb_cubes',
 'lamb_leg',
 'lamb_loin',
 'lamb_loin_chops',
 'lamb_neck',
 'lamb_racks',
 'lamb_rib_chops',
 'lamb_sausage',
 'lamb_shanks',
 'lamb_shoulder',
 'lamb_shoulder_chops',
 'lamb_stew_meat',
 'lamb_stock',
 'lap_cheong',
 'large_egg_whites',
 'large_egg_yolks',
 'large_eggs',
 'large_flour_tortillas',
 'large_garlic_cloves',
 'large_marshmallows',
 'large_shr

In [191]:
#preparing the list of columns to import into pgAdmin to create table schema
ingred_schema_initial = pd.DataFrame(ingred_list1) + " " + 'INT'

ingred_schema1= ingred_schema_initial.values.tolist()
ingred_schema1

[['instant_tapioca INT'],
 ['instant_white_rice INT'],
 ['instant_yeast INT'],
 ['iodized_salt INT'],
 ['jack_cheese INT'],
 ['jalapeno_chilies INT'],
 ['jasmine_brown_rice INT'],
 ['jasmine_rice INT'],
 ['jerk_sauce INT'],
 ['jerk_seasoning INT'],
 ['juice_concentrate INT'],
 ['jumbo_pasta_shells INT'],
 ['jumbo_shells INT'],
 ['jumbo_shrimp INT'],
 ['juniper_berries INT'],
 ['kabocha_squash INT'],
 ['kaffir_lime INT'],
 ['kaffir_lime_leaves INT'],
 ['kaiser_rolls INT'],
 ['kale_leaves INT'],
 ['kecap_manis INT'],
 ['key_lime INT'],
 ['key_lime_juice INT'],
 ['kidney_beans INT'],
 ['king_crab INT'],
 ['king_crab_legs INT'],
 ['king_salmon INT'],
 ['kirby_cucumbers INT'],
 ['kiwi_fruits INT'],
 ['kosher_salt INT'],
 ['lacinato_kale INT'],
 ['lady_apples INT'],
 ['lady_fingers INT'],
 ['lager_beer INT'],
 ['lamb_chops INT'],
 ['lamb_cubes INT'],
 ['lamb_leg INT'],
 ['lamb_loin INT'],
 ['lamb_loin_chops INT'],
 ['lamb_neck INT'],
 ['lamb_racks INT'],
 ['lamb_rib_chops INT'],
 ['lamb_saus

In [192]:
ingred_list1

['instant_tapioca',
 'instant_white_rice',
 'instant_yeast',
 'iodized_salt',
 'jack_cheese',
 'jalapeno_chilies',
 'jasmine_brown_rice',
 'jasmine_rice',
 'jerk_sauce',
 'jerk_seasoning',
 'juice_concentrate',
 'jumbo_pasta_shells',
 'jumbo_shells',
 'jumbo_shrimp',
 'juniper_berries',
 'kabocha_squash',
 'kaffir_lime',
 'kaffir_lime_leaves',
 'kaiser_rolls',
 'kale_leaves',
 'kecap_manis',
 'key_lime',
 'key_lime_juice',
 'kidney_beans',
 'king_crab',
 'king_crab_legs',
 'king_salmon',
 'kirby_cucumbers',
 'kiwi_fruits',
 'kosher_salt',
 'lacinato_kale',
 'lady_apples',
 'lady_fingers',
 'lager_beer',
 'lamb_chops',
 'lamb_cubes',
 'lamb_leg',
 'lamb_loin',
 'lamb_loin_chops',
 'lamb_neck',
 'lamb_racks',
 'lamb_rib_chops',
 'lamb_sausage',
 'lamb_shanks',
 'lamb_shoulder',
 'lamb_shoulder_chops',
 'lamb_stew_meat',
 'lamb_stock',
 'lap_cheong',
 'large_egg_whites',
 'large_egg_yolks',
 'large_eggs',
 'large_flour_tortillas',
 'large_garlic_cloves',
 'large_marshmallows',
 'large_shr

In [193]:
ingred_list2=recipes_df2.columns.to_list()
ingred_list2

['label',
 'achiote_paste',
 'achiote_powder',
 'acini_di_pepe',
 'acorn_squash',
 'active_dry_yeast',
 'adobo_sauce',
 'adobo_seasoning',
 'adzuki_beans',
 'agave_nectar',
 'agave_tequila',
 'aged_balsamic_vinegar',
 'aged_cheddar_cheese',
 'aged_gouda',
 'aged_manchego_cheese',
 'ahi_tuna_steaks',
 'alfalfa_sprouts',
 'alfredo_sauce',
 'alfredo_sauce_mix',
 'all_purpose_flour',
 'allspice_berries',
 'almond_butter',
 'almond_extract',
 'almond_filling',
 'almond_flour',
 'almond_liqueur',
 'almond_meal',
 'almond_milk',
 'almond_oil',
 'almond_paste',
 'amarena_cherries',
 'amaretti_cookies',
 'amaretto_liqueur',
 'amber_rum',
 'amontillado_sherry',
 'ancho_chile_pepper',
 'ancho_powder',
 'anchovy_filets',
 'anchovy_fillets',
 'anchovy_paste',
 'andouille_sausage',
 'andouille_sausage_links',
 'angel_food_cake',
 'angel_hair',
 'angostura_bitters',
 'anise_extract',
 'anise_liqueur',
 'anise_powder',
 'anise_seed',
 'anjou_pears',
 'annatto_oil',
 'annatto_seeds',
 'apple_brandy',
 

In [194]:
#preparing the list of columns to import into pgAdmin to create table schema
ingred_schema_initial2 = pd.DataFrame(ingred_list2) + " " + 'INT'

ingred_schema2= ingred_schema_initial2.values.tolist()
ingred_schema2

[['label INT'],
 ['achiote_paste INT'],
 ['achiote_powder INT'],
 ['acini_di_pepe INT'],
 ['acorn_squash INT'],
 ['active_dry_yeast INT'],
 ['adobo_sauce INT'],
 ['adobo_seasoning INT'],
 ['adzuki_beans INT'],
 ['agave_nectar INT'],
 ['agave_tequila INT'],
 ['aged_balsamic_vinegar INT'],
 ['aged_cheddar_cheese INT'],
 ['aged_gouda INT'],
 ['aged_manchego_cheese INT'],
 ['ahi_tuna_steaks INT'],
 ['alfalfa_sprouts INT'],
 ['alfredo_sauce INT'],
 ['alfredo_sauce_mix INT'],
 ['all_purpose_flour INT'],
 ['allspice_berries INT'],
 ['almond_butter INT'],
 ['almond_extract INT'],
 ['almond_filling INT'],
 ['almond_flour INT'],
 ['almond_liqueur INT'],
 ['almond_meal INT'],
 ['almond_milk INT'],
 ['almond_oil INT'],
 ['almond_paste INT'],
 ['amarena_cherries INT'],
 ['amaretti_cookies INT'],
 ['amaretto_liqueur INT'],
 ['amber_rum INT'],
 ['amontillado_sherry INT'],
 ['ancho_chile_pepper INT'],
 ['ancho_powder INT'],
 ['anchovy_filets INT'],
 ['anchovy_fillets INT'],
 ['anchovy_paste INT'],
 ['

In [25]:
#loading dependency to encode text file with UTF-8 encoding
import io

In [195]:
#writing the schema to a text file 
with open ('ingredient_list1.txt', 'w', encoding='utf8') as my_ingred_list:
    for x in ingred_schema1:
        my_ingred_list.write("%s\n" % x)
    print('List written.')

List written.


In [196]:
#writing the schema to a text file 
with open ('ingredient_list2.txt', 'w', encoding='utf8') as my_ingred_list:
    for x in ingred_schema2:
        my_ingred_list.write("%s\n" % x)
    print('List written.')

List written.


### Load DataFrames into database

In [None]:
#import dependencies
from sqlalchemy import create_engine
import psycopg2


In [None]:
#setting up the connection to pgAdmin
rds_connection_string = "postgres:postgres@localhost:5432/kaggle_nutrition_db"

engine = create_engine(f'postgresql://{rds_connection_string}')


In [None]:
#obtaining table names
engine.table_names()

In [None]:
#reading in the kaggle_nutrients.csv file into SQL
df = pd.read_csv ('output/kaggle_nutrients.csv')

df.head()


In [None]:
#adding dataframe to SQL
df.to_sql(name='kaggle_nutrition', con=engine, if_exists='append', index=False)


In [None]:
#reading in the kaggle_recipes.csv file into SQL
df2 = pd.read_csv ('output/kaggle_recipes.csv')

df2.head()

In [None]:
#adding dataframe to SQL
df.to_sql(name='kaggle_recipes', con=engine, if_exists='append', index=False)
