# Preprocessing and labeling

In [1]:
import numpy as np
import pandas as pd
import json
import keras

def _cuisine_slug(cuisines, position):
    """Return the 'slug' of the cuisine at ``position``, or NaN if absent.

    Parameters
    ----------
    cuisines : object
        One cell of the ``cuisines`` column. A list of cuisine dicts is
        expected; any other value (e.g. NaN for a missing field) yields NaN.
    position : int
        Zero-based index of the cuisine to read from the list.

    Returns
    -------
    object
        The value stored under the ``'slug'`` key, or ``np.nan`` when the
        list is too short, the entry is not a dict, or it has no slug.
    """
    if isinstance(cuisines, list) and len(cuisines) > position:
        cuisine = cuisines[position]
        if isinstance(cuisine, dict):
            return cuisine.get('slug', np.nan)
    return np.nan


# Import json file. JSON text is UTF-8, so decode it explicitly instead of
# relying on the platform's default encoding.
with open('dataset.json', encoding='utf-8') as file:
    data = json.load(file)

# Convert json to pandas dataframe
df = pd.json_normalize(data)

# Split cuisines for double entries: the first and second cuisine slug each
# get their own column. Reading the list directly avoids normalizing the
# column twice and yields NaN (not a KeyError) when no record has a second
# cuisine.
df['cuisines_1'] = df['cuisines'].map(lambda cuisines: _cuisine_slug(cuisines, 0))
df['cuisines_2'] = df['cuisines'].map(lambda cuisines: _cuisine_slug(cuisines, 1))

# Select only relevant columns and drop rows with missing values in
# price_category. Done as one chain (no inplace mutation); the trailing
# copy() keeps df an independent frame for later column assignments.
# The original row labels are retained, so the index may contain gaps.
df = (
    df[['objectID', 'price_category.slug', 'cuisines_1', 'cuisines_2', 'more_details.full_images']]
    .dropna(subset=['price_category.slug'])
    .copy()
)

df.head()

Unnamed: 0,objectID,price_category.slug,cuisines_1,cuisines_2,more_details.full_images
0,78848,mid-range,greek,,"[{'image_id': '2088757', 'image_url': 'https:/..."
1,79182,mid-range,modern-french,,"[{'image_id': '1956014', 'image_url': 'https:/..."
2,79041,premium,seafood-1096,,"[{'image_id': '2200565', 'image_url': 'https:/..."
3,79268,premium,modern-french,,"[{'image_id': '2640075', 'image_url': 'https:/..."
4,79301,premium,seafood-1096,classic-cuisine,"[{'image_id': '1955141', 'image_url': 'https:/..."


In [2]:
# Spread each row's list of image records across numbered columns
# (0, 1, 2, ...); rows with fewer images are padded with missing values,
# as the preview below shows.
# NOTE(review): depending on the pandas version, json_normalize may return a
# fresh 0..n-1 index instead of df's index (which can have gaps after rows
# were dropped) -- confirm before matching images_df back to df by label.
images_df = df['more_details.full_images'].pipe(pd.json_normalize)

images_df.head(5)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,49,50,51,52,53,54,55,56,57,58
0,"{'image_id': '2088757', 'image_url': 'https://...","{'image_id': '2182000', 'image_url': 'https://...","{'image_id': '4151606', 'image_url': 'https://...","{'image_id': '4151588', 'image_url': 'https://...","{'image_id': '2224983', 'image_url': 'https://...",,,,,,...,,,,,,,,,,
1,"{'image_id': '1956014', 'image_url': 'https://...",,,,,,,,,,...,,,,,,,,,,
2,"{'image_id': '2200565', 'image_url': 'https://...","{'image_id': '2098284', 'image_url': 'https://...","{'image_id': '2289173', 'image_url': 'https://...","{'image_id': '2225512', 'image_url': 'https://...","{'image_id': '2089586', 'image_url': 'https://...","{'image_id': '2238739', 'image_url': 'https://...","{'image_id': '2226113', 'image_url': 'https://...","{'image_id': '2152211', 'image_url': 'https://...","{'image_id': '2207861', 'image_url': 'https://...","{'image_id': '2637246', 'image_url': 'https://...",...,,,,,,,,,,
3,"{'image_id': '2640075', 'image_url': 'https://...","{'image_id': '2119391', 'image_url': 'https://...","{'image_id': '2130677', 'image_url': 'https://...","{'image_id': '2200944', 'image_url': 'https://...","{'image_id': '2196069', 'image_url': 'https://...","{'image_id': '2138037', 'image_url': 'https://...","{'image_id': '2214735', 'image_url': 'https://...","{'image_id': '2237152', 'image_url': 'https://...","{'image_id': '2255648', 'image_url': 'https://...","{'image_id': '2216806', 'image_url': 'https://...",...,,,,,,,,,,
4,"{'image_id': '1955141', 'image_url': 'https://...",,,,,,,,,,...,,,,,,,,,,
