# Creating Tables by importing json file

*importing libraries

In [242]:
import json
import pandas as pd

In [243]:
# List of json file names
file_names = ['file3.json']  # Specify the JSON files to be processed; others are commented out for now

# Initialize an empty list to store the DataFrames
df_list = []  # This list will hold individual dataframes created from each JSON file's 'restaurants' data

# Loop through the file names
for file_name in file_names:
    
    # Load the JSON file
    with open(file_name, 'r') as file:  # Open the JSON file in read mode
        data = json.load(file)  # Load the JSON content into a Python dictionary
    
    # Loop through each item in the JSON file
    for item in data:
        
        # Check if the 'restaurants' key exists in the current item
        if 'restaurants' in item:
            # If 'restaurants' key is found, process that part of the data
            
            # Normalize the 'restaurants' data and create a DataFrame
            df_restaurants = pd.json_normalize(item['restaurants'])
            # Convert nested JSON structure into a flat table (DataFrame)
            
            # Append the DataFrame to the list
            df_list.append(df_restaurants)
            # Add the newly created DataFrame to our list of DataFrames

# Concatenate all DataFrames into one
combined_df = pd.concat(df_list, ignore_index=True)

In [244]:
len(combined_df)

8686

In [245]:
combined_df.tail()

Unnamed: 0,restaurant.R.res_id,restaurant.apikey,restaurant.id,restaurant.name,restaurant.url,restaurant.location.address,restaurant.location.locality,restaurant.location.city,restaurant.location.city_id,restaurant.location.latitude,...,restaurant.has_online_delivery,restaurant.is_delivering_now,restaurant.deeplink,restaurant.has_table_booking,restaurant.events_url,restaurant.establishment_types,restaurant.zomato_events,restaurant.book_url,restaurant.order_url,restaurant.order_deeplink
8681,16512333,b90e6a8c738410315a20c449fe2eb1b1,16512333,Britto's Bar & Restaurant,https://www.zomato.com/goa/brittos-bar-restaur...,"Baga Calangute, Bardez, Baga, Goa",Baga,Goa,13,15.561295489,...,0,0,zomato://restaurant/16512333,0,https://www.zomato.com/goa/brittos-bar-restaur...,[],,,,
8682,17559793,b90e6a8c738410315a20c449fe2eb1b1,17559793,Fishpatrick's Crabby Cafe,https://www.zomato.com/winchester-bay-or/fishp...,"196 Bayfront Loop, Winchester Bay, OR 97467",Winchester Bay,Winchester Bay,8327,43.678998,...,0,0,zomato://restaurant/17559793,0,https://www.zomato.com/winchester-bay-or/fishp...,[],,,,
8683,16607969,b90e6a8c738410315a20c449fe2eb1b1,16607969,Bespoke Harvest,https://www.zomato.com/forrest-vic/bespoke-har...,"16 Grant St, Forrest, VIC",Forrest,Forrest,1695,-38.517292,...,0,0,zomato://restaurant/16607969,0,https://www.zomato.com/forrest-vic/bespoke-har...,[],,,,
8684,17842104,b90e6a8c738410315a20c449fe2eb1b1,17842104,Mr.,https://www.zomato.com/macedon-vic/mr-macedon?...,"23 Victoria St, Macedon, VIC",Macedon,Macedon,1565,-37.423189,...,0,0,zomato://restaurant/17842104,0,https://www.zomato.com/macedon-vic/mr-macedon/...,[],,,,
8685,17687832,b90e6a8c738410315a20c449fe2eb1b1,17687832,Vince's Restaurant & Pizzeria,https://www.zomato.com/monroe-wi/vinces-restau...,"619 4th Ave, Monroe, WI 53566",Monroe,Monroe,10238,42.606306,...,0,0,zomato://restaurant/17687832,0,https://www.zomato.com/monroe-wi/vinces-restau...,[],,,,


**Assigning column names separately for each table exporting csv files

In [246]:
def create_separate_tables(combined_df):
    # Create a DataFrame for each category of data

    # Table for restaurants
    restaurants_df = combined_df[['restaurant.id', 'restaurant.name', 'restaurant.url', 'restaurant.cuisines', 
                                  'restaurant.average_cost_for_two', 'restaurant.price_range', 'restaurant.currency', 
                                  'restaurant.switch_to_order_menu']].copy()
    restaurants_df.columns = ['ID', 'Name', 'URL', 'Cuisines', 'Average_Price_for_Two', 'Price_Range', 'Currency', 'Switch_to_order_menu']

    # Table for locations
    location_df = combined_df[['restaurant.location.address', 'restaurant.location.locality', 
                        'restaurant.location.city', 'restaurant.location.latitude', 
                        'restaurant.location.longitude', 'restaurant.location.zipcode', 
                        'restaurant.location.country_id']].copy()
    location_df.columns = ['Address', 'Locality', 'City', 'Latitude', 'Longitude', 'Zipcode', 'Country_ID']
    

    # Table for user ratings
    user_ratings_df = combined_df[['restaurant.user_rating.aggregate_rating', 'restaurant.user_rating.rating_text', 
                            'restaurant.user_rating.rating_color', 'restaurant.user_rating.votes']].copy()
    user_ratings_df.columns = ['Aggregate_Rating', 'Rating_Text', 'Rating_Color', 'Votes']

    # Table for services
    services_df = combined_df[['restaurant.has_online_delivery', 'restaurant.is_delivering_now', 
                        'restaurant.has_table_booking']].copy()
    services_df.columns = ['Has_Online_Delivery', 'Is_Delivering_Now', 'Has_Table_Booking']

    # Output tables to CSV
    restaurants_df.to_csv('restaurants.csv', index=False)
    location_df.to_csv('location.csv', index=False)
    user_ratings_df.to_csv('user_ratings.csv', index=False)
    services_df.to_csv('services.csv', index=False)

    return {
        "restaurants": "restaurants.csv",
        "location": "location.csv",
        "user_ratings": "user_ratings.csv",
        "services": "services.csv"
    }

In [247]:
combined_df.to_csv('combined_data.csv', index=False)

# Running the function to create separate CSV files
file_paths = create_separate_tables(combined_df)

**Basic info of restaurants file

In [248]:
df = pd.read_csv('restaurants.csv')
df.head()

Unnamed: 0,ID,Name,URL,Cuisines,Average_Price_for_Two,Price_Range,Currency,Switch_to_order_menu
0,2100702,Barbeque Nation,https://www.zomato.com/guwahati/barbeque-natio...,North Indian,1500,4,Rs.,0
1,16608059,1918 Bistro & Grill,https://www.zomato.com/tanunda-sa/1918-bistro-...,"Modern Australian, Australian",30,3,$,0
2,17558684,Berry Patch Restaurant,https://www.zomato.com/clatskanie-or/berry-pat...,"American, Breakfast, Desserts",10,1,$,0
3,16604896,La Trattoria of Lavandula,https://www.zomato.com/hepburn-springs-vic/la-...,"Italian, Fusion, Cafe",7,1,$,0
4,17536645,Jehova es Mi Pastor Tacos y Burritos,https://www.zomato.com/fernley-nv/jehova-es-mi...,Mexican,10,1,$,0


In [249]:
# Display dataframe information (data types, non-null counts)
df.info()

# Statistical summary for numerical fields
df.describe()

# Display columns to get an idea of what fields are available
df.columns

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8686 entries, 0 to 8685
Data columns (total 8 columns):
 #   Column                 Non-Null Count  Dtype 
---  ------                 --------------  ----- 
 0   ID                     8686 non-null   int64 
 1   Name                   8686 non-null   object
 2   URL                    8686 non-null   object
 3   Cuisines               8686 non-null   object
 4   Average_Price_for_Two  8686 non-null   int64 
 5   Price_Range            8686 non-null   int64 
 6   Currency               8686 non-null   object
 7   Switch_to_order_menu   8686 non-null   int64 
dtypes: int64(4), object(4)
memory usage: 543.0+ KB


Index(['ID', 'Name', 'URL', 'Cuisines', 'Average_Price_for_Two', 'Price_Range',
       'Currency', 'Switch_to_order_menu'],
      dtype='object')

In [250]:
# Sum of missing values for each column
df.isnull().sum()

ID                       0
Name                     0
URL                      0
Cuisines                 0
Average_Price_for_Two    0
Price_Range              0
Currency                 0
Switch_to_order_menu     0
dtype: int64

In [252]:
# Check for duplicate rows
#df.duplicated().sum()

# Optionally, examine a few duplicate rows
df[df.duplicated(keep=False)].head()


Unnamed: 0,ID,Name,URL,Cuisines,Average_Price_for_Two,Price_Range,Currency,Switch_to_order_menu
0,2100702,Barbeque Nation,https://www.zomato.com/guwahati/barbeque-natio...,North Indian,1500,4,Rs.,0
1,16608059,1918 Bistro & Grill,https://www.zomato.com/tanunda-sa/1918-bistro-...,"Modern Australian, Australian",30,3,$,0
2,17558684,Berry Patch Restaurant,https://www.zomato.com/clatskanie-or/berry-pat...,"American, Breakfast, Desserts",10,1,$,0
3,16604896,La Trattoria of Lavandula,https://www.zomato.com/hepburn-springs-vic/la-...,"Italian, Fusion, Cafe",7,1,$,0
4,17536645,Jehova es Mi Pastor Tacos y Burritos,https://www.zomato.com/fernley-nv/jehova-es-mi...,Mexican,10,1,$,0
