In [33]:
import boto3
from dotenv import load_dotenv
import os

Downloading the raw files from the AWS S3 bucket

In [34]:
load_dotenv()

True

In [35]:
aws_access_key_id = os.getenv('AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.getenv('AWS_SECRET_ACCESS_KEY')
region_name = "ap-south-1" 


In [36]:
s3_client = boto3.client(
    's3',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    region_name=region_name
)

In [38]:
buckets=s3_client.list_buckets()
buckets

ClientError: An error occurred (InvalidAccessKeyId) when calling the ListBuckets operation: The AWS Access Key Id you provided does not exist in our records.

In [7]:
s3_client.download_file('chefmatebucket01','file1.json','file1.json')
s3_client.download_file('chefmatebucket01','file2.json','file2.json')
s3_client.download_file('chefmatebucket01','file3.json','file3.json')
s3_client.download_file('chefmatebucket01','file4.json','file4.json')
s3_client.download_file('chefmatebucket01','file5.json','file5.json')

Converting the JSON file to Structured Dataframe

In [8]:
import pandas as pd
import json
import warnings
warnings.filterwarnings("ignore")

In [9]:
zomato_file1  = pd.read_json("file1.json")
zomato_file2  = pd.read_json("file2.json")
zomato_file3  = pd.read_json("file3.json")
zomato_file4  = pd.read_json("file4.json")
zomato_file5  = pd.read_json("file5.json")

zomato_file1 dataframe

In [10]:
zomato_file1 

Unnamed: 0,results_found,restaurants,results_shown,results_start,message,code,status
0,17151.0,"[{'restaurant': {'has_online_delivery': 1, 'ph...",20.0,1.0,,,
1,4748.0,"[{'restaurant': {'has_online_delivery': 0, 'ph...",20.0,1.0,,,
2,13786.0,"[{'restaurant': {'has_online_delivery': 0, 'ph...",20.0,1.0,,,
3,10224.0,"[{'restaurant': {'has_online_delivery': 0, 'ph...",20.0,1.0,,,
4,7039.0,"[{'restaurant': {'has_online_delivery': 0, 'ph...",20.0,1.0,,,
...,...,...,...,...,...,...,...
474,,,,,API limit exceeded,440.0,
475,,,,,API limit exceeded,440.0,
476,,,,,API limit exceeded,440.0,
477,,,,,API limit exceeded,440.0,


In [11]:
with open('file1.json', 'r') as file:
    data = json.load(file)

In [12]:
flattened_data1 = []
for i in range(len(data)):
  keys = data[i].keys()
  if 'restaurants' in keys:
    restaurants = data[i]['restaurants']  

    for item in restaurants:
        restaurant = item['restaurant']
        flattened_data1.append({
            "Restaurant ID": restaurant.get("R", {}).get("res_id"),
            "Restaurant Name": restaurant.get("name"),
            "Location": restaurant["location"].get("address"),
            "Cuisine": restaurant.get("cuisines"),
            "Ratings": restaurant["user_rating"].get("aggregate_rating"),
            "Rating text": restaurant["user_rating"].get("rating_text"),
            "Average Cost for Two": restaurant.get("average_cost_for_two"),
            "Price Range": restaurant.get("price_range"),
            "Table Booking": restaurant.get("has_table_booking"),
            "Online Delivery": restaurant.get("has_online_delivery"),
            "Delivering now": restaurant.get("is_delivering_now"),
            "Longitude": restaurant["location"].get("longitude"),
            "Latitude": restaurant["location"].get("latitude"),
            "City": restaurant["location"].get("city"),
            "Locality": restaurant["location"].get("locality")
})

In [13]:
zomato_df1 = pd.DataFrame(flattened_data1)
zomato_df1

Unnamed: 0,Restaurant ID,Restaurant Name,Location,Cuisine,Ratings,Rating text,Average Cost for Two,Price Range,Table Booking,Online Delivery,Delivering now,Longitude,Latitude,City,Locality
0,308322,Hauz Khas Social,"9-A & 12, Hauz Khas Village, New Delhi","Continental, American, Asian, North Indian",4.3,Very Good,1600,3,1,1,0,77.1944706000,28.5542851000,New Delhi,Hauz Khas Village
1,18037817,Qubitos - The Terrace Cafe,"C-7, Vishal Enclave, Opposite Metro Pillar 417...","Thai, European, Mexican, North Indian, Chinese...",4.5,Excellent,1500,3,1,0,0,77.1177015000,28.6471325000,New Delhi,Rajouri Garden
2,312345,The Hudson Cafe,"2524, 1st Floor, Hudson Lane, Delhi University...","Cafe, Italian, Continental, Chinese",4.4,Very Good,850,2,0,1,0,77.2043172000,28.6949468000,New Delhi,Delhi University-GTB Nagar
3,307490,Summer House Cafe,"1st Floor, DDA Shopping Complex, Aurobindo Pla...","Italian, Continental",4.1,Very Good,1850,3,1,0,0,77.2038090000,28.5525204000,New Delhi,Hauz Khas
4,18241537,38 Barracks,"M-38, Outer Circle, Connaught Place, New Delhi","North Indian, Italian, Asian, American",4.4,Very Good,1600,3,1,0,0,77.2228584811,28.6330248887,New Delhi,Connaught Place
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1175,7602340,The Boozy Cow,"17 Frederick Street, New Town, Edinburgh EH2 2EY","Burger, Grill",4.0,Very Good,40,3,0,0,0,-3.1995690000,55.9519740000,Edinburgh,New Town
1176,7600217,La Favorita,"325-331 Leith Walk, Leith, Edinburgh EH6 8SA",Italian,4.5,Excellent,30,3,0,0,0,-3.1768583333,55.9646694444,Edinburgh,Leith
1177,7601577,Roseleaf Bar Cafe,"23-24 Sandport Place, Leith, Edinburgh EH6 6EW","Scottish, Cafe",4.7,Excellent,45,3,0,0,0,-3.1736790000,55.9766440000,Edinburgh,Leith
1178,7602224,Civerinos,"5 Hunter Square, Royal Mile, Old Town, Edinbur...","Pizza, Italian",3.7,Good,35,3,0,0,0,-3.1879620000,55.9496370000,Edinburgh,Old Town


Zomato_file2 dataframe

In [14]:
zomato_file2

Unnamed: 0,results_found,results_start,results_shown,restaurants
0,1263908,21,20,"[{'restaurant': {'R': {'res_id': 16668008}, 'a..."
1,1263908,21,20,"[{'restaurant': {'R': {'res_id': 16668008}, 'a..."
2,1263908,21,20,"[{'restaurant': {'R': {'res_id': 16668008}, 'a..."
3,1263908,21,20,"[{'restaurant': {'R': {'res_id': 16668008}, 'a..."
4,1263908,21,20,"[{'restaurant': {'R': {'res_id': 16668008}, 'a..."
...,...,...,...,...
545,32,21,11,"[{'restaurant': {'R': {'res_id': 18433895}, 'a..."
546,17151,21,20,"[{'restaurant': {'R': {'res_id': 313256}, 'api..."
547,33,21,12,"[{'restaurant': {'R': {'res_id': 309672}, 'api..."
548,17151,21,20,"[{'restaurant': {'R': {'res_id': 313256}, 'api..."


In [15]:
with open('file2.json', 'r') as file:
    data = json.load(file)

In [16]:
flattened_data2 = []
for i in range(len(data)):
  keys = data[i].keys()
  if 'restaurants' in keys:
    restaurants = data[i]['restaurants']  

    for item in restaurants:
        restaurant = item['restaurant']
        flattened_data2.append({
            "Restaurant ID": restaurant.get("R", {}).get("res_id"),
            "Restaurant Name": restaurant.get("name"),
            "Location": restaurant["location"].get("address"),
            "Cuisine": restaurant.get("cuisines"),
            "Ratings": restaurant["user_rating"].get("aggregate_rating"),
            "Rating text": restaurant["user_rating"].get("rating_text"),
            "Average Cost for Two": restaurant.get("average_cost_for_two"),
            "Price Range": restaurant.get("price_range"),
            "Table Booking": restaurant.get("has_table_booking"),
            "Online Delivery": restaurant.get("has_online_delivery"),
            "Delivering now": restaurant.get("is_delivering_now"),
            "Longitude": restaurant["location"].get("longitude"),
            "Latitude": restaurant["location"].get("latitude"),
            "City": restaurant["location"].get("city"),
            "Locality": restaurant["location"].get("locality")
})

In [17]:
zomato_df2 = pd.DataFrame(flattened_data2)
zomato_df2

Unnamed: 0,Restaurant ID,Restaurant Name,Location,Cuisine,Ratings,Rating text,Average Cost for Two,Price Range,Table Booking,Online Delivery,Delivering now,Longitude,Latitude,City,Locality
0,16668008,Arigato Sushi,"14 Second Ave North, Yorkton, SK S3N 1G1",Asian,3.3,Average,25,2,0,0,0,-102.4613173000,51.2106824000,Yorkton,Yorkton
1,801690,Mocha,"CP-1, 2nd Floor, Anand Plaza, Viram Khand-1, N...","Cafe, Italian, Continental",4.6,Excellent,800,3,0,0,0,81.0011849000,26.8528099000,Lucknow,Gomti Nagar
2,17558738,Blue House Cafe,"919 Bridge St, Vernonia, OR 97064","Coffee and Tea, Mediterranean",4.3,Very Good,10,1,0,0,0,-123.1954368000,45.8586670000,Vernonia,Vernonia
3,16611701,Star Buffet,"58 Hanbury St, Mayfield, NSW",Asian,2.9,Average,20,2,0,0,0,151.7343832000,-32.8991780000,Mayfield,Mayfield
4,2100784,11th Avenue Cafe Bistro,"Opposite Assam State Museum, Dighalipukhuri, T...","Cafe, American, Italian, Continental",4.1,Very Good,400,2,0,0,0,91.7523143068,26.1860014826,Guwahati,Uzan Bazaar
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10597,18289242,Cyber Hub Social,"Cyber Hub, DLF Cyber City, Gurgaon","Continental, American, Asian, North Indian",4.3,Very Good,1500,3,1,1,0,77.0885080000,28.4939356000,Gurgaon,"Cyber Hub, DLF Cyber City"
10598,799,Gulati,"6, Pandara Road Market, New Delhi","North Indian, Mughlai",4.4,Very Good,1500,3,0,1,0,77.2297330000,28.6081402000,New Delhi,Pandara Road Market
10599,301998,Punjab Grill,"Pacific Mall, Tagore Garden, New Delhi",North Indian,4.3,Very Good,1600,3,1,1,0,77.1065034000,28.6422159000,New Delhi,Tagore Garden
10600,307113,Diggin,"Anand Lok Shopping Centre, Opposite Gargi Coll...","Italian, Continental, Cafe",4.2,Very Good,1400,3,1,1,0,77.2194983000,28.5556355000,New Delhi,Anand Lok


Zomato_file3 dataframe

In [18]:
zomato_file3

Unnamed: 0,results_found,results_start,results_shown,restaurants,code,status,message
0,1263908.0,41.0,20.0,"[{'restaurant': {'R': {'res_id': 2100702}, 'ap...",,,
1,1263908.0,41.0,20.0,"[{'restaurant': {'R': {'res_id': 2100702}, 'ap...",,,
2,1263908.0,41.0,20.0,"[{'restaurant': {'R': {'res_id': 2100702}, 'ap...",,,
3,1263908.0,41.0,20.0,"[{'restaurant': {'R': {'res_id': 2100702}, 'ap...",,,
4,1263908.0,41.0,20.0,"[{'restaurant': {'R': {'res_id': 2100702}, 'ap...",,,
...,...,...,...,...,...,...,...
545,,,,,440.0,,API limit exceeded
546,,,,,440.0,,API limit exceeded
547,,,,,440.0,,API limit exceeded
548,,,,,440.0,,API limit exceeded


In [19]:
with open('file3.json', 'r') as file:
    data = json.load(file)

In [20]:
flattened_data3 = []
for i in range(len(data)):
  keys = data[i].keys()
  if 'restaurants' in keys:
    restaurants = data[i]['restaurants']  

    for item in restaurants:
        restaurant = item['restaurant']
        flattened_data3.append({
            "Restaurant ID": restaurant.get("R", {}).get("res_id"),
            "Restaurant Name": restaurant.get("name"),
            "Location": restaurant["location"].get("address"),
            "Cuisine": restaurant.get("cuisines"),
            "Ratings": restaurant["user_rating"].get("aggregate_rating"),
            "Rating text": restaurant["user_rating"].get("rating_text"),
            "Average Cost for Two": restaurant.get("average_cost_for_two"),
            "Price Range": restaurant.get("price_range"),
            "Table Booking": restaurant.get("has_table_booking"),
            "Online Delivery": restaurant.get("has_online_delivery"),
            "Delivering now": restaurant.get("is_delivering_now"),
            "Longitude": restaurant["location"].get("longitude"),
            "Latitude": restaurant["location"].get("latitude"),
            "City": restaurant["location"].get("city"),
            "Locality": restaurant["location"].get("locality")
})

In [21]:
zomato_df3 = pd.DataFrame(flattened_data3)
zomato_df3

Unnamed: 0,Restaurant ID,Restaurant Name,Location,Cuisine,Ratings,Rating text,Average Cost for Two,Price Range,Table Booking,Online Delivery,Delivering now,Longitude,Latitude,City,Locality
0,2100702,Barbeque Nation,"2nd Floor, Adityam Building, Ulubari, Guwahati",North Indian,4.9,Excellent,1500,4,0,0,0,91.7598570000,26.1721190000,Guwahati,Ulubari
1,16608059,1918 Bistro & Grill,"94 Murray St, Tanunda, SA","Modern Australian, Australian",4.4,Very Good,30,3,0,0,0,138.9660640000,-34.5196190000,Tanunda,Tanunda
2,17558684,Berry Patch Restaurant,"49289 Us-30, Westport, OR 97016","American, Breakfast, Desserts",4.3,Very Good,10,1,0,0,0,-123.3681510000,46.1269670000,Clatskanie,Clatskanie
3,16604896,La Trattoria of Lavandula,"350 Hepburn-Newstead Road, Hepburn Springs, VIC","Italian, Fusion, Cafe",3.8,Good,7,1,0,0,0,144.1100620000,-37.2754940000,Hepburn Springs,Hepburn Springs
4,17536645,Jehova es Mi Pastor Tacos y Burritos,"135 W. Main Street, Fernley, NV 89408",Mexican,3.7,Good,10,1,0,0,0,-119.2526940000,39.6075150000,Fernley,Fernley
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8681,16512333,Britto's Bar & Restaurant,"Baga Calangute, Bardez, Baga, Goa","North Indian, Continental, Chinese, Seafood",4.3,Very Good,1400,4,0,0,0,73.7494780582,15.5612954890,Goa,Baga
8682,17559793,Fishpatrick's Crabby Cafe,"196 Bayfront Loop, Winchester Bay, OR 97467","Burger, Seafood, Steak",3.2,Average,25,2,0,0,0,-124.1753460000,43.6789980000,Winchester Bay,Winchester Bay
8683,16607969,Bespoke Harvest,"16 Grant St, Forrest, VIC","Cafe, Australian",3.7,Good,20,2,0,0,0,143.7143150000,-38.5172920000,Forrest,Forrest
8684,17842104,Mr.,"23 Victoria St, Macedon, VIC",Cafe,3.5,Good,20,2,0,0,0,144.5641740000,-37.4231890000,Macedon,Macedon


Zomato_file4 dataframe

In [22]:
zomato_file4

Unnamed: 0,results_found,results_start,results_shown,restaurants
0,1263908,61,20,"[{'restaurant': {'R': {'res_id': 16604370}, 'a..."
1,1263908,61,20,"[{'restaurant': {'R': {'res_id': 16604370}, 'a..."
2,1263908,61,20,"[{'restaurant': {'R': {'res_id': 16604370}, 'a..."
3,1263908,61,20,"[{'restaurant': {'R': {'res_id': 16604370}, 'a..."
4,1263908,61,20,"[{'restaurant': {'R': {'res_id': 16604370}, 'a..."
...,...,...,...,...
474,1263908,61,20,"[{'restaurant': {'R': {'res_id': 16643459}, 'a..."
475,1263908,61,20,"[{'restaurant': {'R': {'res_id': 16643459}, 'a..."
476,1263908,61,20,"[{'restaurant': {'R': {'res_id': 16643459}, 'a..."
477,1263908,61,20,"[{'restaurant': {'R': {'res_id': 16643459}, 'a..."


In [23]:
with open('file4.json', 'r') as file:
    data = json.load(file)

In [24]:
flattened_data4 = []
for i in range(len(data)):
  keys = data[i].keys()
  if 'restaurants' in keys:
    restaurants = data[i]['restaurants']  

    for item in restaurants:
        restaurant = item['restaurant']
        flattened_data4.append({
            "Restaurant ID": restaurant.get("R", {}).get("res_id"),
            "Restaurant Name": restaurant.get("name"),
            "Location": restaurant["location"].get("address"),
            "Cuisine": restaurant.get("cuisines"),
            "Ratings": restaurant["user_rating"].get("aggregate_rating"),
            "Rating text": restaurant["user_rating"].get("rating_text"),
            "Average Cost for Two": restaurant.get("average_cost_for_two"),
            "Price Range": restaurant.get("price_range"),
            "Table Booking": restaurant.get("has_table_booking"),
            "Online Delivery": restaurant.get("has_online_delivery"),
            "Delivering now": restaurant.get("is_delivering_now"),
            "Longitude": restaurant["location"].get("longitude"),
            "Latitude": restaurant["location"].get("latitude"),
            "City": restaurant["location"].get("city"),
            "Locality": restaurant["location"].get("locality")
})

In [25]:
zomato_df4 = pd.DataFrame(flattened_data4)
zomato_df4

Unnamed: 0,Restaurant ID,Restaurant Name,Location,Cuisine,Ratings,Rating text,Average Cost for Two,Price Range,Table Booking,Online Delivery,Delivering now,Longitude,Latitude,City,Locality
0,16604370,Mad Cowes Cafe,"4/17 The Esplanade, Cowes, VIC","Breakfast, Coffee and Tea, Modern Australian",3.7,Good,20,2,0,0,0,145.2378130000,-38.4483070000,Phillip Island,Phillip Island
1,16643459,Consort Restaurant,"4931 50th Street, Consort, AB T0C 1B0","Chinese, Canadian",3.0,Average,25,2,0,0,0,-110.7746994000,52.0082889000,Consort,Consort
2,18185059,Cafe Arabelle,"Ayala Mall, Solenad, Nuvali, Santa Rosa - Taga...","Cafe, American, Italian, Filipino",3.6,Good,800,3,0,0,0,121.0570400000,14.2370820000,Santa Rosa,"Nuvali, Don Jose, Santa Rosa"
3,18385186,The Cafe Baraco,"34, Shribhuvan Complex, Near Memnagar Fire Sta...","Cafe, Italian",4.4,Very Good,600,2,0,1,0,72.5504755000,23.0443367000,Ahmedabad,Navrangpura
4,2200000,Kesar Da Dhabha,"Near Telephone Exchange, Chowk Passian, Shastr...",North Indian,4.1,Very Good,500,2,0,0,0,74.8730050000,31.6243860000,Amritsar,Town Hall
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8860,17557488,Burger Queen Drive In,"109 S F St, Lakeview, OR 97630","Burger, Desserts, Sandwich",3.6,Good,10,1,0,0,0,-120.3458000000,42.1885000000,Lakeview,Lakeview
8861,18182702,Nonna's Pasta & Pizzeria,"Ground Floor, Building G, Solenad 3, Nuvali, D...","Italian, Pizza",4.0,Very Good,850,3,0,0,0,121.0565873981,14.2376789732,Santa Rosa,"Solenad 3, Don Jose, Santa Rosa"
8862,16608864,Taste of Balingup,"63 South Western Hwy, Balingup, WA",Modern Australian,3.2,Average,20,2,0,0,0,115.9844924000,-33.7845269000,Balingup,Balingup
8863,13231,Le Plaisir,"759/125, Rajkamal, Opposite Kelkar Eye Hospita...","European, Desserts",4.8,Excellent,1000,3,0,0,0,73.8384293765,18.5142099831,Pune,Deccan Gymkhana


Zomato_file5 dataframe

In [26]:
zomato_file5

Unnamed: 0,results_found,restaurants,results_shown,results_start,message,code,status
0,0.0,[],0.0,1.0,,,
1,6835.0,"[{'restaurant': {'has_online_delivery': 0, 'ph...",20.0,1.0,,,
2,0.0,[],0.0,1.0,,,
3,0.0,[],0.0,1.0,,,
4,8680.0,"[{'restaurant': {'has_online_delivery': 0, 'ph...",20.0,1.0,,,
...,...,...,...,...,...,...,...
295,,,,,API limit exceeded,440.0,
296,,,,,API limit exceeded,440.0,
297,,,,,API limit exceeded,440.0,
298,,,,,API limit exceeded,440.0,


In [27]:
with open('file5.json', 'r') as file:
    data = json.load(file)

In [28]:
flattened_data5 = []
for i in range(len(data)):
  keys = data[i].keys()
  if 'restaurants' in keys:
    restaurants = data[i]['restaurants']  

    for item in restaurants:
        restaurant = item['restaurant']
        flattened_data5.append({
            "Restaurant ID": restaurant.get("R", {}).get("res_id"),
            "Restaurant Name": restaurant.get("name"),
            "Location": restaurant["location"].get("address"),
            "Cuisine": restaurant.get("cuisines"),
            "Ratings": restaurant["user_rating"].get("aggregate_rating"),
            "Rating text": restaurant["user_rating"].get("rating_text"),
            "Average Cost for Two": restaurant.get("average_cost_for_two"),
            "Price Range": restaurant.get("price_range"),
            "Table Booking": restaurant.get("has_table_booking"),
            "Online Delivery": restaurant.get("has_online_delivery"),
            "Delivering now": restaurant.get("is_delivering_now"),
            "Longitude": restaurant["location"].get("longitude"),
            "Latitude": restaurant["location"].get("latitude"),
            "City": restaurant["location"].get("city"),
            "Locality": restaurant["location"].get("locality")
})

In [29]:
zomato_df5 = pd.DataFrame(flattened_data5)
zomato_df5

Unnamed: 0,Restaurant ID,Restaurant Name,Location,Cuisine,Ratings,Rating text,Average Cost for Two,Price Range,Table Booking,Online Delivery,Delivering now,Longitude,Latitude,City,Locality
0,17066603,The Coop,"610 W Morse Boulevard, Winter Park, FL 32789","Southern, Cajun, Soul Food",3.6,Good,25,2,0,0,0,-81.3572190000,28.5973660000,Orlando,Winter Park
1,17059541,Maggiano's Little Italy,"9101 International Drive,Orlando, FL 32819",Italian,4.4,Very Good,50,4,0,0,0,-81.4714470000,28.4332350000,Orlando,I-Drive/Universal
2,17064405,Tako Cheena by Pom Pom,"932 North Mills Avenue, Orlando, FL 32803","Asian, Latin American, Vegetarian",4.4,Very Good,10,1,0,0,0,-81.3645470000,28.5578450000,Orlando,Mills 50
3,17057797,Bosphorous Turkish Cuisine,"108 S Park Ave, Winter Park, FL 32789","Mediterranean, Turkish",4.2,Very Good,40,3,0,0,0,-81.3508344000,28.5976271000,Orlando,Winter Park
4,17057591,Bahama Breeze Island Grille,"8849 International Drive, Orlando, FL 32819",Caribbean,4.3,Very Good,45,3,0,0,0,-81.4715260000,28.4370650000,Orlando,I-Drive/Universal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415,17582627,Senor Iguanas,"961 Hiline Rd, Pocatello, ID 83201",Mexican,3.6,Good,0,1,0,0,0,-112.4433000000,42.8942000000,Pocatello,Pocatello
416,17582625,Sandpiper Restaurant & Lounge,"1400 Bench Rd, Pocatello, ID 83201","American, Seafood, Steak",3.6,Good,40,3,0,0,0,-112.4320000000,42.9012000000,Pocatello,Pocatello
417,17582668,Texas Roadhouse,"560 Bullock Street, Pocatello, ID 83202","American, BBQ, Steak",3.5,Good,45,3,0,0,0,-112.4613260000,42.9105180000,Pocatello,Chubbuck
418,17582498,Riverwalk Cafe,"695 E Main St, Lava Hot Springs, ID 83246","Asian, Thai",3.6,Good,10,1,0,0,0,-112.0132000000,42.6200000000,Pocatello,Lava Hot Springs


Datacleaning and preprocessing

In [30]:
zomato_df = pd.concat([zomato_df1,zomato_df2,zomato_df3,zomato_df4,zomato_df5])

In [31]:
zomato_df

Unnamed: 0,Restaurant ID,Restaurant Name,Location,Cuisine,Ratings,Rating text,Average Cost for Two,Price Range,Table Booking,Online Delivery,Delivering now,Longitude,Latitude,City,Locality
0,308322,Hauz Khas Social,"9-A & 12, Hauz Khas Village, New Delhi","Continental, American, Asian, North Indian",4.3,Very Good,1600,3,1,1,0,77.1944706000,28.5542851000,New Delhi,Hauz Khas Village
1,18037817,Qubitos - The Terrace Cafe,"C-7, Vishal Enclave, Opposite Metro Pillar 417...","Thai, European, Mexican, North Indian, Chinese...",4.5,Excellent,1500,3,1,0,0,77.1177015000,28.6471325000,New Delhi,Rajouri Garden
2,312345,The Hudson Cafe,"2524, 1st Floor, Hudson Lane, Delhi University...","Cafe, Italian, Continental, Chinese",4.4,Very Good,850,2,0,1,0,77.2043172000,28.6949468000,New Delhi,Delhi University-GTB Nagar
3,307490,Summer House Cafe,"1st Floor, DDA Shopping Complex, Aurobindo Pla...","Italian, Continental",4.1,Very Good,1850,3,1,0,0,77.2038090000,28.5525204000,New Delhi,Hauz Khas
4,18241537,38 Barracks,"M-38, Outer Circle, Connaught Place, New Delhi","North Indian, Italian, Asian, American",4.4,Very Good,1600,3,1,0,0,77.2228584811,28.6330248887,New Delhi,Connaught Place
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
415,17582627,Senor Iguanas,"961 Hiline Rd, Pocatello, ID 83201",Mexican,3.6,Good,0,1,0,0,0,-112.4433000000,42.8942000000,Pocatello,Pocatello
416,17582625,Sandpiper Restaurant & Lounge,"1400 Bench Rd, Pocatello, ID 83201","American, Seafood, Steak",3.6,Good,40,3,0,0,0,-112.4320000000,42.9012000000,Pocatello,Pocatello
417,17582668,Texas Roadhouse,"560 Bullock Street, Pocatello, ID 83202","American, BBQ, Steak",3.5,Good,45,3,0,0,0,-112.4613260000,42.9105180000,Pocatello,Chubbuck
418,17582498,Riverwalk Cafe,"695 E Main St, Lava Hot Springs, ID 83246","Asian, Thai",3.6,Good,10,1,0,0,0,-112.0132000000,42.6200000000,Pocatello,Lava Hot Springs


In [32]:
zomato_df.shape

(29753, 15)

In [33]:
zomato_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 29753 entries, 0 to 419
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype 
---  ------                --------------  ----- 
 0   Restaurant ID         29753 non-null  int64 
 1   Restaurant Name       29753 non-null  object
 2   Location              29753 non-null  object
 3   Cuisine               29753 non-null  object
 4   Ratings               29753 non-null  object
 5   Rating text           29753 non-null  object
 6   Average Cost for Two  29753 non-null  int64 
 7   Price Range           29753 non-null  int64 
 8   Table Booking         29753 non-null  int64 
 9   Online Delivery       29753 non-null  int64 
 10  Delivering now        29753 non-null  int64 
 11  Longitude             29753 non-null  object
 12  Latitude              29753 non-null  object
 13  City                  29753 non-null  object
 14  Locality              29753 non-null  object
dtypes: int64(6), object(9)
memory usage: 3.6+ M

In [34]:
zomato_df['Ratings'] = pd.to_numeric(zomato_df['Ratings'], errors='coerce')
zomato_df['Ratings'] = pd.to_numeric(zomato_df['Ratings'], errors='coerce')
zomato_df['Longitude'] = pd.to_numeric(zomato_df['Longitude'], errors='coerce')
zomato_df['Latitude'] = pd.to_numeric(zomato_df['Latitude'], errors='coerce')

In [35]:
zomato_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 29753 entries, 0 to 419
Data columns (total 15 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Restaurant ID         29753 non-null  int64  
 1   Restaurant Name       29753 non-null  object 
 2   Location              29753 non-null  object 
 3   Cuisine               29753 non-null  object 
 4   Ratings               29753 non-null  float64
 5   Rating text           29753 non-null  object 
 6   Average Cost for Two  29753 non-null  int64  
 7   Price Range           29753 non-null  int64  
 8   Table Booking         29753 non-null  int64  
 9   Online Delivery       29753 non-null  int64  
 10  Delivering now        29753 non-null  int64  
 11  Longitude             29753 non-null  float64
 12  Latitude              29753 non-null  float64
 13  City                  29753 non-null  object 
 14  Locality              29753 non-null  object 
dtypes: float64(3), int64(6), o

In [36]:
zomato_df.isnull().sum()

Restaurant ID           0
Restaurant Name         0
Location                0
Cuisine                 0
Ratings                 0
Rating text             0
Average Cost for Two    0
Price Range             0
Table Booking           0
Online Delivery         0
Delivering now          0
Longitude               0
Latitude                0
City                    0
Locality                0
dtype: int64

In [37]:
zomato_df.describe()

Unnamed: 0,Restaurant ID,Ratings,Average Cost for Two,Price Range,Table Booking,Online Delivery,Delivering now,Longitude,Latitude
count,29753.0,29753.0,29753.0,29753.0,29753.0,29753.0,29753.0,29753.0,29753.0
mean,10354750.0,3.325201,2544.60888,2.183444,0.144221,0.192115,0.001949,63.466626,19.114803
std,8534506.0,1.270941,18230.5124,0.954264,0.351319,0.39397,0.04411,68.264524,23.496187
min,4.0,0.0,0.0,1.0,0.0,0.0,0.0,-157.948486,-41.330428
25%,306407.0,3.1,50.0,1.0,0.0,0.0,0.0,77.088328,23.044337
50%,16608860.0,3.7,450.0,2.0,0.0,0.0,0.0,77.219739,28.56772
75%,18245280.0,4.1,1000.0,3.0,0.0,0.0,0.0,77.363084,28.642216
max,18500650.0,4.9,800000.0,4.0,1.0,1.0,1.0,174.832089,73.99


In [38]:
zomato_df.to_csv('zomato_df.csv',index=False)