# Assumption for all parts of Task 1: dummy data in the source JSON is kept in the output, not deleted

# Part 1

In [1]:
import requests
import csv

# Part 1: download the restaurant JSON and export one CSV row per restaurant.

# URL of the online JSON data
url = "https://raw.githubusercontent.com/Papagoat/brain-assessment/main/restaurant_data.json"

# Fetch the JSON data. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() reports an HTTP error right away
# instead of silently skipping the export, which would leave the following
# cells reading a missing or stale restaurants.csv.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the JSON data: a list of result pages, each with a "restaurants" list
data = response.json()

# One row per restaurant, in the same column order as the CSV header below
extracted_data = []
for page in data:
    for entry in page["restaurants"]:
        restaurant = entry["restaurant"]
        location = restaurant["location"]
        user_rating = restaurant["user_rating"]
        extracted_data.append([
            restaurant["id"],
            restaurant["name"],
            # Numeric country id for now; a later cell swaps it for the name
            location["country_id"],
            location["city"],
            user_rating["votes"],
            # The rating is converted to a number before it is written out
            float(user_rating["aggregate_rating"]),
            restaurant["cuisines"],
        ])

# Define the filename for the CSV file
filename = "restaurants.csv"

# newline="" lets the csv module control line endings itself
with open(filename, "w", newline="", encoding="utf-8") as csvfile:
    csv_writer = csv.writer(csvfile)

    # Write the header
    csv_writer.writerow([
        "Restaurant Id",
        "Restaurant Name",
        "Country",
        "City",
        "User Rating Votes",
        "User Aggregate Rating",
        "Cuisines",
    ])

    # Write the extracted data
    csv_writer.writerows(extracted_data)


In [2]:
import pandas as pd

# Load the CSV written by the previous cell into a DataFrame
restaurantsdf = pd.read_csv("restaurants.csv")
# Preview the first rows to check the export
restaurantsdf.head()

Unnamed: 0,Restaurant Id,Restaurant Name,Country,City,User Rating Votes,User Aggregate Rating,Cuisines
0,18649486,The Drunken Botanist,1,Gurgaon,4765,4.4,"Continental, Italian, North Indian, Chinese"
1,308322,Hauz Khas Social,1,New Delhi,13627,4.6,"Continental, American, Asian, North Indian, Ch..."
2,18856789,AIR- An Ivory Region,1,New Delhi,1819,4.1,"North Indian, Chinese, Continental, Asian"
3,307374,AMA Cafe,1,New Delhi,3252,4.4,"Cafe, Juices"
4,18238278,Tamasha,1,New Delhi,8112,4.4,"Finger Food, North Indian, Continental, Italian"


In [3]:
# Read the country-code lookup table from Country-Code.xlsx (an Excel workbook, not a CSV)
countrycodedf = pd.read_excel("Country-Code.xlsx")
# Preview the first rows of the lookup table
countrycodedf.head()

Unnamed: 0,Country Code,Country
0,1,India
1,14,Australia
2,30,Brazil
3,37,Canada
4,94,Indonesia


In [4]:
# Replace each numeric country code in restaurantsdf["Country"] with the
# matching country name from countrycodedf, then save the result.
restaurantscountrycode = restaurantsdf["Country"]
countrycode = countrycodedf["Country Code"]
country = countrycodedf["Country"]

# Build the code -> name lookup once, rather than scanning every country code
# for every restaurant row. If a code is listed twice the last name wins,
# which matches the result of the original nested scan.
code_to_country = dict(zip(countrycode, country))

# Replace the whole column in one assignment. Codes with no entry in the
# lookup table (e.g. dummy data) keep their original value.
restaurantsdf["Country"] = [
    code_to_country.get(code, code) for code in restaurantscountrycode
]

# Write the file once, after all replacements, instead of once per matched row
restaurantsdf.to_csv("restaurants.csv", index=False)

# The required data is saved to the CSV file restaurants.csv in the current working folder

# Part 2

In [5]:
import requests
import csv

# Part 2: download the restaurant JSON and export one CSV row per event.

# URL of the online JSON data
url = "https://raw.githubusercontent.com/Papagoat/brain-assessment/main/restaurant_data.json"

# Fetch the JSON data. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() reports an HTTP error right away
# instead of silently skipping the export, which would leave the following
# cell reading a missing or stale restaurant_events.csv.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the JSON data: a list of result pages, each with a "restaurants" list
data = response.json()

# One row per event, in the same column order as the CSV header below
extracted_data = []
for page in data:
    for entry in page["restaurants"]:
        restaurant = entry["restaurant"]

        # Restaurants without a "zomato_events" key contribute no rows
        for eventcount in restaurant.get("zomato_events", []):
            event = eventcount["event"]

            # Use the URL of the first photo, or "NA" when the event has
            # no photos (key missing or list empty)
            photos = event.get("photos")
            photo_url = photos[0]["photo"]["url"] if photos else "NA"

            extracted_data.append([
                event["event_id"],
                restaurant["id"],
                restaurant["name"],
                photo_url,
                # Missing title or dates fall back to the "NA" placeholder
                event.get("title", "NA"),
                event.get("start_date", "NA"),
                event.get("end_date", "NA"),
            ])

# Define the filename for the CSV file
filename = "restaurant_events.csv"

# newline="" lets the csv module control line endings itself
with open(filename, "w", newline="", encoding="utf-8") as csvfile:
    csv_writer = csv.writer(csvfile)

    # Write the header
    csv_writer.writerow([
        "Event Id",
        "Restaurant Id",
        "Restaurant Name",
        "Photo URL",
        "Event Title",
        "Event Start Date",
        "Event End Date",
    ])

    # Write the extracted data
    csv_writer.writerows(extracted_data)

In [6]:
import pandas as pd

# Load the events CSV written by the previous cell into a DataFrame.
# NOTE(review): this rebinds restaurantsdf, which earlier held the restaurants
# table - confirm no cell that is re-run afterwards still expects the old contents.
# NOTE(review): pandas' default NA parsing presumably turns the "NA" placeholder
# into NaN, which would explain the blank Photo URL cells in the preview -
# pass keep_default_na=False if the literal text should be kept; confirm.
restaurantsdf = pd.read_csv("restaurant_events.csv")
# Preview the first rows to check the export
restaurantsdf.head()

Unnamed: 0,Event Id,Restaurant Id,Restaurant Name,Photo URL,Event Title,Event Start Date,Event End Date
0,322331,18649486,The Drunken Botanist,https://b.zmtcdn.com/data/zomato_events/photos...,BackToBasic Wednesdays !!\n\n\n,2019-03-06,2019-08-28
1,332812,308322,Hauz Khas Social,https://b.zmtcdn.com/data/zomato_events/photos...,Live 20/20 Match Screenings,2019-03-29,2019-05-23
2,336644,18856789,AIR- An Ivory Region,https://b.zmtcdn.com/data/zomato_events/photos...,Dhol Bhangra Night,2019-04-10,2019-04-11
3,336889,18382360,Local,,Cocktail Wednesday,2019-04-10,2019-04-10
4,336888,18382360,Local,,Cocktail Wednesday,2019-04-10,2019-04-10


# The required data is saved to the CSV file restaurant_events.csv

# When opening the CSV in a spreadsheet application, widen the Event Start Date and Event End Date columns to see the full dates; otherwise they are displayed as "########"

# Part 3

In [7]:
import requests

# Part 3: find the lowest and highest aggregate rating seen for each
# rating text category.

# URL of the online JSON data
url = "https://raw.githubusercontent.com/Papagoat/brain-assessment/main/restaurant_data.json"

# Fetch the JSON data. The timeout keeps the cell from hanging forever on a
# stalled connection, and raise_for_status() reports an HTTP error right away
# instead of silently printing nothing.
response = requests.get(url, timeout=30)
response.raise_for_status()

# Parse the JSON data: a list of result pages, each with a "restaurants" list
data = response.json()

# Running minimum and maximum per category. They start at +inf / -inf so the
# first rating seen for a category always replaces them.
ratingthresholds = {
    "Excellent": {"min": float('inf'), "max": float('-inf')},
    "Very Good": {"min": float('inf'), "max": float('-inf')},
    "Good": {"min": float('inf'), "max": float('-inf')},
    "Average": {"min": float('inf'), "max": float('-inf')},
    "Poor": {"min": float('inf'), "max": float('-inf')}
}

for page in data:
    for entry in page["restaurants"]:
        user_rating = entry["restaurant"]["user_rating"]

        # Only the five categories above are tracked; the dict keys are the
        # single source of truth for the allowed rating texts.
        ratingtext = user_rating["rating_text"]
        if ratingtext not in ratingthresholds:
            continue

        # Convert after filtering so an ignored record with a non-numeric
        # rating cannot abort the whole computation.
        aggregaterating = float(user_rating["aggregate_rating"])

        # Update minimum and maximum for the matching category
        thresholds = ratingthresholds[ratingtext]
        thresholds["min"] = min(thresholds["min"], aggregaterating)
        thresholds["max"] = max(thresholds["max"], aggregaterating)

# Print the thresholds for each rating text category
for ratingtext, thresholds in ratingthresholds.items():
    print(f"{ratingtext}: Minimum Aggregate Rating = {thresholds['min']}, Maximum Aggregate Rating = {thresholds['max']}")


Excellent: Minimum Aggregate Rating = 4.5, Maximum Aggregate Rating = 4.9
Very Good: Minimum Aggregate Rating = 4.0, Maximum Aggregate Rating = 4.4
Good: Minimum Aggregate Rating = 3.5, Maximum Aggregate Rating = 3.9
Average: Minimum Aggregate Rating = 2.5, Maximum Aggregate Rating = 3.4
Poor: Minimum Aggregate Rating = 2.2, Maximum Aggregate Rating = 2.2


# Excellent: Minimum Aggregate Rating = 4.5, Maximum Aggregate Rating = 4.9
# Very Good: Minimum Aggregate Rating = 4.0, Maximum Aggregate Rating = 4.4
# Good: Minimum Aggregate Rating = 3.5, Maximum Aggregate Rating = 3.9
# Average: Minimum Aggregate Rating = 2.5, Maximum Aggregate Rating = 3.4
# Poor: Minimum Aggregate Rating = 2.2, Maximum Aggregate Rating = 2.2