# Level 1

## Task 1 : Top Cuisines   

### Determine the top three most common cuisines in the dataset.

In [None]:
import pandas as pd
from collections import Counter

def top_three_cuisines(csv_file_path, cuisine_column_name):
    """Return the three most frequent values of the cuisine column.

    Rows whose cuisine cell is missing are ignored, so a blank entry can
    never be reported as a "cuisine".

    Args:
        csv_file_path: Path (or file-like object) passed to ``pd.read_csv``.
        cuisine_column_name: Name of the column holding the cuisine labels.

    Returns:
        A list of up to three ``(cuisine, count)`` tuples, most frequent
        first. Values with equal counts keep their first-seen order.

    Raises:
        KeyError: If ``cuisine_column_name`` is not a column of the CSV.
    """
    df = pd.read_csv(csv_file_path)

    # Drop missing cells first: read_csv turns blanks into NaN, and Counter
    # would otherwise tally NaN as if it were a cuisine name.
    # NOTE(review): every cell is counted verbatim. If cells hold
    # comma-separated lists, each distinct combination counts as one value --
    # confirm that this is the intended definition of "cuisine".
    cuisine_counter = Counter(df[cuisine_column_name].dropna().tolist())

    return cuisine_counter.most_common(3)

# Source CSV and the column that holds each restaurant's cuisine label.
csv_file_path, cuisine_column_name = 'Dataset .csv', 'Cuisines'

# Rank the cuisines, keep the result in `result`, and report it.
result = top_three_cuisines(
    csv_file_path=csv_file_path,
    cuisine_column_name=cuisine_column_name,
)
print("Top Three Cuisines:", result)



### Calculate the percentage of restaurants that serve each of the top cuisines.

In [None]:
import pandas as pd
from collections import Counter

def top_cuisines_percentage(csv_file_path, cuisine_column_name, top_n=3):
    """Return the share of restaurants that serve each of the top cuisines.

    Args:
        csv_file_path: Path (or file-like object) passed to ``pd.read_csv``.
        cuisine_column_name: Name of the column holding the cuisine labels.
        top_n: How many of the most frequent cuisines to report.

    Returns:
        A dict mapping each of the ``top_n`` most frequent cuisines to its
        percentage of all rows in the CSV, rounded to two decimals and
        ordered most frequent first.

    Raises:
        KeyError: If ``cuisine_column_name`` is not a column of the CSV.
    """
    df = pd.read_csv(csv_file_path)

    # The denominator is every restaurant in the file, including rows whose
    # cuisine cell is missing.
    total_restaurants = len(df)

    # Missing cells are excluded from the ranking. Otherwise NaN could enter
    # the top list and be reported at 0%, since `== NaN` never matches a row.
    cuisine_counter = Counter(df[cuisine_column_name].dropna().tolist())

    # most_common() already carries each cuisine's row count, so there is no
    # need to filter the DataFrame again per cuisine.
    return {
        cuisine: round(count / total_restaurants * 100, 2)
        for cuisine, count in cuisine_counter.most_common(top_n)
    }

# Source CSV and the column that holds each restaurant's cuisine label.
csv_file_path, cuisine_column_name = 'Dataset .csv', 'Cuisines'

# Compute the share of restaurants per top cuisine, then report it.
result = top_cuisines_percentage(
    csv_file_path=csv_file_path,
    cuisine_column_name=cuisine_column_name,
)
print("Percentage of Restaurants for Each Top Cuisine:", result)


## Task 2 : City Analysis

### Identify the city with the highest number of restaurants in the dataset.

In [None]:
import pandas as pd
from collections import Counter

def city_with_highest_restaurants(csv_file_path, city_column_name):
    """Find the city that appears on the most rows of the CSV.

    Args:
        csv_file_path: Path (or file-like object) passed to ``pd.read_csv``.
        city_column_name: Name of the column holding the city of each row.

    Returns:
        A ``(city, row_count)`` tuple for the most frequent city.

    Raises:
        IndexError: If the column contains no rows at all.
    """
    frame = pd.read_csv(csv_file_path)
    tally = Counter(frame[city_column_name].tolist())

    # most_common(1) yields a one-element list holding the winning
    # (city, count) pair; unpack it in a single step.
    top_city, num_restaurants = tally.most_common(1)[0]
    return top_city, num_restaurants

# Source CSV and the column that names the city of each restaurant.
csv_file_path, city_column_name = 'Dataset .csv', 'City'

# Look up the busiest city and how many restaurants it has, then report both.
result_city, result_num_restaurants = city_with_highest_restaurants(
    csv_file_path=csv_file_path,
    city_column_name=city_column_name,
)
print(f"The city with the highest number of restaurants is {result_city} with {result_num_restaurants} restaurants.")


### Calculate the average rating for restaurants in each city.

In [None]:
import pandas as pd

def average_rating_by_city(csv_file_path, city_column_name, rating_column_name):
    """Compute the mean rating of the restaurants in every city.

    Args:
        csv_file_path: Path (or file-like object) passed to ``pd.read_csv``.
        city_column_name: Name of the column to group rows by.
        rating_column_name: Name of the column whose values are averaged.

    Returns:
        A pandas Series indexed by city whose values are the mean of the
        rating column for that city.
    """
    frame = pd.read_csv(csv_file_path)

    # Select the rating column of each per-city group, then average it.
    ratings_grouped_by_city = frame.groupby(city_column_name)[rating_column_name]
    return ratings_grouped_by_city.mean()

# Source CSV plus the grouping column and the column to average.
csv_file_path, city_column_name, rating_column_name = 'Dataset .csv', 'City', 'Aggregate rating'

# Compute the per-city mean rating and keep it in `result`.
result = average_rating_by_city(
    csv_file_path=csv_file_path,
    city_column_name=city_column_name,
    rating_column_name=rating_column_name,
)

# Report: a heading line followed by the Series itself.
print("Average Rating for Restaurants in Each City:")
print(result)


### Determine the city with the highest average rating.

In [None]:
import pandas as pd

def city_with_highest_average_rating(csv_file_path, city_column_name, rating_column_name):
    """Find the city whose restaurants have the highest mean rating.

    Args:
        csv_file_path: Path (or file-like object) passed to ``pd.read_csv``.
        city_column_name: Name of the column to group rows by.
        rating_column_name: Name of the column whose values are averaged.

    Returns:
        A ``(city, mean_rating)`` tuple for the best-rated city.
    """
    frame = pd.read_csv(csv_file_path)
    mean_rating_per_city = frame.groupby(city_column_name)[rating_column_name].mean()

    # idxmax() gives the index label (the city) of the largest mean;
    # max() gives the mean itself.
    best_city = mean_rating_per_city.idxmax()
    best_mean = mean_rating_per_city.max()
    return best_city, best_mean

# Source CSV plus the grouping column and the column to average.
csv_file_path, city_column_name, rating_column_name = 'Dataset .csv', 'City', 'Aggregate rating'

# Look up the best-rated city and its mean rating, then report both.
result_city, result_highest_average_rating = city_with_highest_average_rating(
    csv_file_path=csv_file_path,
    city_column_name=city_column_name,
    rating_column_name=rating_column_name,
)
print(f"The city with the highest average rating is {result_city} with an average rating of {result_highest_average_rating:.2f}.")


## Task 3 : Price Range Distribution

### Create a histogram or bar chart to visualize the distribution of price ranges among the restaurants.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def visualize_price_distribution(csv_file_path, price_range_column_name):
    """Draw and display a histogram of the price-range column.

    Args:
        csv_file_path: Path (or file-like object) passed to ``pd.read_csv``.
        price_range_column_name: Name of the column whose values are binned.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    frame = pd.read_csv(csv_file_path)

    # Integer edges 1..6 are used both as histogram bin edges and as the
    # x-axis tick positions.
    edges = range(1, 7)

    _, ax = plt.subplots(figsize=(8, 6))
    ax.hist(
        frame[price_range_column_name],
        bins=edges,
        align='left',
        color='skyblue',
        edgecolor='black',
        rwidth=0.8,
    )
    ax.set_title('Distribution of Price Range Among Restaurants')
    ax.set_xlabel('Price Range')
    ax.set_ylabel('Number of Restaurants')
    ax.set_xticks(edges)
    plt.show()

# Source CSV and the column that holds each restaurant's price range.
csv_file_path, price_range_column_name = 'Dataset .csv', 'Price range'

# Draw the histogram of price ranges.
visualize_price_distribution(
    csv_file_path=csv_file_path,
    price_range_column_name=price_range_column_name,
)


### Calculate the percentage of restaurants in each price range category. 

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

def calculate_percentage_by_price_range(csv_file_path, price_range_column_name):
    """Compute what percentage of all rows falls into each price range.

    Args:
        csv_file_path: Path (or file-like object) passed to ``pd.read_csv``.
        price_range_column_name: Name of the column holding the price range.

    Returns:
        A pandas Series indexed by price-range value whose values are the
        row count for that value divided by the total number of rows,
        times 100.
    """
    frame = pd.read_csv(csv_file_path)

    # Rows per distinct price range, then scaled against the whole file.
    rows_per_price_range = frame[price_range_column_name].value_counts()
    return rows_per_price_range / len(frame) * 100

def visualize_percentage_by_price_range(percentage_by_price_range):
    """Draw and display a bar chart of the per-price-range percentages.

    Args:
        percentage_by_price_range: pandas Series of percentages indexed by
            price range; it is sorted by index before plotting.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    plt.figure(figsize=(8, 6))

    # Sort by index so the bars follow the price-range values in ascending
    # order.
    ordered = percentage_by_price_range.sort_index()
    ax = ordered.plot(kind='bar', color='skyblue')

    ax.set_title('Percentage of Restaurants in Each Price Range Category')
    ax.set_xlabel('Price Range')
    ax.set_ylabel('Percentage of Restaurants')
    plt.xticks(rotation=0)  # rotation=0 keeps the tick labels horizontal
    plt.show()

# Source CSV and the column that holds each restaurant's price range.
csv_file_path, price_range_column_name = 'Dataset .csv', 'Price range'

# Compute the share of restaurants per price range.
percentage_by_price_range = calculate_percentage_by_price_range(
    csv_file_path=csv_file_path,
    price_range_column_name=price_range_column_name,
)

# Report the numbers as text first ...
print("Percentage of Restaurants in Each Price Range Category:")
print(percentage_by_price_range)

# ... then as a bar chart.
visualize_percentage_by_price_range(percentage_by_price_range=percentage_by_price_range)


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load the dataset from the CSV file.
file_path = 'Dataset .csv'
data = pd.read_csv(file_path)

# Count how many restaurants fall into each value of the 'Price range' column.
# NOTE: the variable keeps its original (misspelled) name `prize_range_counts`
# so that any other cell referring to it keeps working.
prize_range_counts = data['Price range'].value_counts()

# Draw the pie chart: one wedge per price range, labelled with its share.
plt.figure(figsize=(8, 8))  # Set the figure size
plt.pie(prize_range_counts, labels=prize_range_counts.index, autopct='%1.1f%%', startangle=140)
# Title fixed: the chart shows price ranges, not "prize" ranges.
plt.title('Distribution of Price Ranges among Restaurants')
plt.axis('equal')  # Equal aspect ratio ensures that pie is drawn as a circle
plt.tight_layout()
plt.show()

## Task 4 : Online Delivery

### Determine the percentage of restaurants that offer online delivery.

In [None]:
import pandas as pd

def percentage_of_restaurants_with_online_delivery(csv_file_path, online_delivery_column_name):
    """Return the percentage of rows whose online-delivery flag is "yes".

    The flag is matched case-insensitively and ignoring surrounding
    whitespace. Any other value, including a missing cell, counts as "no".

    Args:
        csv_file_path: Path (or file-like object) passed to ``pd.read_csv``.
        online_delivery_column_name: Name of the yes/no column.

    Returns:
        The number of "yes" rows divided by the total number of rows,
        times 100.

    Raises:
        KeyError: If ``online_delivery_column_name`` is not a column of the
            CSV.
    """
    df = pd.read_csv(csv_file_path)

    # Vectorised yes/no -> bool conversion. astype(str) lets missing cells
    # (NaN) fall through as "not yes" instead of raising AttributeError the
    # way a per-cell `x.lower()` does.
    offers_delivery = (
        df[online_delivery_column_name]
        .astype(str)
        .str.strip()
        .str.lower()
        .eq('yes')
    )

    total_restaurants = len(df)
    # Summing booleans counts the True entries.
    return offers_delivery.sum() / total_restaurants * 100

# Source CSV and the yes/no column that flags online delivery.
csv_file_path, online_delivery_column_name = 'Dataset .csv', 'Has Online delivery'

# Compute the share of restaurants offering online delivery, then report it.
result = percentage_of_restaurants_with_online_delivery(
    csv_file_path=csv_file_path,
    online_delivery_column_name=online_delivery_column_name,
)
print(f"The percentage of restaurants offering online delivery is: {result:.2f}%.")


### Compare the average ratings of restaurants with and without online delivery.

In [None]:
import pandas as pd

def compare_average_ratings(csv_file_path, online_delivery_column_name, rating_column_name):
    """Compare the mean rating of restaurants with and without online delivery.

    The delivery flag is matched case-insensitively and ignoring surrounding
    whitespace. Any value other than "yes", including a missing cell, is
    treated as "no delivery".

    Args:
        csv_file_path: Path (or file-like object) passed to ``pd.read_csv``.
        online_delivery_column_name: Name of the yes/no column.
        rating_column_name: Name of the column whose values are averaged.

    Returns:
        A pandas Series indexed by the boolean delivery flag (``False`` /
        ``True``) whose values are the mean rating of each group.
    """
    df = pd.read_csv(csv_file_path)

    # Vectorised yes/no -> bool conversion. astype(str) lets missing cells
    # (NaN) fall into the False group instead of raising AttributeError the
    # way a per-cell `x.lower()` does.
    df[online_delivery_column_name] = (
        df[online_delivery_column_name]
        .astype(str)
        .str.strip()
        .str.lower()
        .eq('yes')
    )

    # Average the rating within each delivery group.
    return df.groupby(online_delivery_column_name)[rating_column_name].mean()

# Source CSV, the yes/no delivery column, and the rating column to average.
csv_file_path, online_delivery_column_name, rating_column_name = (
    'Dataset .csv',
    'Has Online delivery',
    'Aggregate rating',
)

# Compute the mean rating per delivery group and keep it in `result`.
result = compare_average_ratings(
    csv_file_path=csv_file_path,
    online_delivery_column_name=online_delivery_column_name,
    rating_column_name=rating_column_name,
)

# Report: a heading line followed by the Series itself.
print("Average Ratings Comparison:")
print(result)
