In [1]:
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


In [2]:
# Read the restaurant dataset into a DataFrame.
# The path is relative to the notebook's working directory; note that the
# literal contains a space before ".csv" and must match the file on disk.
data = pd.read_csv('Dataset .csv')

# Sanity check on the load: print (number of rows, number of columns).
n_rows, n_cols = data.shape
print((n_rows, n_cols))

(9551, 21)


In [None]:

# Visualize the locations of restaurants on an interactive map, using the
# "Latitude" and "Longitude" columns of `data` for marker placement.
# No explicit `center` is passed, so the initial viewport position is left to
# Plotly Express; only the zoom level is fixed here.
fig = px.scatter_mapbox(
    data_frame=data,
    # Marker position.
    lat="Latitude",
    lon="Longitude",
    # Marker colour encodes the restaurant's rating.
    color="Aggregate rating",
    # Tooltip: restaurant name as the heading, rating as an extra field.
    hover_name="Restaurant Name",
    hover_data=["Aggregate rating"],
    # NOTE(review): no `size` column is given, so `size_max` presumably has
    # no visible effect here - confirm before relying on it.
    size_max=10,
    zoom=5,
    mapbox_style="open-street-map",
)

fig.show()



In [None]:
# Analyze the distribution of restaurants across countries.

# Number of restaurants per country code, most frequent first; keep the ten
# largest groups only.
country_counts = data["Country Code"].value_counts().head(10)

# Bar chart of the ranking.
plt.figure(figsize=(12, 6))
sns.barplot(
    x=country_counts.index,
    y=country_counts.values,
    # Pin the bar order to the by-count ranking. When the category values are
    # numeric (as country codes presumably are - confirm against the CSV),
    # seaborn otherwise sorts the bars by code value, so the chart would not
    # read as a "top 10" ranking.
    order=country_counts.index,
    palette="viridis",
)
plt.xlabel("Country Code")
plt.ylabel("Number of Restaurants")
plt.title("Top 10 Countries with the Highest Number of Restaurants")
plt.show()

In [None]:
# Number of restaurants per city, most frequent first; the first ten entries
# of the sorted counts are the ten largest cities.
city_counts = data["City"].value_counts().iloc[:10]

# Bar chart of the ranking. The Axes returned by seaborn is used directly to
# set the axis labels and the title in one call.
plt.figure(figsize=(14, 6))
sns.barplot(
    x=city_counts.index,
    y=city_counts.to_numpy(),
    palette="coolwarm",
).set(
    xlabel="City",
    ylabel="Number of Restaurants",
    title="Top 10 Cities with the Highest Number of Restaurants",
)
# City names are long; tilt the tick labels so they do not overlap.
plt.xticks(rotation=45)
plt.show()


In [None]:
# Determine whether a restaurant's location is related to its rating.
# First view: latitude on the x-axis against the aggregate rating.
plt.figure(figsize=(10, 5))
sns.scatterplot(
    data=data,
    x="Latitude",
    y="Aggregate rating",
    color='blue',
    alpha=0.5,  # semi-transparent markers so overlapping points stay visible
).set(
    xlabel="Latitude",
    ylabel="Aggregate Rating",
    title="Latitude vs. Aggregate Rating",
)
plt.show()




In [None]:
# Second location view: longitude on the x-axis against the aggregate rating.
plt.figure(figsize=(10, 5))
sns.scatterplot(
    data=data,
    x="Longitude",
    y="Aggregate rating",
    color='red',
    alpha=0.5,  # semi-transparent markers so overlapping points stay visible
).set(
    xlabel="Longitude",
    ylabel="Aggregate Rating",
    title="Longitude vs. Aggregate Rating",
)
plt.show()

In [None]:
# Pairwise correlation between the two coordinate columns and the rating.
# Pearson is pandas' default method; it is spelled out here for the reader.
corr_matrix = data.loc[
    :, ["Latitude", "Longitude", "Aggregate rating"]
].corr(method="pearson")

# Print the 3x3 matrix as plain text under a heading.
print("\nCorrelation Matrix:\n", corr_matrix)

In [None]:
# City-wise rating analysis: mean aggregate rating per city, highest first,
# truncated to the first ten cities.
# No minimum restaurant count is applied, so a city with very few restaurants
# is ranked on the same footing as a city with many.
city_ratings = (
    data.groupby("City")["Aggregate rating"]
    .agg("mean")
    .sort_values(ascending=False)
    .iloc[:10]
)

# Bar chart of the ranking. The Axes returned by seaborn is used directly to
# set the axis labels and the title in one call.
plt.figure(figsize=(12, 6))
sns.barplot(
    x=city_ratings.index,
    y=city_ratings.to_numpy(),
    palette="magma",
).set(
    xlabel="City",
    ylabel="Average Aggregate Rating",
    title="Top 10 Cities with Highest Average Ratings",
)
# Tilt the city names so neighbouring tick labels do not overlap.
plt.xticks(rotation=45)
plt.show()
