**Exploring the Impact of Restaurant Industry Performance on Stock Prices: A Case Study of Zomato in Bangalore**

In [None]:
# Show the notebook's current working directory; the relative CSV paths used
# when loading the data are resolved against it.
%pwd


'/content'

In [None]:
from google.colab import drive

# Mount Google Drive at the Colab root mount point. The mount point itself must
# sit inside a directory that already exists, so a path *inside* the Drive
# (e.g. '/content/drive/MyDrive/DataSet') cannot be used here -- it only exists
# after mounting. Once mounted, the dataset folder is reachable as
# '/content/drive/MyDrive/DataSet', i.e. 'drive/MyDrive/DataSet' relative to
# the '/content' working directory.
drive.mount('/content/drive')

ValueError: Mountpoint must be in a directory that exists

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Read the Zomato Bangalore restaurant listing (path is relative to the working directory)
df_restaurants = pd.read_csv(filepath_or_buffer='drive/MyDrive/DataSet/Zomato_Bangalore_Dataset.csv')

# Read the Zomato stock price history from the same Drive folder
df_stock = pd.read_csv(filepath_or_buffer='drive/MyDrive/DataSet/Zomato_stockprice_Dataset.csv')

# ----------- Data Preprocessing for Zomato Restaurant Data -----------

# Columns that are not referenced anywhere else in this cell
unused_columns = ['url', 'address', 'phone', 'menu_item', 'dish_liked', 'reviews_list']
df_restaurants = df_restaurants.drop(columns=unused_columns)

# Handle missing or unusual rating values
def handle_rate(value):
    """Convert a raw rating entry into a float.

    The placeholders 'NEW' and '-' become NaN. Any other entry is turned into
    text, everything from the first '/' onwards is discarded (so '4.1/5'
    yields 4.1), and the remainder is parsed with float().
    """
    is_placeholder = value == 'NEW' or value == '-'
    if is_placeholder:
        return np.nan

    # Keep only the part in front of the first '/' (the whole text if there is none).
    score_text, _, _ = str(value).partition('/')
    return float(score_text)

# Parse every raw rating entry into a float (placeholders become NaN)
df_restaurants['rate'] = df_restaurants['rate'].apply(handle_rate)

# Fill missing ratings with the mean value.
# The result is assigned back to the column instead of calling
# fillna(inplace=True) on the column selection: that chained in-place form
# operates on an intermediate object, is deprecated in recent pandas, and does
# not update the frame when copy-on-write is enabled.
df_restaurants['rate'] = df_restaurants['rate'].fillna(df_restaurants['rate'].mean())

# Convert the 'approx_cost(for two people)' to numeric by handling commas
def handle_cost(value):
    """Convert a cost entry into a float, tolerating thousands separators.

    When the textual form of the entry contains commas they are stripped
    before parsing (e.g. '1,200' -> 1200.0); otherwise the entry is passed to
    float() unchanged.
    """
    as_text = str(value)
    if ',' in as_text:
        return float(as_text.replace(',', ''))
    return float(value)

# Rename the verbose columns first, so that the numeric conversion below
# overwrites the one and only 'Cost2plates' column. Creating 'Cost2plates'
# before renaming 'approx_cost(for two people)' to the same label would leave
# two columns called 'Cost2plates' (one numeric, one raw text).
df_restaurants = df_restaurants.rename(
    columns={'approx_cost(for two people)': 'Cost2plates', 'listed_in(type)': 'Type'}
)
df_restaurants['Cost2plates'] = df_restaurants['Cost2plates'].apply(handle_cost)

# Drop the column that is not needed further
df_restaurants = df_restaurants.drop(columns=['listed_in(city)'])

# ----------- Grouping Less Frequent Categories -----------

# Group less frequent restaurant types
# Frequency of every rest_type label, most common first
rest_types = df_restaurants['rest_type'].value_counts()
# Labels that occur fewer than 1000 times are treated as rare
rest_types_less_than_1000 = rest_types[rest_types.lt(1000)]

def handle_rest_type(value):
    """Return 'others' for a rare rest_type label, otherwise the label itself."""
    # `in` on a Series tests its index, which here holds the rest_type labels.
    if value in rest_types_less_than_1000:
        return 'others'
    return value

df_restaurants['rest_type'] = df_restaurants['rest_type'].apply(handle_rest_type)

# Group less frequent locations
# Frequency of every location label, most common first
locations = df_restaurants['location'].value_counts()
# Labels that occur fewer than 300 times are treated as rare
locations_less_than_300 = locations[locations.lt(300)]

def handle_location(value):
    """Return 'others' for a rare location label, otherwise the label itself."""
    # `in` on a Series tests its index, which here holds the location labels.
    if value in locations_less_than_300:
        return 'others'
    return value

df_restaurants['location'] = df_restaurants['location'].apply(handle_location)

# Group less frequent cuisines
# Frequency of every cuisines entry, most common first
cuisines = df_restaurants['cuisines'].value_counts()
# Entries that occur fewer than 100 times are treated as rare
cuisines_less_than_100 = cuisines[cuisines.lt(100)]

def handle_cuisines(value):
    """Return 'others' for a rare cuisines entry, otherwise the entry itself."""
    # `in` on a Series tests its index, which here holds the cuisines entries.
    if value in cuisines_less_than_100:
        return 'others'
    return value

df_restaurants['cuisines'] = df_restaurants['cuisines'].apply(handle_cuisines)

# ----------- Data Preprocessing for Zomato Stock Price Data -----------

# Parse 'Date' into datetimes and order the rows chronologically.
# NOTE(review): the recorded output of this cell shows a warning pointing at
# the to_datetime call on 'Date' -- presumably about date-format inference;
# confirm the CSV's date format and consider passing it explicitly.
df_stock['Date'] = pd.to_datetime(df_stock['Date'])
df_stock = df_stock.sort_values(by='Date')

# Rescale 'Close' into the [0, 1] range; the fitted scaler stays available
# under the name `scaler`.
scaler = MinMaxScaler(feature_range=(0, 1))
close_scaled = scaler.fit_transform(df_stock[['Close']])
df_stock['Close'] = close_scaled

# ----------- Aggregating Restaurant Data -----------

# Name of the restaurant-side date column used for the daily aggregation.
# NOTE(review): the recorded run of this cell failed with KeyError: 'review_date',
# so the loaded restaurant CSV has no such column -- replace with the actual
# date column before running.
REVIEW_DATE_COLUMN = 'review_date'
if REVIEW_DATE_COLUMN not in df_restaurants.columns:
    raise KeyError(
        f"{REVIEW_DATE_COLUMN!r} is not a column of df_restaurants; "
        f"available columns: {list(df_restaurants.columns)}"
    )
df_restaurants['date'] = pd.to_datetime(df_restaurants[REVIEW_DATE_COLUMN])

# Group on midnight-normalised timestamps rather than `.dt.date`: `.dt.date`
# yields plain `datetime.date` objects (object dtype), which cannot be merged
# against the datetime64 'Date' column of df_stock.
review_day = df_restaurants['date'].dt.normalize()

# Average restaurant rating per day
daily_avg_ratings = df_restaurants.groupby(review_day)['rate'].mean()

# Total number of votes per day
daily_total_votes = df_restaurants.groupby(review_day)['votes'].sum()

# Share of that day's rows whose 'online_order' value is 'Yes'
daily_online_order_ratio = df_restaurants['online_order'].eq('Yes').groupby(review_day).mean()

# Merge the daily restaurant aggregates into the stock data on the date;
# inner joins keep only dates present on both sides.
df_merged = pd.merge(df_stock, daily_avg_ratings, left_on='Date', right_index=True, how='inner')
df_merged = pd.merge(df_merged, daily_total_votes, left_on='Date', right_index=True, how='inner')
df_merged = pd.merge(df_merged, daily_online_order_ratio, left_on='Date', right_index=True, how='inner')

# Rename columns for clarity
df_merged = df_merged.rename(columns={'rate': 'AvgRating', 'votes': 'TotalVotes', 'online_order': 'OnlineOrderRatio'})

# Final dataset ready for analysis or modeling
df_merged.head()


  df_stock['Date'] = pd.to_datetime(df_stock['Date'])


KeyError: 'review_date'

In [None]:
# Colab helper for attaching Google Drive to the runtime's filesystem
from google.colab import drive

# Attach Drive at the '/content/drive' mount point
drive.mount(mountpoint='/content/drive')

In [None]:
# Print the column labels of the restaurant frame.
# NOTE(review): df_restaurants is assigned in the data-loading cell; the
# NameError recorded for this cell means that name did not exist in the kernel
# when it ran -- presumably the kernel was restarted or the loading cell was
# not executed first.
print(df_restaurants.columns)

NameError: name 'df_restaurants' is not defined