#                                     ____Zomato Dining Restaurants Analysis In Chennai____

In [1]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
import time
from webdriver_manager.chrome import ChromeDriverManager
# Set up Selenium: create the single, module-level Chrome driver that every
# scraping function below uses via the global name `driver`.
options = Options()
# options.add_argument("--headless")  # Uncomment to run in headless mode
# The Service is built from whatever ChromeDriverManager().install() returns
# (expected to be the path of a chromedriver binary — confirm against the
# webdriver_manager documentation).
service = Service(ChromeDriverManager().install())
driver = webdriver.Chrome(service=service, options=options)

def _text_or_na(soup, tag, css_class):
    """Return the stripped text of the first `tag` with `css_class`, or 'N/A'.

    The element is looked up once; 'N/A' is returned when no element matches.
    """
    element = soup.find(tag, class_=css_class)
    if element is None:
        return 'N/A'
    return element.text.strip()


def scrape_product_details(product_url):
    """Open a restaurant page in the shared driver and extract its details.

    Args:
        product_url: Absolute URL of the restaurant page to load.

    Returns:
        A dict with the keys 'Product Name', 'Cuisines', 'Address',
        'Opening and Closing Time', 'Ratings' and 'Average Cost'. Any field
        whose element is not found on the page is set to 'N/A'.
    """
    driver.get(product_url)
    time.sleep(3)  # Fixed wait so the page has time to load before parsing

    soup = BeautifulSoup(driver.page_source, 'html.parser')

    # The class names below are tied to the current page markup; modify these
    # selectors if the product page structure changes.
    return {
        'Product Name': _text_or_na(soup, 'h1', 'sc-7kepeu-0 sc-iSDuPN fwzNdh'),
        'Cuisines': _text_or_na(soup, 'div', 'sc-gVyKpa fXdtVd'),
        'Address': _text_or_na(soup, 'a', 'sc-clNaTc vNCcy'),
        'Opening and Closing Time': _text_or_na(soup, 'span', 'sc-kasBVs dfwCXs'),
        'Ratings': _text_or_na(soup, 'div', 'sc-1q7bklc-1 cILgox'),
        'Average Cost': _text_or_na(soup, 'div', 'sc-bEjcJn ePRRqr'),
    }

def scroll_to_load_content(pause=3, max_scrolls=None):
    """Scroll the current page to the bottom until its height stops growing.

    Args:
        pause: Seconds to wait after each scroll so new content can load.
        max_scrolls: Optional upper bound on the number of height-increasing
            scrolls. ``None`` (the default) keeps scrolling until the page
            height no longer changes, which never terminates on a page that
            always loads more content.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls_done = 0
    while max_scrolls is None or scrolls_done < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # Height unchanged after the pause: treat as "no new content".
            break
        last_height = new_height
        scrolls_done += 1

def scrape_books(url):
    """Scrape every restaurant card on a listing page into a DataFrame.

    The listing is loaded in the shared driver, scrolled until no more content
    appears, and parsed once. Each card's link is then visited through
    ``scrape_product_details``. A failure on one card is printed and skipped
    so the remaining cards are still collected.

    Args:
        url: URL of the listing page.

    Returns:
        A pandas DataFrame with one row per successfully scraped restaurant
        (empty if nothing was scraped).
    """
    from urllib.parse import urljoin

    base_url = 'https://www.zomato.com'

    driver.get(url)
    time.sleep(3)  # Initial wait for the page to load fully
    book_data = []

    # Scroll and load all content
    scroll_to_load_content()

    # Parse the listing once, up front: the driver navigates away to each
    # detail page inside the loop, so the cards must not be re-read from it.
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    books = soup.find_all('div', class_='sc-1mo3ldo-0 sc-jGkVzM BXbKf')

    for book in books:
        try:
            link_tag = book.find('a', class_='sc-hPeUyl cKQNlu')
            if link_tag is None or 'href' not in link_tag.attrs:
                print("Product link not found for this book.")
                continue

            # urljoin resolves site-relative hrefs against the base and leaves
            # already-absolute hrefs intact, unlike plain concatenation.
            full_product_url = urljoin(base_url, link_tag['href'])

            product_details = scrape_product_details(full_product_url)
            print(product_details)
            book_data.append(product_details)

        except Exception as e:
            # Deliberate best-effort: report the card that failed and move on.
            print(f"Error processing book: {e}")

    # Convert list to DataFrame
    return pd.DataFrame(book_data)

# Start scraping from the first page. The browser is closed in `finally` so a
# failure during scraping does not leave a Chrome process running.
try:
    df_books = scrape_books('https://www.zomato.com/chennai/dine-out')
finally:
    driver.quit()  # Close the browser when done

# Save the DataFrame to a CSV file. 'utf-8-sig' writes a BOM so spreadsheet
# tools detect UTF-8; without it, non-ASCII names (e.g. a curly apostrophe)
# are likely to be shown as mojibake when the file is opened in Excel.
df_books.to_csv('zomato_books1.csv', index=False, encoding='utf-8-sig')
print("Data saved to zomato_books1.csv")


Product link not found for this book.
{'Product Name': 'Secret Story', 'Cuisines': 'North Indian, Chinese, Italian, Fast Food', 'Address': 'Nungambakkam, Chennai', 'Opening and Closing Time': '12noon – 12midnight (Today)', 'Ratings': '4.2', 'Average Cost': '₹2,000 for two'}
{'Product Name': 'TapasMe', 'Cuisines': 'Asian, European, Continental, Desserts, Beverages', 'Address': 'Anna Nagar West, Chennai', 'Opening and Closing Time': '12noon – 11:30pm (Today)', 'Ratings': '4.5', 'Average Cost': '₹1,500 for two'}
{'Product Name': 'Lord of the Drinks', 'Cuisines': 'Italian, Continental, North Indian, Japanese, Desserts, Beverages', 'Address': 'Nungambakkam, Chennai', 'Opening and Closing Time': '12noon – 12midnight (Today)', 'Ratings': '4.2', 'Average Cost': '₹2,500 for two'}
{'Product Name': 'Goldman’s Steakhouse', 'Cuisines': 'American, Mexican, Steak, Fast Food, Beverages, Desserts', 'Address': 'Nungambakkam, Chennai', 'Opening and Closing Time': '11am – 11:30pm (Today)', 'Ratings': '4.2

In [5]:
import pandas as pd

# Read the spreadsheet into a DataFrame.
# NOTE(review): the scraping cell writes 'zomato_books1.csv'; this '.xlsx'
# file is presumably a manually cleaned copy of it — confirm where it is
# produced.
data = pd.read_excel('zomato_books1.xlsx')
# Bare expression: in a notebook this displays the DataFrame as the cell output.
data

Unnamed: 0,Restaurant Name,Primary Cuisines,Secondary Cuisines,Other Cuisines,Address,Opening Time,Closing Time,Ratings,Average Cost
0,Secret Story,North Indian,Chinese,"Italian, Fast Food",Nungambakkam,12:00:00,00:00:00,4.2,2000
1,TapasMe,Asian,European,"Continental, Desserts, Beverages,",Anna Nagar West,12:00:00,23:00:00,4.5,1500
2,Lord of the Drinks,Italian,Continental,"North Indian, Japanese, Desserts, Beverages",Nungambakkam,12:00:00,00:00:00,4.2,2500
3,Goldmanâ€™s Steakhouse,American,Mexican,"Steak, Fast Food, Beverages, Desserts",Nungambakkam,11:00:00,23:30:00,4.2,2500
4,Sin & Tonic,North Indian,Italian,"BBQ, Wraps, Pizza, Desserts, Bar Food,",T. Nagar,12:00:00,00:00:00,4.5,2700
...,...,...,...,...,...,...,...,...,...
161,Melt,Street Food,Fast Food,"Sandwich, Beverages",Thousand Lights,00:00:00,16:15:00,3.4,250
162,Terrace - By The Living Room,Continental,Italian,"Desserts, Beverages",Anna Nagar East,16:00:00,23:00:00,4.3,2000
163,Arabian Hut,Fast Food,BBQ,"Shawarma, Biryani",Phoenix Market City Velachery,10:00:00,22:30:00,3.4,700
164,Zodiac - The Accord Metropolitan,Finger Food,Nil,Nil,The Accord Metropolitan T. Nagar,16:00:00,00:00:00,3.9,1600


In [6]:
# Display the last 100 rows of the DataFrame.
data.tail(100)

Unnamed: 0,Restaurant Name,Primary Cuisines,Secondary Cuisines,Other Cuisines,Address,Opening Time,Closing Time,Ratings,Average Cost
66,Burger King,Burger,Fast Food,"Beverages,",Forum Vijaya Mall Vadapalani,10:00:00,23:00:00,3.9,300
67,Chinese Wok,Chinese,Momos,Nil,Porur,00:00:00,11:00:00,4.0,700
68,Una Villa,Italian,Pasta,"Sandwich, Pizza, Burger, Beverages",Nungambakkam,10:00:00,22:00:00,0.0,1000
69,Royal Palate,Continental,European,"South Indian, North Indian, Chinese, Desserts",Ambica Empire Vadapalani,00:00:00,00:00:00,3.5,1000
70,Truly Herbivore,North Indian,South Indian,"Continental, Italian, Maharashtrian, Desserts...",Ashok Nagar,00:00:00,00:00:00,4.3,1000
...,...,...,...,...,...,...,...,...,...
161,Melt,Street Food,Fast Food,"Sandwich, Beverages",Thousand Lights,00:00:00,16:15:00,3.4,250
162,Terrace - By The Living Room,Continental,Italian,"Desserts, Beverages",Anna Nagar East,16:00:00,23:00:00,4.3,2000
163,Arabian Hut,Fast Food,BBQ,"Shawarma, Biryani",Phoenix Market City Velachery,10:00:00,22:30:00,3.4,700
164,Zodiac - The Accord Metropolitan,Finger Food,Nil,Nil,The Accord Metropolitan T. Nagar,16:00:00,00:00:00,3.9,1600


In [1]:
#Load the scraped data into MySQL

In [9]:
#pip install mysql-connector-python pandas




In [7]:
import pandas as pd
import mysql.connector
from mysql.connector import errorcode

# Load the cleaned dataset
data = pd.read_excel('zomato_books1.xlsx')

# MySQL database connection settings.
# NOTE(review): the credentials are hard-coded in the notebook; consider
# moving them to an environment variable or a config file kept out of
# version control.
config = {
    "user": "root",
    "password": "Mysql@1992",
    "host": "localhost",
    "database": "Zomato",
    "raise_on_warnings": True
}

# Columns to insert, in the same order as the placeholders in the statement.
columns = [
    'Restaurant Name',
    'Primary Cuisines',
    'Secondary Cuisines',
    'Other Cuisines',
    'Address',
    'Opening Time',
    'Closing Time',
    'Ratings',
    'Average Cost',
]

insert_sql = """
    INSERT INTO Restaurants (`Restaurant Name`, `Primary Cuisines`, `Secondary Cuisines`, `Other Cuisines`, `Address`, `Opening Time`, `Closing Time`, `Ratings`, `Average Cost`)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
"""

# One tuple per spreadsheet row; missing cells (NaN/NaT) become None so they
# are sent to MySQL as NULL.
rows = [
    tuple(None if pd.isna(value) else value for value in record)
    for record in data[columns].itertuples(index=False, name=None)
]

# Pre-bind both handles so the `finally` clause is safe even when connect()
# itself fails (otherwise a NameError would hide the real connection error).
connection = None
cursor = None

try:
    # Connect to the database
    connection = mysql.connector.connect(**config)
    cursor = connection.cursor()

    # Insert data into the Restaurants table
    cursor.executemany(insert_sql, rows)

    # Commit the transaction; this line is only reached if every insert succeeded.
    connection.commit()

except mysql.connector.Error as err:
    if err.errno == errorcode.ER_ACCESS_DENIED_ERROR:
        print("Something is wrong with your user name or password")
    elif err.errno == errorcode.ER_BAD_DB_ERROR:
        print("Database does not exist")
    else:
        print(err)
finally:
    if cursor is not None:
        cursor.close()
    if connection is not None:
        connection.close()