- [LinkedIn - Mohamad Ehthesham](https://www.linkedin.com/in/-mohamad-ehthesham/)


## Web Scraping: Extracting Restaurant Details from Talabat UAE

## **Objective**
Extract the following fields for all restaurants listed on **[Talabat UAE](https://www.talabat.com/uae)**:

### **Fields to be Extracted:**
- **Restaurant Name**  
- **Cuisine Type(s)**  
- **Location/Area** 
- **Best Selling Dishes**
- **Ratings** 
- **Reviews**


In [42]:
!pip install selenium

Collecting selenium
  Downloading selenium-4.27.1-py3-none-any.whl.metadata (7.1 kB)
Collecting trio~=0.17 (from selenium)
  Downloading trio-0.28.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.9 (from selenium)
  Downloading trio_websocket-0.11.1-py3-none-any.whl.metadata (4.7 kB)
Collecting attrs>=23.2.0 (from trio~=0.17->selenium)
  Downloading attrs-24.3.0-py3-none-any.whl.metadata (11 kB)
Collecting outcome (from trio~=0.17->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.9->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Collecting h11<1,>=0.9.0 (from wsproto>=0.14->trio-websocket~=0.9->selenium)
  Downloading h11-0.14.0-py3-none-any.whl.metadata (8.2 kB)
Downloading selenium-4.27.1-py3-none-any.whl (9.7 MB)
   ---------------------------------------- 0.0/9.7 MB ? eta -:--:--
   ---------------------------------------- 0.1/9.7 MB 2.6 MB/s eta 0:00:04
   

In [43]:
!pip install webdriver_manager

Collecting webdriver_manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl (27 kB)
Installing collected packages: webdriver_manager
Successfully installed webdriver_manager-4.0.2


#### Import all the necessary libraries

In [1]:

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import pprint
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, StaleElementReferenceException

import time
import re

import pandas as pd


Some of the above packages must be installed before running them or else below code will give errors

#### Main Code for fetching data from website with reviews

In [8]:
service = Service(r"D:\New folder (2)\chromedriver-win64\chromedriver-win64\chromedriver.exe") #selenium webdriver needs to be downloaded
driver = webdriver.Chrome(service=service)

url = "https://www.talabat.com/uae/restaurants"
driver.get(url)

restaurant_data = []

def fetch_restaurants():
    return driver.find_elements(By.XPATH, "//div[@data-testid='vendor']//a")

page_limit = 5
page_number = 1
start_time = time.time()

while page_number <= page_limit:
    try:
        WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.XPATH, "//p[@data-testid='vendor-name']")))

        restaurants = fetch_restaurants()
        
        for index in range(len(restaurants)):
            try:
                restaurants = fetch_restaurants()
                if index >= len(restaurants):
                    print("Index out of range after re-fetching elements")
                    break

                restaurant = restaurants[index]
                restaurant_name = restaurant.text

                driver.execute_script("arguments[0].scrollIntoView(true);", restaurant)
                time.sleep(3)

                restaurant.click()
                time.sleep(8)  

                if "Page not found" in driver.page_source:
                    print(f"Page not found for {restaurant_name}, navigating back to the main page and skipping...")
                    driver.get(url)
                    WebDriverWait(driver, 15).until(
                        EC.presence_of_element_located((By.XPATH, "//div[@data-testid='vendor']"))
                    )
                    time.sleep(7)
                    continue

                descript = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, "//div[@class='markdown-rich-text-block']"))
                )
                
                rating = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, "//span[contains(@class, 'mr-1')]"))
                )

                rating_number = re.search(r'\d+\.\d+', rating.text).group()
                cuisine_text = re.sub(r'[:\d\.]+', '', rating.text).strip()
                descript_text = (descript.text).strip()
                reviews = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, "//span[@data-testid='brand-total-reviews']"))
                ).text.strip()

                restaurant_data.append((restaurant_name, rating_number, cuisine_text, descript_text, reviews))
                print(f"Fetched data for - {restaurant_name}")

                driver.back()
                WebDriverWait(driver, 15).until(
                    EC.presence_of_element_located((By.XPATH, "//div[@data-testid='vendor']"))
                )
                time.sleep(7)

            except Exception as e:
                print(f"Error fetching data for {restaurant_name}: {e}")
                driver.get(url)  
                WebDriverWait(driver, 15).until(
                    EC.presence_of_element_located((By.XPATH, "//div[@data-testid='vendor']"))
                )
                time.sleep(7)
                continue

        WebDriverWait(driver, 10).until(EC.invisibility_of_element_located((By.CSS_SELECTOR, ".global-spinner")))
        next_button = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//a[@aria-label='Go to next page']")))
        print(f"Clicking on the 'Next' button on page {page_number}...")
        next_button.click()

        WebDriverWait(driver, 20).until(EC.presence_of_all_elements_located((By.XPATH, "//p[@data-testid='vendor-name']")))

        page_number += 1
        print(f"Moved to Page {page_number}")

    except Exception as e:
        print(f"Error on page {page_number}: {e}")
        driver.get(url)
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.XPATH, "//div[@data-testid='vendor']"))
        )
        time.sleep(7)
        continue

end_time = time.time()
total_time_taken = end_time - start_time

print('Fetched till page:', page_number - 1)
print('Total restaurants data fetched:', len(restaurant_data))
print('Time taken to fetch data:', total_time_taken, 'seconds')

for data in restaurant_data:
    print(data)


Fetched data for - Shrimp Kingdom
Seafood,International
Fetched data for - Pasta Di Risotto Italia - Italian Bistro
Italian,Pasta,Healthy
Fetched data for - Wawa Dining
Japanese,Asian,Ramen,Korean
Fetched data for - WH#63z Eatery
Indian,International
Fetched data for - So Saj
Bakery,Arabic,Beverages
Fetched data for - MAAR Sushi
Sushi,Seafood,Asian
Fetched data for - Sticky Wingz
Wings,Bubble tea,Burgers
Fetched data for - Gorilla Burger
Burgers,Fast Food,Sliders
Fetched data for - DRVN Coffee by Porsche
Coffee,Acai,Breakfast,Cafe,Beverages
Fetched data for - TOK Premium Restaurant
Korean,Asian
Fetched data for - Delhi Lahore Restaurant
Asian,Indian,Pakistani
Fetched data for - Jabal Al Noor Mandi
Arabic,Mandi
Error fetching data for Petzone
Pet Shop: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF6E06380D5+2992373]
	(No symbol) [0x00007FF6E02CBFD0]
	(No symbol) [0x00007FF6E016590A]
	(No symbol) [0x00007FF6E01B926E]
	(No symbol) [0x00007FF6E01B955C]
	(No symbol) [0x00007FF6E02027D7

Error fetching data for ADRIFT Burger Bar
Burgers,Fast Food,American: Message: 
Stacktrace:
	GetHandleVerifier [0x00007FF6E06380D5+2992373]
	(No symbol) [0x00007FF6E02CBFD0]
	(No symbol) [0x00007FF6E016590A]
	(No symbol) [0x00007FF6E01B926E]
	(No symbol) [0x00007FF6E01B955C]
	(No symbol) [0x00007FF6E02027D7]
	(No symbol) [0x00007FF6E01DF3AF]
	(No symbol) [0x00007FF6E01FF584]
	(No symbol) [0x00007FF6E01DF113]
	(No symbol) [0x00007FF6E01AA918]
	(No symbol) [0x00007FF6E01ABA81]
	GetHandleVerifier [0x00007FF6E0696A2D+3379789]
	GetHandleVerifier [0x00007FF6E06AC32D+3468109]
	GetHandleVerifier [0x00007FF6E06A0043+3418211]
	GetHandleVerifier [0x00007FF6E042C78B+847787]
	(No symbol) [0x00007FF6E02D757F]
	(No symbol) [0x00007FF6E02D2FC4]
	(No symbol) [0x00007FF6E02D315D]
	(No symbol) [0x00007FF6E02C2979]
	BaseThreadInitThunk [0x00007FF98F6FE8D7+23]
	RtlUserThreadStart [0x00007FF99049FBCC+44]

Fetched data for - Summer Palace Chinese Restaurant
Thai,Asian,Bubble tea,Coffee,Chinese
Fetched data f

KeyboardInterrupt: 

Further restaurant data could have been extracted, but due to the time-consuming process, I had to limit it to the first 132 restaurants.

In [10]:
restaurant_data

[('Shrimp Kingdom\nSeafood,International',
  '4.0',
  '',
  'Shrimp Kingdom is a restaurant located in UAE, serving a selection of Seafood, International that delivers across Jumeirah Beach Residence - JBR.\nThey have been reviewed 197 times by talabat users, with a rating of 4.',
  '136 Reviews'),
 ('Pasta Di Risotto Italia - Italian Bistro\nItalian,Pasta,Healthy',
  '4.0',
  '',
  'Pasta Di Risotto Italia - Italian Bistro is a restaurant located in UAE, serving a selection of Italian, Pasta, Healthy that delivers across Al Warqa 4, Business Bay, Business Bay, Dubai Media City and Madinat Khalifa - A.\nTheir best selling dishes are Penne Di Alfredo Pasta, Penne Di Pink Pasta, Spaghetti Di Bolognese Pasta and Lasagna, although they have a variety of dishes and meals to choose from like Italian, Pasta, Healthy.\nThey have been reviewed 2101 times by talabat users, with a rating of 4.',
  '1510 Reviews'),
 ('Wawa Dining\nJapanese,Asian,Ramen,Korean',
  '4.5',
  '',
  'Wawa Dining is a re

#### Convert the Raw form data into DataFrame

In [18]:
cleaned_data = []
for name_cuisine, rating, cuisine_text, description,reviews in restaurant_data:
    name, cuisines = name_cuisine.split('\n', 1)
    cleaned_data.append((name.strip(), cuisines.strip(), rating, description.strip(),reviews))

df = pd.DataFrame(cleaned_data, columns=['Restaurant Name', 'Cuisines', 'Rating', 'Description','reviews'])

In [19]:
df

Unnamed: 0,Restaurant Name,Cuisines,Rating,Description,reviews
0,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136 Reviews
1,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510 Reviews
2,Wawa Dining,"Japanese,Asian,Ramen,Korean",4.5,"Wawa Dining is a restaurant located in UAE, se...",27 Reviews
3,WH#63z Eatery,"Indian,International",4.0,"WH#63z Eatery is a restaurant located in UAE, ...",0 Reviews
4,So Saj,"Bakery,Arabic,Beverages",0.0,"So Saj is a restaurant located in UAE, serving...",0 Reviews
...,...,...,...,...,...
127,Supermarche,"Grocery,Convenience Store,Supermarket",4.5,"Supermarche is a grocery store near you, locat...",912 Reviews
128,Better Biryani,"Chicken,Desserts,Beverages",3.5,"Better Biryani is a restaurant located in UAE,...",4 Reviews
129,Origins Kitchen,"Burgers,Grills,International",4.5,Origins Kitchen is a restaurant located in UAE...,2 Reviews
130,QSL Juices,"Juices,Beverages",4.0,"QSL Juices is a restaurant located in UAE, ser...",2 Reviews


In [20]:
df.tail()

Unnamed: 0,Restaurant Name,Cuisines,Rating,Description,reviews
127,Supermarche,"Grocery,Convenience Store,Supermarket",4.5,"Supermarche is a grocery store near you, locat...",912 Reviews
128,Better Biryani,"Chicken,Desserts,Beverages",3.5,"Better Biryani is a restaurant located in UAE,...",4 Reviews
129,Origins Kitchen,"Burgers,Grills,International",4.5,Origins Kitchen is a restaurant located in UAE...,2 Reviews
130,QSL Juices,"Juices,Beverages",4.0,"QSL Juices is a restaurant located in UAE, ser...",2 Reviews
131,Qrew,"Desserts,Beverages,Snacks",3.5,"Qrew is a restaurant located in UAE, serving a...",27 Reviews


#### Removing any unnecessary characters from Description Column

In [21]:
df['Description']=df['Description'].str.replace('\n','',regex=False)
df

Unnamed: 0,Restaurant Name,Cuisines,Rating,Description,reviews
0,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136 Reviews
1,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510 Reviews
2,Wawa Dining,"Japanese,Asian,Ramen,Korean",4.5,"Wawa Dining is a restaurant located in UAE, se...",27 Reviews
3,WH#63z Eatery,"Indian,International",4.0,"WH#63z Eatery is a restaurant located in UAE, ...",0 Reviews
4,So Saj,"Bakery,Arabic,Beverages",0.0,"So Saj is a restaurant located in UAE, serving...",0 Reviews
...,...,...,...,...,...
127,Supermarche,"Grocery,Convenience Store,Supermarket",4.5,"Supermarche is a grocery store near you, locat...",912 Reviews
128,Better Biryani,"Chicken,Desserts,Beverages",3.5,"Better Biryani is a restaurant located in UAE,...",4 Reviews
129,Origins Kitchen,"Burgers,Grills,International",4.5,Origins Kitchen is a restaurant located in UAE...,2 Reviews
130,QSL Juices,"Juices,Beverages",4.0,"QSL Juices is a restaurant located in UAE, ser...",2 Reviews


#### Checking for duplicates rows

In [22]:
df.duplicated().sum()

0

#### Checking for null values

In [23]:
df.isnull().sum()

Restaurant Name    0
Cuisines           0
Rating             0
Description        0
reviews            0
dtype: int64

#### Cleaning reviews column

In [31]:
df['reviews']=df['reviews'].replace('Reviews','',regex=True)
df['reviews']=df['reviews'].str.rstrip()
df.head()

Unnamed: 0,Restaurant Name,Cuisines,Rating,Description,reviews
0,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136
1,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510
2,Wawa Dining,"Japanese,Asian,Ramen,Korean",4.5,"Wawa Dining is a restaurant located in UAE, se...",27
3,WH#63z Eatery,"Indian,International",4.0,"WH#63z Eatery is a restaurant located in UAE, ...",0
4,So Saj,"Bakery,Arabic,Beverages",0.0,"So Saj is a restaurant located in UAE, serving...",0


#### Assigning restaurant id for possible future req

In [32]:
#assigning restaurant id

df.index=range(1,len(df)+1)
df['Restaurant ID']=['R'+str(i) for i in df.index]

In [34]:
df.head()

Unnamed: 0,Restaurant Name,Cuisines,Rating,Description,reviews,Restaurant ID
1,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136,R1
2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,R2
3,Wawa Dining,"Japanese,Asian,Ramen,Korean",4.5,"Wawa Dining is a restaurant located in UAE, se...",27,R3
4,WH#63z Eatery,"Indian,International",4.0,"WH#63z Eatery is a restaurant located in UAE, ...",0,R4
5,So Saj,"Bakery,Arabic,Beverages",0.0,"So Saj is a restaurant located in UAE, serving...",0,R5


#### Moving thsi ID column to first position


In [35]:
fst_col=df.pop('Restaurant ID')
df.insert(0,'Restaurant ID',fst_col)

In [38]:
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Rating,Description,reviews
1,R1,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136
2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510
3,R3,Wawa Dining,"Japanese,Asian,Ramen,Korean",4.5,"Wawa Dining is a restaurant located in UAE, se...",27
4,R4,WH#63z Eatery,"Indian,International",4.0,"WH#63z Eatery is a restaurant located in UAE, ...",0
5,R5,So Saj,"Bakery,Arabic,Beverages",0.0,"So Saj is a restaurant located in UAE, serving...",0


#### Extracting Locations Served from Restaurant Description

In [40]:
def get_location(desciption):
    location = re.search(r'across (.*?)\.', desciption)
    if location:
        location_text = location.group(1)
        return location_text
    else:
        return "Location not found"
df['Location_served']=df['Description'].apply(get_location)

In [41]:
df

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Rating,Description,reviews,Location_served
1,R1,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136,Jumeirah Beach Residence - JBR
2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ..."
3,R3,Wawa Dining,"Japanese,Asian,Ramen,Korean",4.5,"Wawa Dining is a restaurant located in UAE, se...",27,Al Barsha 1
4,R4,WH#63z Eatery,"Indian,International",4.0,"WH#63z Eatery is a restaurant located in UAE, ...",0,Al Quoz Industrial Area 3
5,R5,So Saj,"Bakery,Arabic,Beverages",0.0,"So Saj is a restaurant located in UAE, serving...",0,Al Quoz 1
...,...,...,...,...,...,...,...
128,R128,Supermarche,"Grocery,Convenience Store,Supermarket",4.5,"Supermarche is a grocery store near you, locat...",912,"Business Bay, Oud Metha and Oud Metha"
129,R129,Better Biryani,"Chicken,Desserts,Beverages",3.5,"Better Biryani is a restaurant located in UAE,...",4,Al Quoz Industrial Area 4
130,R130,Origins Kitchen,"Burgers,Grills,International",4.5,Origins Kitchen is a restaurant located in UAE...,2,Muwaileh Commercial
131,R131,QSL Juices,"Juices,Beverages",4.0,"QSL Juices is a restaurant located in UAE, ser...",2,Al Mujarah


#### Extracting best selling dishes from Restaurant Description

In [42]:
def get_best_dishes(desciption):
    best_dish = re.search(r'best selling dishes are (.*?)\ although', desciption)
    if best_dish:
        best_dish_text = best_dish.group(1)
        return best_dish_text 
    else:
        return "text not found"

In [43]:
df['best selling dishes']=df['Description'].apply(get_best_dishes)

In [44]:
df

Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Rating,Description,reviews,Location_served,best selling dishes
1,R1,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136,Jumeirah Beach Residence - JBR,text not found
2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...","Penne Di Alfredo Pasta, Penne Di Pink Pasta, S..."
3,R3,Wawa Dining,"Japanese,Asian,Ramen,Korean",4.5,"Wawa Dining is a restaurant located in UAE, se...",27,Al Barsha 1,"Kimchi Jjigae,"
4,R4,WH#63z Eatery,"Indian,International",4.0,"WH#63z Eatery is a restaurant located in UAE, ...",0,Al Quoz Industrial Area 3,text not found
5,R5,So Saj,"Bakery,Arabic,Beverages",0.0,"So Saj is a restaurant located in UAE, serving...",0,Al Quoz 1,text not found
...,...,...,...,...,...,...,...,...
128,R128,Supermarche,"Grocery,Convenience Store,Supermarket",4.5,"Supermarche is a grocery store near you, locat...",912,"Business Bay, Oud Metha and Oud Metha",text not found
129,R129,Better Biryani,"Chicken,Desserts,Beverages",3.5,"Better Biryani is a restaurant located in UAE,...",4,Al Quoz Industrial Area 4,text not found
130,R130,Origins Kitchen,"Burgers,Grills,International",4.5,Origins Kitchen is a restaurant located in UAE...,2,Muwaileh Commercial,"Maple Granola, Origins Big Breakfast, Buratta ..."
131,R131,QSL Juices,"Juices,Beverages",4.0,"QSL Juices is a restaurant located in UAE, ser...",2,Al Mujarah,"Box of 6 Fresh Juices and Coconut Shake 1 Litre,"


In [48]:
df['Description'][3]

'Wawa Dining is a restaurant located in UAE, serving a selection of Japanese, Asian, Ramen that delivers across Al Barsha 1.Their best selling dishes are Kimchi Jjigae, although they have a variety of dishes and meals to choose from like Japanese, Asian, Ramen.They have been reviewed 46 times by talabat users, with a rating of 4.5.'

### Lets seperate cusinies, best selling dishes, Location_served into row wise

#### Seperating best selling dished into seperate rows for further analysis and from Restaurant Description

In [9]:
def split_dishes(dishes):
    return re.split(r',|\band\b', dishes)

df['best_selling_dishes_list'] = df['best selling dishes'].apply(split_dishes)

df = df.explode('best_selling_dishes_list')

df['best_selling_dishes names'] = df['best_selling_dishes_list'].str.strip()


In [10]:
df.head()

Unnamed: 0.1,Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Rating,Description,reviews,Location_served,best selling dishes,best_selling_dishes_list,best_selling_dishes names
0,1,R1,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136,Jumeirah Beach Residence - JBR,text not found,text not found,text not found
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...","Penne Di Alfredo Pasta, Penne Di Pink Pasta, S...",Penne Di Alfredo Pasta,Penne Di Alfredo Pasta
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...","Penne Di Alfredo Pasta, Penne Di Pink Pasta, S...",Penne Di Pink Pasta,Penne Di Pink Pasta
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...","Penne Di Alfredo Pasta, Penne Di Pink Pasta, S...",Spaghetti Di Bolognese Pasta,Spaghetti Di Bolognese Pasta
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...","Penne Di Alfredo Pasta, Penne Di Pink Pasta, S...",Lasagna,Lasagna


#### Droping Not Required Columns

In [14]:
df.drop(columns=['best selling dishes','best_selling_dishes_list'],axis=1,inplace=True)

In [15]:
df.head()

Unnamed: 0.1,Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Rating,Description,reviews,Location_served,best_selling_dishes names
0,1,R1,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136,Jumeirah Beach Residence - JBR,text not found
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...",Penne Di Alfredo Pasta
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...",Penne Di Pink Pasta
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...",Spaghetti Di Bolognese Pasta
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...",Lasagna


#### Seperating Locations Served into seperate rows for location based analysis and from Restaurant Description

In [16]:
def locations_served(locations_served):
    return re.split(r',|\band\b', locations_served)

df['locations_served_list'] = df['Location_served'].apply(locations_served)

df = df.explode('locations_served_list')

df['locations_served_list'] = df['locations_served_list'].str.strip()

df.rename(columns={'locations_served_list': 'locations_serve'},inplace=True)


In [17]:
df.head()

Unnamed: 0.1,Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Rating,Description,reviews,Location_served,best_selling_dishes names,locations_serve
0,1,R1,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136,Jumeirah Beach Residence - JBR,text not found,Jumeirah Beach Residence - JBR
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...",Penne Di Alfredo Pasta,Al Warqa 4
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...",Penne Di Alfredo Pasta,Business Bay
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...",Penne Di Alfredo Pasta,Business Bay
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,"Al Warqa 4, Business Bay, Business Bay, Dubai ...",Penne Di Alfredo Pasta,Dubai Media City


#### Droping Not Required Columns

In [18]:
df.drop(columns=['Location_served'],axis=1,inplace=True)

In [19]:
df.head()

Unnamed: 0.1,Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Rating,Description,reviews,best_selling_dishes names,locations_serve
0,1,R1,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136,text not found,Jumeirah Beach Residence - JBR
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,Penne Di Alfredo Pasta,Al Warqa 4
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,Penne Di Alfredo Pasta,Business Bay
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,Penne Di Alfredo Pasta,Business Bay
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,Penne Di Alfredo Pasta,Dubai Media City


#### Seperating Cuisines into seperate rows for cuisines based analysis and from Restaurant Description

In [20]:
def Cuisines(Cuisines_served):
    return re.split(r',|\band\b', Cuisines_served)

df['Cuisines_served_list'] = df['Cuisines'].apply(Cuisines)

df = df.explode('Cuisines_served_list')

df['Cuisines_served_list'] = df['Cuisines_served_list'].str.strip()

df.rename(columns={'Cuisines_served_list': 'Cuisines_serve'},inplace=True)


In [22]:
df

Unnamed: 0.1,Unnamed: 0,Restaurant ID,Restaurant Name,Cuisines,Rating,Description,reviews,best_selling_dishes names,locations_serve,Cuisines_serve
0,1,R1,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136,text not found,Jumeirah Beach Residence - JBR,Seafood
0,1,R1,Shrimp Kingdom,"Seafood,International",4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136,text not found,Jumeirah Beach Residence - JBR,International
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,Penne Di Alfredo Pasta,Al Warqa 4,Italian
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,Penne Di Alfredo Pasta,Al Warqa 4,Pasta
1,2,R2,Pasta Di Risotto Italia - Italian Bistro,"Italian,Pasta,Healthy",4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,Penne Di Alfredo Pasta,Al Warqa 4,Healthy
...,...,...,...,...,...,...,...,...,...,...
131,132,R132,Qrew,"Desserts,Beverages,Snacks",3.5,"Qrew is a restaurant located in UAE, serving a...",27,Vanilla Milkshake,Hor Al Anz,Beverages
131,132,R132,Qrew,"Desserts,Beverages,Snacks",3.5,"Qrew is a restaurant located in UAE, serving a...",27,Vanilla Milkshake,Hor Al Anz,Snacks
131,132,R132,Qrew,"Desserts,Beverages,Snacks",3.5,"Qrew is a restaurant located in UAE, serving a...",27,,Hor Al Anz,Desserts
131,132,R132,Qrew,"Desserts,Beverages,Snacks",3.5,"Qrew is a restaurant located in UAE, serving a...",27,,Hor Al Anz,Beverages


#### Droping Not Required Columns

In [23]:
df.drop(columns=['Unnamed: 0','Cuisines'],axis=1,inplace=True)

In [24]:
df.head()

Unnamed: 0,Restaurant ID,Restaurant Name,Rating,Description,reviews,best_selling_dishes names,locations_serve,Cuisines_serve
0,R1,Shrimp Kingdom,4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136,text not found,Jumeirah Beach Residence - JBR,Seafood
0,R1,Shrimp Kingdom,4.0,"Shrimp Kingdom is a restaurant located in UAE,...",136,text not found,Jumeirah Beach Residence - JBR,International
1,R2,Pasta Di Risotto Italia - Italian Bistro,4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,Penne Di Alfredo Pasta,Al Warqa 4,Italian
1,R2,Pasta Di Risotto Italia - Italian Bistro,4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,Penne Di Alfredo Pasta,Al Warqa 4,Pasta
1,R2,Pasta Di Risotto Italia - Italian Bistro,4.0,Pasta Di Risotto Italia - Italian Bistro is a ...,1510,Penne Di Alfredo Pasta,Al Warqa 4,Healthy


#### Downlaoding Final File

In [25]:
df.to_excel(r'D:\New folder (2)\New folder\New Data Combined.xlsx')