In [None]:
import requests
import pandas as pd
import time
import os

API_KEY = "my api key"

cities = [
    {"city": "Mecca", "region": "West"},
    {"city": "Jeddah", "region": "West"},
    {"city": "Taif", "region": "West"},
    {"city": "Khobar", "region": "East"},
    {"city": "Umluj", "region": "North"},
    {"city": "AlUla", "region": "West"},
    {"city": "Tabuk", "region": "North"},
    {"city": "Medina", "region": "West"},
    {"city": "Riyadh", "region": "Central"},
    {"city": "Abha", "region": "South"},
    {"city": "Al Ahsa", "region": "East"},
    {"city": "Najran", "region": "South"},
    {"city": "Qatif", "region": "East"},
    {"city": "Buraidah", "region": "Central"},
    {"city": "Unaizah", "region": "Central"},
    {"city": "Rijal Alma", "region": "South"},
    {"city": "Hail", "region": "North"},
    {"city": "Yanbu", "region": "West"},
    {"city": "Al Baha", "region": "South"},
    {"city": "Jazan", "region": "South"}


]


place_types = [
    {"type": "hotel", "max_results": 100},
    {"type": "restaurant", "max_results": 100},
    {"type": "tourist_attraction", "max_results": 20},
    {"type": "shopping_mall", "max_results": 20},
    {"type": "museum", "max_results": 10},
    {"type": "cafe", "max_results": 30},
    {"type": "park", "max_results": 10},
    {"type": "amusement_park", "max_results": 20}
]

# Create a folder to save the files
os.makedirs("csv", exist_ok=True)

# Start looping through cities and place types
for city_info in cities:
    city = city_info["city"]
    region = city_info["region"]

    city_data = []

    for place in place_types:
        place_type = place["type"]
        max_results = place["max_results"]

        print(f"🔍 Searching for {place_type}s in {city}...")

        url = "https://maps.googleapis.com/maps/api/place/textsearch/json"
        params = {
            "query": f"{place_type}s in {city}",
            "key": API_KEY
        }

        all_places = []
        while True:
            response = requests.get(url, params=params)
            results = response.json().get("results", [])
            all_places.extend(results)

            next_page_token = response.json().get("next_page_token")
            if next_page_token and len(all_places) < max_results:
                time.sleep(2)
                params["pagetoken"] = next_page_token
            else:
                break

        for place_info in all_places[:max_results]:
            details_url = "https://maps.googleapis.com/maps/api/place/details/json"
            details_params = {
                "place_id": place_info["place_id"],
                "fields": "name,rating,reviews,types",
                "key": API_KEY
            }

            try:
                details_response = requests.get(details_url, params=details_params)
                details = details_response.json().get("result", {})
                reviews = details.get("reviews", [])
                place_category = details.get("types", [None])[0]

                for review in reviews:
                    city_data.append({
                        "Region": region,
                        "City": city,
                        "Place Type": place_type,
                        "Place Category": place_category,
                        "Place Name": details.get("name", ""),
                        "Rating": details.get("rating", None),
                        "Review Text": review.get("text", ""),
                        "Reviewer Language": review.get("language", "unknown")
                    })

                time.sleep(1)

            except Exception as e:
                print(f"❌ Error in {place_type} at {city}: {e}")
                continue

    output_file = f"{city}.csv"
    pd.DataFrame(city_data).to_csv(output_file, index=False)
    print(f"✅ Saved {len(city_data)} reviews in {output_file}\n")


🔍 Searching for hotels in Al Baha...
🔍 Searching for restaurants in Al Baha...
🔍 Searching for tourist_attractions in Al Baha...
🔍 Searching for shopping_malls in Al Baha...
🔍 Searching for museums in Al Baha...
🔍 Searching for cafes in Al Baha...
🔍 Searching for parks in Al Baha...
🔍 Searching for amusement_parks in Al Baha...
✅ Saved 905 reviews in Al Baha.csv



In [90]:
import pandas as pd
import os

folder_path = "/Users/macbookpro/code/HayaAlsubie/Beyond_the_Stars/raw_data"
all_files = [f for f in os.listdir(folder_path) if f.endswith(".csv")]

dfs = []

for file in all_files:
    df = pd.read_csv(os.path.join(folder_path, file))
    dfs.append(df)

merged_df = pd.concat(dfs, ignore_index=True)


merged_df.to_csv("all_reviews.csv", index=False)

print(f" Done! Merged {len(all_files)} files into one file with {len(merged_df)} rows.")


 Done! Merged 20 files into one file with 19345 rows.


In [91]:
file_path = "all_reviews.csv"
df = pd.read_csv(file_path)
df.shape

(19345, 8)

In [110]:
df.columns

Index(['Region', 'City', 'Place Type', 'Place Category', 'Place Name',
       'Rating', 'Review Text', 'Reviewer Language'],
      dtype='object')

In [113]:
df['City'].unique()

array(['Hail', 'Umluj', 'Unaizah', 'Mecca', 'Buraidah', 'Al Ahsa',
       'AlUla', 'Jeddah', 'Abha', 'Medina', 'Khobar', 'Taif', 'Al Baha',
       'Najran', 'Tabuk', 'Qatif', 'Rijal Alma', 'Yanbu', 'Riyadh',
       'Jazan'], dtype=object)

In [112]:
df.columns

Index(['Region', 'City', 'Place Type', 'Place Category', 'Place Name',
       'Rating', 'Review Text', 'Reviewer Language'],
      dtype='object')

In [115]:
df['Place Type'].unique()

array(['hotel', 'restaurant', 'tourist_attraction', 'shopping_mall',
       'museum', 'cafe', 'park', 'amusement_park'], dtype=object)