# In [None]:
import os
import pandas as pd
from googleapiclient.discovery import build

# API key and Custom Search Engine ID.
# Prefer supplying these through the GOOGLE_API_KEY / GOOGLE_CSE_ID
# environment variables so real credentials never have to be committed;
# the placeholders below are used when a variable is unset or empty
# (replace them with yours if you do not use the environment).
API_KEY = os.environ.get('GOOGLE_API_KEY') or '**********'
CSE_ID = os.environ.get('GOOGLE_CSE_ID') or '***********'

def google_search(query, api_key, cse_id, start_index=1):
    """Run one Custom Search request and return its result items.

    Args:
        query: Search string sent as the ``q`` parameter.
        api_key: Developer key handed to the API client.
        cse_id: Custom Search Engine ID sent as the ``cx`` parameter.
        start_index: Value sent as the ``start`` parameter (default 1).

    Returns:
        The ``items`` entry of the response, or an empty list when the
        response contains no such key.
    """
    client = build("customsearch", "v1", developerKey=api_key)
    request = client.cse().list(q=query, cx=cse_id, start=start_index)
    response = request.execute()
    return response.get('items', [])

def collect_data(query, num_results=100):
    """Collect up to ``num_results`` search hits for ``query``.

    Pages are fetched through ``google_search`` using the module-level
    ``API_KEY`` and ``CSE_ID`` until enough rows are gathered, a page comes
    back empty, a request fails, or the API's result window is exhausted.

    Args:
        query: Search string forwarded to ``google_search``.
        num_results: Maximum number of rows to return (default 100).

    Returns:
        A list of dicts with the keys 'Title', 'Link', 'Snippet' and
        'DisplayLink'; values are ``None`` where the API item lacks the
        corresponding field. The list may be shorter than ``num_results``.
    """
    page_size = 10  # API fetches 10 results per page
    # The Custom Search JSON API never serves more than 100 results per
    # query, so a page starting beyond index 91 can only fail. Stop there
    # instead of issuing a request that is known to error out.
    last_valid_start = 91

    data = []
    start_index = 1
    while len(data) < num_results and start_index <= last_valid_start:
        # Only the network call is guarded: a failed request ends the
        # collection (best effort) but keeps the rows gathered so far.
        try:
            results = google_search(query, API_KEY, CSE_ID, start_index)
        except Exception as e:
            print(f"An error occurred: {e}")
            break
        if not results:
            break
        data.extend(
            {
                'Title': item.get('title'),
                'Link': item.get('link'),
                'Snippet': item.get('snippet'),
                'DisplayLink': item.get('displayLink'),
            }
            for item in results
        )
        start_index += page_size
    # The last page may overshoot the requested count; trim it.
    return data[:num_results]

def enrich_data(df):
    """Add derived and constant event columns to a search-results frame.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame expected to carry 'Title', 'Snippet' and
            'DisplayLink' columns. Any of these that is absent (for
            example when the frame was built from an empty result list)
            is created and filled with missing values.

    Returns:
        The same DataFrame with these columns added: 'Event Type',
        'Event Date', 'Venue', 'Language', 'Price Range', 'Keywords',
        'Region', 'Category', 'Source', 'Event Name' and 'Event Details'.
        Text-derived columns hold a missing value where the title or
        snippet is missing or the pattern does not match.
    """
    # A frame built from zero results has no columns at all; create the
    # inputs so the lookups below cannot raise KeyError.
    for column in ('Title', 'Snippet', 'DisplayLink'):
        if column not in df.columns:
            df[column] = None

    def event_type(title):
        # Missing titles (None/NaN) are not strings and count as 'Other'.
        if isinstance(title, str) and 'Anime & Comics' in title:
            return 'Anime & Comics Cosplay Festival'
        return 'Other'

    def leading_words(snippet):
        # First five whitespace-separated tokens; nothing for a missing snippet.
        if not isinstance(snippet, str):
            return None
        return ', '.join(snippet.split()[:5])

    snippets = df['Snippet']

    df['Event Type'] = df['Title'].apply(event_type)
    # expand=False makes each single-group extract return a Series.
    df['Event Date'] = snippets.str.extract(
        r'(\b[A-Za-z]+\s\d{1,2},\s\d{4}\b)', expand=False
    )
    # \b keeps "at" from matching inside words such as "Great" or "that".
    df['Venue'] = snippets.str.extract(r'\bat\s([A-Za-z\s]+)', expand=False)
    df['Language'] = 'English'
    df['Price Range'] = snippets.str.extract(
        r'(\$\d+\s?-\s?\$\d+)', expand=False
    )
    df['Keywords'] = snippets.apply(leading_words)
    df['Region'] = 'Baku'
    df['Category'] = 'General'
    df['Source'] = df['DisplayLink']
    df['Event Name'] = df['Title']
    df['Event Details'] = snippets
    return df

if __name__ == "__main__":
    # Search string and the number of rows to request from collect_data.
    query = "site:tickets-az.com/en/baku/events"
    num_results = 100

    # Fetch the raw hits, then layer the derived columns on top of them.
    results = collect_data(query, num_results=num_results)
    df = enrich_data(pd.DataFrame(results))

    # The output directory must exist before the CSV is written into it.
    data_folder = "data"
    os.makedirs(data_folder, exist_ok=True)

    csv_filename = os.path.join(data_folder, 'scraped_data_team_26.csv')
    df.to_csv(csv_filename, index=False)

    print(f"Data collection and enrichment complete! Saved to {csv_filename}")
