In [1]:
import pandas as pd
import time
import requests
import os
import sys
import subprocess

# --- 1. SETUP: CHECK FOR AND INSTALL REQUIRED LIBRARIES ---

# Set to True as soon as pip installs or downgrades anything, so the code that
# follows can ask for a kernel restart before importing the fresh packages.
packages_installed = False


def _pip_install_command(package_name, version_specifier=None):
    """Build the ``python -m pip install <requirement>`` argument list."""
    requirement = f"{package_name}{version_specifier}" if version_specifier else package_name
    return [sys.executable, "-m", "pip", "install", requirement]


def check_and_install_package(package_name, version_specifier=None):
    """Checks if a package is installed and installs it if not.

    NumPy gets special handling: an importable NumPy whose major version is
    2 or higher is uninstalled and replaced by ``numpy<2``.

    Args:
        package_name (str): Importable module name, also used as the pip name.
        version_specifier (str | None): Optional requirement suffix such as
            ``'<2.0.0'``, appended to the package name when installing a
            missing package.

    Returns:
        None. Sets the module-level ``packages_installed`` flag to True when
        pip was invoked.

    Raises:
        subprocess.CalledProcessError: If a pip command exits with an error.
    """
    global packages_installed
    try:
        module = __import__(package_name)
    except ImportError:
        print(f"Package '{package_name}' not found. Installing...")
        # The specifier belongs on the requirement argument; the previous code
        # overwrote argv[2] ("pip") with it, producing an invalid command.
        subprocess.check_call(_pip_install_command(package_name, version_specifier))
        packages_installed = True
        return

    # Special handling for numpy version conflict
    if package_name == 'numpy' and int(module.__version__.split('.')[0]) >= 2:
        print(f"NumPy version {module.__version__} is incompatible. Downgrading to numpy<2.")
        subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "numpy", "-y"])
        subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy<2"])
        packages_installed = True
        return

    print(f"Required package '{package_name}' is already installed.")

# Make sure the two third-party dependencies are importable. NumPy is requested
# below 2.x; check_and_install_package replaces an installed 2.x build.
# NOTE(review): pandas is already imported at the top of this cell, before
# these checks run.
check_and_install_package('numpy', version_specifier='<2.0.0')
check_and_install_package('geopy')

# If anything was installed or downgraded, stop here and ask for a restart
if packages_installed:
    print("\nOne or more packages were installed. Please restart your kernel now to load them.")
    print("After restarting, you can run this cell again to continue the process.")
    # sys.exit ends this run so the user has to restart and re-run manually.
    # This is safer than a runtime-specific restart command.
    sys.exit(0)

# Reaching this line means neither check had to invoke pip.
print("\nAll required packages are installed and ready. Proceeding with data collection.")

# Deferred until after the checks above so a missing or incompatible package
# is handled before these imports are attempted.
import numpy as np
from geopy.distance import geodesic


# --- 2. SETUP: OVERPASS API & LOCAL DIRECTORY ---

# Directory the CSV files are written to. The main block builds file paths by
# plain string concatenation, so the trailing slash is required.
SAVE_PATH = './data/'

# Overpass API endpoint that query_overpass POSTs to
OVERPASS_URL = "http://overpass-api.de/api/interpreter"
# Bounding box for Kolkata: (min_lat, min_lon, max_lat, max_lon).
# The tuple is interpolated directly into the Overpass QL query strings below.
KOLKATA_BOUNDING_BOX = (22.45, 88.2, 22.7, 88.5)

def query_overpass(query, timeout=30):
    """
    Sends a query to the Overpass API and returns the parsed JSON response.

    Args:
        query (str): The Overpass QL query string.
        timeout (float): Seconds to wait for the HTTP response before giving
            up. Defaults to 30, the value previously hard-coded.

    Returns:
        dict | None: The decoded JSON response, or None if the request fails
        or the response body is not valid JSON. Callers test the result with
        ``if not data``, so None is handled as "no data".
    """
    try:
        response = requests.post(OVERPASS_URL, data=query, timeout=timeout)
        response.raise_for_status()  # Raise an HTTPError for bad responses (4xx or 5xx)
        return response.json()
    except requests.exceptions.RequestException as e:
        print(f"Error querying Overpass API: {e}")
    except ValueError as e:
        # Older requests releases raise a plain ValueError for a non-JSON
        # body, which the handler above does not catch.
        print(f"Error decoding Overpass API response: {e}")
    return None

# --- 3. DATA COLLECTION FUNCTIONS ---

def _destination_category(tags):
    """Return the value of the first category-bearing tag found in ``tags``.

    Keys are tried in the order tourism, leisure, historic, amenity; 'Other'
    is returned when none of them is present.
    """
    for key in ('tourism', 'leisure', 'historic', 'amenity'):
        if key in tags:
            return tags[key]
    return 'Other'


def collect_destinations():
    """
    Collects a list of tourist attractions in Kolkata using the Overpass API.

    Museums, parks, monuments and theatres mapped as nodes inside
    KOLKATA_BOUNDING_BOX are fetched and de-duplicated by node id. Name,
    category, coordinates and description come from the API response; every
    other column (activities, transport, fees, timing, budget, ...) is filled
    with randomly drawn placeholder values.

    Returns:
        pandas.DataFrame: One row per unique destination with a 1-based 'ID'
        column, or an empty DataFrame when the query fails or returns nothing.
    """
    print("\nCollecting tourist destinations from Overpass API...")
    # Overpass QL query to find various tourist attractions within Kolkata's bounding box
    query = f"""
        [out:json][timeout:90];
        (
          node["tourism"="museum"]{KOLKATA_BOUNDING_BOX};
          node["leisure"="park"]{KOLKATA_BOUNDING_BOX};
          node["historic"="monument"]{KOLKATA_BOUNDING_BOX};
          node["amenity"="theatre"]{KOLKATA_BOUNDING_BOX};
        );
        out center;
    """
    data = query_overpass(query)

    if not data or 'elements' not in data:
        print("Warning: No destinations were found using the Overpass API.")
        return pd.DataFrame()

    unique_destinations = {}
    for element in data['elements']:
        # Use the node's ID as a unique key to prevent duplicates
        if element.get('type') != 'node' or element['id'] in unique_destinations:
            continue
        # Tolerate an element without a 'tags' member instead of raising.
        tags = element.get('tags', {})
        name = tags.get('name', 'Unknown')
        unique_destinations[element['id']] = {
            'Name': name,
            # Includes 'amenity' so theatre results are no longer labelled 'Other'.
            'Category': _destination_category(tags),
            'Latitude': element['lat'],
            'Longitude': element['lon'],
            'Description': tags.get('description', f"Description for {name}"),
            'Top Activities': np.random.choice(['Sightseeing', 'Photography', 'Walking', 'Cultural Experience'], 1)[0],
            # Dynamically add transportation data
            'Transport Mode': np.random.choice(['Metro', 'Bus', 'Taxi', 'Uber/Ola'], 1)[0],
            'Total Distance (km)': np.round(np.random.uniform(5.0, 30.0), 1),
            'Estimated Travel Time (hrs)': np.round(np.random.uniform(0.5, 2.0), 1)
        }

    # Second pass: number the destinations and add the remaining placeholder
    # columns, keeping the original column order.
    destinations_list = []
    for number, details in enumerate(unique_destinations.values(), start=1):
        details.update({
            'ID': number,
            'Entry Fee (INR)': np.random.choice([0, 20, 50, 100], 1)[0],
            'Timing': np.random.choice(['10am - 6pm', '24/7', '9am - 5pm'], 1)[0],
            'Ideal Duration': np.random.choice(['1-2 hrs', '2-3 hrs', '3-4 hrs'], 1)[0],
            'Preferred Travel Type': np.random.choice(['Relaxation', 'Cultural', 'Spiritual', 'Adventure'], 1)[0],
            'Budget (INR)': np.random.randint(5000, 25000),
            'Trip Duration (Days)': np.random.randint(1, 8),
        })
        destinations_list.append(details)

    print(f"Total unique destinations found: {len(destinations_list)}")
    return pd.DataFrame(destinations_list)

def collect_accommodations():
    """Fetch hotel nodes inside the Kolkata bounding box from the Overpass API.

    Only the first ten elements of the response are kept. Name and coordinates
    are taken from the response; category, rating, price level and budget are
    randomly drawn placeholder values.

    Returns:
        pandas.DataFrame: One row per hotel with a 1-based 'ID', or an empty
        DataFrame when the query fails or returns no 'elements'.
    """
    print("\nCollecting accommodations from Overpass API...")
    query = f"""
        [out:json][timeout:90];
        node["tourism"="hotel"]{KOLKATA_BOUNDING_BOX};
        out center;
    """
    data = query_overpass(query)

    if not (data and 'elements' in data):
        print("Warning: No hotels were found using the Overpass API.")
        return pd.DataFrame()

    first_ten = data['elements'][:10]
    # Dict entries are evaluated top to bottom, so the random draws happen in
    # the same sequence for every hotel.
    accommodations_list = [
        {
            'ID': position,
            'Name': hotel['tags'].get('name', 'Unknown'),
            'Category': np.random.choice(['Luxury', 'Mid-range', 'Budget'], 1)[0],
            'Latitude': hotel['lat'],
            'Longitude': hotel['lon'],
            'Rating': np.random.uniform(3.5, 5.0),
            'Price_Level': np.random.choice([3, 4, 5], 1)[0],
            'Budget (INR)': np.random.randint(5000, 25000),
        }
        for position, hotel in enumerate(first_ten, start=1)
    ]

    print(f"Found {len(accommodations_list)} accommodations.")
    return pd.DataFrame(accommodations_list)

def collect_nearby_restaurants(destinations_df, max_distance_km=1.5, top_n=5):
    """
    Collects restaurants for each destination using a two-step process:
    1. Query a large pool of named restaurants in Kolkata.
    2. For each destination, keep the nearest restaurants within a radius.

    Name, coordinates and address come from the API response; rating,
    total_ratings and price_level are randomly drawn placeholder values and
    'types' is a fixed string.

    Args:
        destinations_df (pandas.DataFrame): Destinations with 'ID', 'Latitude'
            and 'Longitude' columns.
        max_distance_km (float): Maximum geodesic distance, in kilometres, for
            a restaurant to count as nearby. Defaults to 1.5.
        top_n (int): Maximum number of restaurants kept per destination.
            Defaults to 5.

    Returns:
        pandas.DataFrame: One row per (destination, restaurant) pair, where
        'restaurant_id' is the 1-based distance rank within that destination.
        Empty when the query fails or nothing is in range.
    """
    print("\nCollecting a general pool of restaurants from Overpass API...")
    query = f"""
        [out:json][timeout:90];
        node["amenity"="restaurant"]{KOLKATA_BOUNDING_BOX};
        out center;
    """
    data = query_overpass(query)

    if not data or 'elements' not in data:
        print("Warning: No general restaurant data was found.")
        return pd.DataFrame()

    # Only restaurants that carry a name are usable in an itinerary.
    restaurant_pool_results = [
        {
            'name': element['tags']['name'],
            'lat': element['lat'],
            'lng': element['lon'],
            'address': element['tags'].get('addr:full', 'Unknown Address')
        }
        for element in data['elements']
        if 'name' in element.get('tags', {})
    ]

    all_restaurants_data = []

    for _, dest in destinations_df.iterrows():
        dest_id = dest['ID']
        dest_point = (dest['Latitude'], dest['Longitude'])

        # Pair each candidate with its distance rather than writing the
        # distance into the shared pool entry, which every destination reuses.
        candidates = []
        for restaurant in restaurant_pool_results:
            rest_point = (restaurant['lat'], restaurant['lng'])
            distance = geodesic(dest_point, rest_point).km
            if distance <= max_distance_km:
                candidates.append((distance, restaurant))

        # Stable sort on distance only, so ties keep pool order.
        candidates.sort(key=lambda pair: pair[0])

        for rank, (_distance, place) in enumerate(candidates[:top_n], start=1):
            all_restaurants_data.append({
                'destination_id': dest_id,
                'restaurant_id': rank,
                'restaurant_name': place.get('name', 'Unknown'),
                'rating': np.random.uniform(3.5, 5.0),
                'total_ratings': np.random.randint(50, 1000),
                'price_level': np.random.choice([1, 2, 3], 1)[0],
                'location_lat': place.get('lat'),
                'location_lng': place.get('lng'),
                'address': place.get('address', 'Unknown'),
                'types': ', '.join(['Indian', 'Chinese', 'Cafe'])
            })

    return pd.DataFrame(all_restaurants_data)


# --- 4. MAIN EXECUTION BLOCK ---
if __name__ == '__main__':
    # The CSV writers below expect the output directory to exist already.
    if not os.path.exists(SAVE_PATH):
        os.makedirs(SAVE_PATH)

    # Step 1: tourist destinations.
    destinations_df = collect_destinations()
    if destinations_df.empty:
        print("\nSkipping restaurant data collection because no destination data was found.")
    else:
        destinations_df.to_csv(SAVE_PATH + 'kolkata_expanded_dataset.csv', index=False)
        print("\nSuccessfully generated 'kolkata_expanded_dataset.csv'.")

    # Step 2: hotels. These are collected whether or not step 1 found anything.
    accommodations_df = collect_accommodations()
    if not accommodations_df.empty:
        accommodations_df.to_csv(SAVE_PATH + 'kolkata_accommodations.csv', index=False)
        print("\nSuccessfully generated 'kolkata_accommodations.csv'.")

    # Step 3: restaurants near each destination; only possible when step 1
    # produced at least one destination.
    if not destinations_df.empty:
        restaurants_df = collect_nearby_restaurants(destinations_df)
        if restaurants_df.empty:
            print("\nWarning: No restaurant data was found. 'kolkata_nearby_restaurants.csv' was not generated.")
        else:
            restaurants_df.to_csv(SAVE_PATH + 'kolkata_nearby_restaurants.csv', index=False)
            print("\nSuccessfully generated 'kolkata_nearby_restaurants.csv'.")

    print("\nAll datasets have been generated. You can now use them with 'ai_planner.py' in a separate notebook.")

Required package 'numpy' is already installed.
Required package 'geopy' is already installed.

All required packages are installed and ready. Proceeding with data collection.

Collecting tourist destinations from Overpass API...
Total unique destinations found: 67

Successfully generated 'kolkata_expanded_dataset.csv'.

Collecting accommodations from Overpass API...
Found 10 accommodations.

Successfully generated 'kolkata_accommodations.csv'.

Collecting a general pool of restaurants from Overpass API...

Successfully generated 'kolkata_nearby_restaurants.csv'.

All datasets have been generated. You can now use them with 'ai_planner.py' in a separate notebook.


In [3]:
import pandas as pd
import requests
import json
import os
import sys
import subprocess
import time
import getpass

# --- 1. SETUP: CHECK FOR AND INSTALL REQUIRED LIBRARIES ---

# Set to True as soon as pip installs or downgrades anything, so the code that
# follows can ask for a kernel restart before continuing.
packages_installed = False


def _pip_install_command(package_name, version_specifier=None):
    """Build the ``python -m pip install <requirement>`` argument list."""
    requirement = f"{package_name}{version_specifier}" if version_specifier else package_name
    return [sys.executable, "-m", "pip", "install", requirement]


def check_and_install_package(package_name, version_specifier=None):
    """Check that a package is importable and install it with pip if not.

    NumPy gets special handling: an importable NumPy whose major version is
    2 or higher is uninstalled and replaced by ``numpy<2``.

    Args:
        package_name (str): Importable module name, also used as the pip name.
        version_specifier (str | None): Optional requirement suffix such as
            ``'<2.0.0'``, appended to the package name when installing a
            missing package.

    Returns:
        None. Sets the module-level ``packages_installed`` flag to True when
        pip was invoked.

    Raises:
        subprocess.CalledProcessError: If a pip command exits with an error.
    """
    global packages_installed
    try:
        module = __import__(package_name)
    except ImportError:
        print(f"Package '{package_name}' not found. Installing...")
        # The specifier belongs on the requirement argument; the previous code
        # overwrote argv[2] ("pip") with it, producing an invalid command.
        subprocess.check_call(_pip_install_command(package_name, version_specifier))
        packages_installed = True
        return

    if package_name == 'numpy' and int(module.__version__.split('.')[0]) >= 2:
        print(f"NumPy version {module.__version__} is incompatible. Downgrading to numpy<2.")
        subprocess.check_call([sys.executable, "-m", "pip", "uninstall", "numpy", "-y"])
        subprocess.check_call([sys.executable, "-m", "pip", "install", "numpy<2"])
        packages_installed = True
        return

    print(f"Required package '{package_name}' is already installed.")

# Verify the dependencies of this cell. NumPy is requested below 2.x.
# NOTE(review): pandas is already imported at the top of this cell, so a
# missing pandas would fail there before this check runs.
check_and_install_package('pandas')
check_and_install_package('numpy', version_specifier='<2.0.0')

# If pip changed anything, stop so the user can restart the kernel and re-run.
if packages_installed:
    print("\nOne or more packages were installed. Please restart your kernel now to load them.")
    sys.exit(0)

print("\nAll required packages are installed and ready. Proceeding with itinerary generation.")

# --- 2. CONFIGURATION & DATA LOADING ---

# Directory holding the CSV files read by load_data and
# generate_itinerary_prompt; abort early when it does not exist.
DATA_PATH = './data/'
if not os.path.exists(DATA_PATH):
    print(f"Error: Data directory '{DATA_PATH}' not found.")
    sys.exit(1)

# Gemini generateContent endpoint that call_gemini_api POSTs the prompt to.
API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview-05-20:generateContent"
# Left empty on purpose: call_gemini_api asks for the key when this is empty
# and stores the answer here for later calls.
API_KEY = ""

def load_data():
    """Read the destination and accommodation CSV files from DATA_PATH.

    Returns:
        tuple: ``(destinations_df, accommodations_df)`` as DataFrames, or
        ``(None, None)`` when either file is missing (an error line is
        printed in that case).
    """
    filenames = ('kolkata_expanded_dataset.csv', 'kolkata_accommodations.csv')
    frames = []
    for filename in filenames:
        try:
            frames.append(pd.read_csv(os.path.join(DATA_PATH, filename)))
        except FileNotFoundError as e:
            # Stop at the first missing file; nothing later is attempted.
            print(f"Error: Required data file not found: {e}")
            return None, None
    destinations_df, accommodations_df = frames
    return destinations_df, accommodations_df

# --- 3. HELPERS ---

def get_price_range(price_level, item_type):
    """Translate a numeric price level into a human-readable price band.

    Args:
        price_level: Level code; 3-5 are defined for hotels and 1-3 for
            restaurants.
        item_type: Either 'hotel' or 'restaurant'.

    Returns:
        str: The matching price band label, or "Unknown" when the item type
        or the level is not recognised.
    """
    if item_type == 'hotel':
        bands = {
            3: "Budget (INR 2000-4000)",
            4: "Mid-range (INR 4000-8000)",
            5: "Luxury (INR 8000-15000)",
        }
    elif item_type == 'restaurant':
        bands = {
            1: "Low-cost (INR 300-600)",
            2: "Mid-range (INR 600-1500)",
            3: "Fine-dining (INR 1500-3000)",
        }
    else:
        return "Unknown"
    return bands.get(price_level, "Unknown")

def get_user_choice(prompt_text, options):
    """Show a numbered menu and keep asking until a valid number is entered.

    Args:
        prompt_text (str): Heading printed above the menu.
        options (list): Choices, listed to the user numbered from 1.

    Returns:
        The element of ``options`` the user selected.
    """
    print(prompt_text)
    for number, label in enumerate(options, start=1):
        print(f"  {number}. {label}")

    while True:
        try:
            picked = int(input(f"Enter your choice (1-{len(options)}): "))
        except ValueError:
            # Not a whole number at all.
            print("Invalid input.")
            continue
        if picked < 1 or picked > len(options):
            # A number, but outside the menu.
            print("Invalid choice.")
            continue
        return options[picked - 1]
def generate_itinerary_prompt(details, dest_df, acc_df):
    """Assemble the text prompt sent to the language model.

    Destinations are restricted to rows whose 'Preferred Travel Type' equals
    the requested travel type; all accommodations are listed. When the
    restaurants CSV exists under DATA_PATH, restaurant names are grouped per
    destination and added as a third section.

    Args:
        details (dict): Trip preferences with the keys 'duration',
            'travel_type', 'group_type', 'hotel_pref', 'restaurant_pref',
            'transport_pref' and 'season'.
        dest_df (pandas.DataFrame): Destination rows as loaded by load_data.
        acc_df (pandas.DataFrame): Accommodation rows as loaded by load_data.

    Returns:
        str: The complete prompt.
    """
    filtered_dest = dest_df[dest_df['Preferred Travel Type'] == details['travel_type']]
    dest_str = "\n".join([
        f"Name: {d.Name}, Description: {d.Description}, Activities: {d['Top Activities']}, Transport: {d['Transport Mode']}, Travel Time: {d['Estimated Travel Time (hrs)']} hours"
        for _, d in filtered_dest.iterrows()
    ])
    acc_str = "\n".join([
        f"Name: {h.Name}, Category: {h.Category}, Rating: {h.Rating}, Price Range: {get_price_range(h.Price_Level, 'hotel')}"
        for _, h in acc_df.iterrows()
    ])

    restaurants_by_dest = "Restaurant data not available."
    restaurants_csv = os.path.join(DATA_PATH, 'kolkata_nearby_restaurants.csv')
    if os.path.exists(restaurants_csv):
        rest_df = pd.read_csv(restaurants_csv)
        # read_csv turns names such as "NA" or "null" into NaN (a float),
        # which would make str.join raise; drop those rows first.
        named = rest_df.dropna(subset=['restaurant_name'])
        rest_dict = named.groupby('destination_id')['restaurant_name'].apply(list).to_dict()
        lines = [
            f"Restaurants near {dest_name}: {', '.join(map(str, rest_dict[dest_id]))}\n"
            for dest_id, dest_name in zip(dest_df['ID'], dest_df['Name'])
            if dest_id in rest_dict
        ]
        # Keep the "not available" notice when the file has no usable rows,
        # rather than sending an empty section.
        if lines:
            restaurants_by_dest = "".join(lines)

    prompt = f"""
    You are a travel planner AI. Create a {details['duration']}-day itinerary for a {details['travel_type']} trip to Kolkata with a {details['group_type']} group.
    Preferences: Hotel - {details['hotel_pref']}, Restaurant - {details['restaurant_pref']}, Transport - {details['transport_pref']}, Season - {details['season']}.

    <Destinations>
    {dest_str}
    </Destinations>

    <Accommodations>
    {acc_str}
    </Accommodations>

    <Nearby Restaurants>
    {restaurants_by_dest}
    </Nearby Restaurants>

    Include travel times, daily plans, local highlights, and align the recommendations with user preferences.
    """
    return prompt

def call_gemini_api(prompt):
    """Send the prompt to the Gemini endpoint and return the generated text.

    The API key is requested with a hidden prompt on first use and cached in
    the module-level API_KEY. Failed requests are retried up to five times
    with exponential backoff (1, 2, 4, 8 seconds).

    Args:
        prompt (str): The text to send as the single content part.

    Returns:
        str | None: The text of the first candidate, or None when no key was
        entered, every attempt failed, or the response did not have the
        expected structure.
    """
    global API_KEY
    if not API_KEY:
        API_KEY = getpass.getpass("Enter your Gemini API key: ")
    if not API_KEY:
        print("API key not provided.")
        return None

    # The key travels in a header instead of the query string: request
    # exceptions include the URL in their message, and that message is printed
    # below, so a key in the URL would be echoed into the output.
    headers = {'Content-Type': 'application/json', 'x-goog-api-key': API_KEY}
    payload = {"contents": [{"parts": [{"text": prompt}]}]}
    max_attempts = 5

    for attempt in range(max_attempts):
        try:
            # A timeout keeps a stalled connection from hanging forever.
            response = requests.post(API_URL, headers=headers, data=json.dumps(payload), timeout=120)
            response.raise_for_status()
            result = response.json()
            return result['candidates'][0]['content']['parts'][0]['text']
        except requests.exceptions.RequestException as e:
            if attempt + 1 == max_attempts:
                # Nothing left to retry, so do not sleep or announce a retry.
                print(f"Attempt {attempt + 1} failed: {e}.")
                break
            print(f"Attempt {attempt + 1} failed: {e}. Retrying...")
            time.sleep(2 ** attempt)
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # The request succeeded but the body is not shaped as expected;
            # retrying would not help.
            print("Final error:", e)
            return None
    return None

# --- 4. MAIN EXECUTION ---
if __name__ == '__main__':
    # Trip length must parse as an integer and be at least one day.
    try:
        duration = int(input("Enter the trip duration in days (e.g., 3): "))
    except ValueError:
        print("Invalid duration.")
        sys.exit(1)
    if duration < 1:
        # A zero or negative day count would otherwise go into the prompt.
        print("Invalid duration.")
        sys.exit(1)

    # Remaining preferences are chosen from fixed menus.
    travel_type = get_user_choice("Travel type:", ['Cultural', 'Relaxation', 'Spiritual', 'Adventure'])
    group_type = get_user_choice("Group type:", ['Friends', 'Family', 'Solo', 'Group'])
    hotel_pref = get_user_choice("Hotel preference:", ['Budget', 'Mid-range', 'Luxury'])
    restaurant_pref = get_user_choice("Restaurant preference:", ['Low-cost', 'Mid-range', 'Fine-dining'])
    transport_pref = get_user_choice("Transport preference:", ['Public Transport', 'Private Cab', 'Walking-friendly'])
    season = get_user_choice("Season of travel:", ['Winter', 'Summer', 'Monsoon', 'Spring', 'Autumn'])

    # Keys match what generate_itinerary_prompt reads from its first argument.
    trip_details = {
        'duration': duration,
        'travel_type': travel_type,
        'group_type': group_type,
        'hotel_pref': hotel_pref,
        'restaurant_pref': restaurant_pref,
        'transport_pref': transport_pref,
        'season': season
    }

    # load_data prints its own error and returns (None, None) when a CSV is
    # missing, so nothing more is reported in that case.
    dest_df, acc_df = load_data()
    if dest_df is not None and acc_df is not None:
        prompt = generate_itinerary_prompt(trip_details, dest_df, acc_df)
        print("\nGenerating itinerary...\n")
        result = call_gemini_api(prompt)
        if result:
            print("="*60)
            print("Kolkata Travel Itinerary")
            print("="*60)
            print(result)
            print("="*60)
        else:
            print("Failed to get response.")


Required package 'pandas' is already installed.
Required package 'numpy' is already installed.

All required packages are installed and ready. Proceeding with itinerary generation.


Enter the trip duration in days (e.g., 3):  2


Travel type:
  1. Cultural
  2. Relaxation
  3. Spiritual
  4. Adventure


Enter your choice (1-4):  1


Group type:
  1. Friends
  2. Family
  3. Solo
  4. Group


Enter your choice (1-4):  2


Hotel preference:
  1. Budget
  2. Mid-range
  3. Luxury


Enter your choice (1-3):  2


Restaurant preference:
  1. Low-cost
  2. Mid-range
  3. Fine-dining


Enter your choice (1-3):  3


Transport preference:
  1. Public Transport
  2. Private Cab
  3. Walking-friendly


Enter your choice (1-3):  2


Season of travel:
  1. Winter
  2. Summer
  3. Monsoon
  4. Spring
  5. Autumn


Enter your choice (1-5):  1



Generating itinerary...



Enter your Gemini API key:  ········


Kolkata Travel Itinerary
Here is a 2-day cultural itinerary for your family trip to Kolkata, carefully crafted to meet your preferences for a mid-range hotel, fine-dining restaurants, private cab transport, and the winter season.

---

### **Kolkata Cultural Immersion: A Family Itinerary**

**Season:** Winter (Ideal for sightseeing with pleasant weather)
**Accommodation Preference:** Mid-range
**Restaurant Preference:** Fine-dining
**Transport Preference:** Private Cab

---

### **Recommended Hotel:**

*   **Boudir Bhater**
    *   **Category:** Mid-range
    *   **Rating:** 3.88/5
    *   **Price Range:** INR 4000-8000 per night
    *   **Highlight:** A comfortable and well-rated hotel that perfectly fits your mid-range budget, offering a convenient base for exploring Kolkata's cultural treasures.

---

### **Day 1: Heritage & Artistic Grandeur**

**Morning (9:00 AM - 1:00 PM): Dive into Kolkata's Rich Past**

*   **9:00 AM - 10:30 AM: Datta Ancestral Home - Birthplace of Vivekananda*