# One at the time

In [138]:
from pydantic import BaseModel
from openai import OpenAI
import json
import os
import numpy as np
from dotenv import load_dotenv
import pickle
from time import sleep, perf_counter
load_dotenv()

with open("graph_simplified_id.pickle", "rb") as f:
    G = pickle.load(f)

client = OpenAI(
    api_key=os.environ["GEMINI_API_KEY"],
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

class ScenicValue(BaseModel):
    scenic_value: float

def assign_scenic_value(stop_data, n=5):
    """
    Assigns a 'scenic_value' to the stop_data using an LLM.
    
    Parameters:
    stop_data (dict): The dictionary containing stop information.
    
    Returns:
    dict: The updated dictionary with the 'scenic_value' key added.
    """
    stop_name = next(iter(stop_data))  # Assuming stop_data is {stop_name: {...}}
    neighbors = stop_data[stop_name].get('neighbors', {})
    neighbor_names = list(neighbors.keys())
    
    result = client.beta.chat.completions.parse(
        model="gemini-1.5-flash",
        messages=[
            {"role": "system", "content": "You provide scenic values between 0 and 10 for stops in Stockholm."},
            {"role": "user", "content": (
                "I am trying to assess the scenic value of a stop in Stockholm."
                f"The stop name is '{stop_name}' and it has neighbors: {neighbor_names}."
                "Please provide a scenic value between 0 and 10 for the stop."
                "A scenic value of 5 means intuitively this place is average and does not evoke any positive or negative emotions."
                "Assume that the joy of a place is normally distirbuted according to N(5, 2)," 
                "meaning that a place with scenic value 9 is better than 95% of the places."
            )},
        ],
        n=n,
        temperature=2,
        response_format=ScenicValue,
    )

    scenic_values = [json.loads(s.message.content)["scenic_value"] for s in result.choices] # list of scenic values: [7.1, 7.1, 7, 7.5, 7.6]
    G[stop_name]["scenic_value"] = (np.mean(scenic_values), np.std(scenic_values)) # assigns the stop dictionary with a 'scenic_value' tuple


# Process a single stop (for demonstration)
stop_name = "Stockholmsvägen"
stop_data = {stop_name: G[stop_name]}
assign_scenic_value(stop_data)
G[stop_name]

# To process all stops, iterate over the graph:
#for stop_name in G:
#    start_time = perf_counter()
#    stop_data = {stop_name: G[stop_name]}
#    assign_scenic_value(stop_data)

    # Rate limiting: ensure 4 seconds between requests
#    elapsed = perf_counter() - start_time
#    sleep(max(4 - elapsed, 0))

# Save the updated graph
#with open("graph_with_scenic_values.pickle", "wb") as f:
#    pickle.dump(G, f)

{'neighbors': {'Källvägen': {'travel_time': 1168.0,
   'route_name': 'Bagarmossen',
   'transport_mode': 'bus'},
  'Murklevägen': {'travel_time': 1780.0,
   'route_name': 'Gröndal',
   'transport_mode': 'bus'}},
 'stop_latitude': 59.280605,
 'stop_longitude': 18.08223,
 'arrival_time': '07:03:02',
 'departure_time': '07:03:02',
 'transport_mode': 'bus',
 'scenic_value': (7.1, 0.2898275349237886)}

# Batched (does not work yet)

In [142]:
from pydantic import BaseModel
from typing import Tuple, Dict, List
from openai import OpenAI
import json
from tqdm import tqdm
import os
from dotenv import load_dotenv
import pickle
import math
from time import perf_counter, sleep

load_dotenv()

# Load the graph from a pickle file
with open("graph_simplified_id.pickle", "rb") as f:
    G = pickle.load(f)

# Initialize the OpenAI client with the API key
client = OpenAI(
    api_key=os.environ["GEMINI_API_KEY"],
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

# Update the Pydantic model to validate a dictionary instead of a tuple
class ScenicValue(BaseModel):
    scenic_values: List[float]

def assign_scenic_values_batch(batch_stops):
    """
    Assigns 'scenic_value' to a batch of stops using an LLM.
    
    Parameters:
    batch_stops (list): A list of tuples containing stop names and their information.
    
    Returns:
    dict: A dictionary with stop names as keys and scenic values as values.
    """
    # Prepare the prompt for the batch of stops
    batch_input = []
    for stop_name, stop_info in batch_stops:
        neighbors = stop_info.get('neighbors', {})
        neighbor_names = list(neighbors.keys())
        batch_input.append({
            "stop_name": stop_name,
            "neighbors": neighbor_names
        })
    
    prompt_content = (
        "You will assess the scenic value of multiple stops in Stockholm. "
        "For each stop, provide a scenic value between 0 and 10 based on the stop's name and its neighbors. "
        "A scenic value of 5 means the place is average and does not evoke any positive or negative emotions. "
        "Assume that the joy of a place is normally distributed as N(5, 2). "
        "\nRespond with a dictionary where keys are stop names and values are their scenic values.\n\n"
        "Here is the list of stops and their neighbors:\n" +
        "\n".join([f"{item['stop_name']}: {item['neighbors']}" for item in batch_input]) +
        "\n\nRespond with a list in the format: "
        "[value1, value2, ...]. "
        "The response MUST contain exactly one entry for each provided stop."
    )
    
    # Make the API call
    result = client.chat.completions.create(
        model="gemini-1.5-flash-8b",
        messages=[
            {"role": "system", "content": "You provide scenic values between 0 and 10 for stops in Stockholm."},
            {"role": "user", "content": prompt_content},
        ],
        temperature=1,
    )
    
    # Get the content from the first choice
    raw_content = result.choices[0].message.content
    raw_content = raw_content.strip().strip('```json').strip('```')

    # Convert string dictionary representation to actual dictionary
    response_content = eval(raw_content)
    validated_response = ScenicValue(scenic_values=response_content)
    
    # Check that all stops are included in the response
    batch_stop_names = {stop[0] for stop in batch_stops}
    if set(validated_response.scenic_values.keys()) != batch_stop_names:
        raise ValueError("Received scenic values for different stops than expected")
    
    return validated_response.scenic_values

    
# Process all stops using batching
batch_size = 10  # Adjust this size based on API input constraints
stops = list(G.items())
total_batches = math.ceil(len(stops) / batch_size)

for i in tqdm(range(total_batches)):
    start_time = perf_counter()
    
    print(f"Processing batch {i+1}/{total_batches}")
    batch_stops = stops[i*batch_size:(i+1)*batch_size]
    scenic_values = assign_scenic_values_batch(batch_stops)
    
    # Update the graph with the scenic values
    for stop_name, value in scenic_values.items():
        G[stop_name]["scenic_value"] = value
    
    # Rate limiting: ensure 4 seconds between requests
    elapsed = perf_counter() - start_time
    sleep(max(4 - elapsed, 0))

# Save the updated graph
with open("graph_with_scenic_values.pickle", "wb") as f:
    pickle.dump(G, f)

  0%|          | 0/543 [00:00<?, ?it/s]

Processing batch 1/543


  0%|          | 0/543 [00:05<?, ?it/s]


SyntaxError: invalid syntax (<string>, line 2)