# Creating the dictionary graph from raw csv file

In [11]:
import pandas as pd 
from tqdm import tqdm
import pickle

path = "routes_master_data.csv"
df = pd.read_csv(path)
graph = {}
# Initiating the graph
for _, row in tqdm(df.iterrows(), total=len(df), desc="Creating nodes"):
    stop_id = row['StopPlace Name']
    if stop_id not in graph:
        graph[stop_id] = {
            'neighbors': {},  # Initialize an empty dictionary for neighbors
            'latitude': row['StopPlace Latitude'],  # Get the stop latitude
            'longitude': row['StopPlace Longitude'],  # Get the stop longitude
        }

# Second pass: Add connections between nodes
# Group the DataFrame by Route ID
grouped = df.groupby('Route_ID')

# Iterate over each route
for route_id, route_df in tqdm(grouped, total=len(grouped), desc="Processing routes"):
    # Sort the route DataFrame by Order
    route_df = route_df.sort_values('Order')
    
    # Iterate over each pair of consecutive stops in the route
    for i in range(len(route_df) - 1):
        current_stop = route_df.iloc[i]
        next_stop = route_df.iloc[i+1]
        
        current_id = current_stop['StopPlace Name']
        next_id = next_stop['StopPlace Name']
        
        # Avoid self-loops
        if current_id != next_id:
            if next_id not in graph[current_id]['neighbors']:
                graph[current_id]['neighbors'][next_id] = {
                    'longitude': current_stop['StopPlace Longitude'], 
                    'latitude': current_stop['StopPlace Latitude'], 
                    'transport_mode': current_stop['TransportMode']
                }
                
with open('graph_with_coordinates_and_neighbors.pickle', 'wb') as file:
    pickle.dump(graph, file)

Creating nodes: 100%|██████████| 786333/786333 [00:17<00:00, 46175.68it/s]
Processing routes: 100%|██████████| 1693/1693 [00:46<00:00, 36.63it/s] 


# System prompts

In [4]:
system_prompt1 = """Evaluate the scenic and visual characteristics of public transport stops in Stockholm based on their name, season, surroundings, and notable features.

Instructions:

	1.	Describe surroundings and features with balanced, factual observations. Highlight both positives and negatives constructively. Avoid scoring or biased language.
	2.	Focus on:
	•	Neighborhood/District: Characterize the area’s vibe (e.g., residential, historic) and its scenic impact.
	•	Surroundings: Mention landmarks, natural features, urban cleanliness, safety, or neglect.
	•	Accessibility: Highlight features like signage, seating, and wheelchair access.
	•	Cultural/Historical Context: Note any relevant importance.
	•	Public Perception: Assess if it’s popular among locals or tourists or isolated.
	•	Comparative Context: Describe how it stands out or falls short compared to nearby stops.
	3.	Include negative features explicitly (e.g., industrial surroundings, poor lighting, unsafe areas).
	4.	If data is missing (e.g., no landmarks), state it clearly without guessing.
	5.	Ensure descriptions are consistent across transport types and avoid redundancy.
	6.	Consider seasonal variations (e.g., winter vs. summer scenery).
	7.	Evaluate the stop’s appeal to tourists and explorers based on access, surroundings, and proximity to attractions.
"""

system_prompt2 = """You are tasked with evaluating the scenic value of a public transport stop in Stockholm based on a detailed description. Assign a score from 0 to 10 by considering the following:
	1.	Key Elements:
	•	Stop Name: Identify the stop being evaluated.
	•	Season: Assess how the current season impacts scenic appeal.
	•	Surroundings: Note nearby landmarks, nature, or urban features.
	•	Accessibility: Consider ease of access and its influence on appeal.
	2.	Scenic Value Scale:
	•	0: Extremely unattractive (e.g., industrial, poorly maintained areas).
	•	5: Average appeal (functional but unremarkable).
	•	10: Iconic or uniquely scenic (e.g., waterfront views, exceptional design).
	3.	Balancing Factors:
	•	Positive Features: Highlight views, pleasant surroundings, or unique attributes.
	•	Negative Factors: Prioritize significant drawbacks (e.g., poor maintenance) in scoring.
	•	Seasonality: While seasonal changes matter, water stops should generally score higher.
	4.	Consistency: Be realistic and critical when negative elements dominate.

The final score should reflect a balanced, nuanced judgment of the stop’s scenic appeal.

### Example Input Description:
The stop name is 'Arlanda'. It is located in an airport complex, surrounded by large parking lots and commercial buildings. The surroundings
feel quite sterile, with little to no greenery. During the winter, the area is gray and cold, and there is not much to see other than the
airport itself.

### Example Output:
- **Scenic Value**: 3

### Explanation:
- The description highlights an **unattractive environment** with **industrial surroundings**, lack of greenery, and a **sterile** atmosphere.
Winter's grayness and cold further detract from the visual appeal, leading to a low score.

### Example Input Description:
The stop name is 'Solna Centrum'. The station is located in a modern urban setting with a lot of open space, several nearby cafés, and some
green patches. It feels clean and well-maintained, and during the spring, flowers bloom around the area.

### Example Output:
- **Scenic Value**: 7

### Explanation:
- The description mentions **modern urban features** with **open space**, **cafés**, and **green patches**. The **seasonal appeal** of spring
adds to the overall scenic experience, but the setting isn't particularly striking or unique. Still, it is **clean, well-maintained**, and has
some pleasant features, earning a mid-to-high score.
"""

# Ratelimits

In [6]:
import time

class RateLimiter:
    def __init__(self, max_tokens_per_minute, max_calls_per_minute):
        self.max_tokens_per_minute = max_tokens_per_minute
        self.max_calls_per_minute = max_calls_per_minute
        
        # Tracking tokens and calls
        self.tokens_generated = 0
        self.calls_made = 0
        
        # Start times for the token and call tracking
        self.start_time_tokens = time.perf_counter()
        self.start_time_calls = time.perf_counter()
        self.time_window = 60  # seconds in a minute

    def _check_and_reset(self, current_time, start_time, current_count):
        """
        Resets the count and start time if the time window has passed.
        """
        elapsed_time = current_time - start_time
        if elapsed_time > self.time_window:
            return current_time, 0  # Reset start time and count
        return start_time, current_count

    def check_limit(self, tokens_used=0):
        """
        Checks and enforces both token and call rate limits. If necessary, sleeps until
        it's safe to proceed.
        """
        current_time = time.perf_counter()

        # Reset token and call counters if a new time window has started
        self.start_time_tokens, self.tokens_generated = self._check_and_reset(
            current_time, self.start_time_tokens, self.tokens_generated
        )
        self.start_time_calls, self.calls_made = self._check_and_reset(
            current_time, self.start_time_calls, self.calls_made
        )

        # Increment token and call counts
        self.tokens_generated += tokens_used
        self.calls_made += 1

        # Calculate delays for tokens
        token_delay = 0
        allowed_tokens = self.max_tokens_per_minute * (
            (current_time - self.start_time_tokens) / self.time_window
        )
        if self.tokens_generated > allowed_tokens:
            token_delay = (self.tokens_generated - allowed_tokens) / (
                self.max_tokens_per_minute / self.time_window
            )

        # Calculate delays for calls
        call_delay = 0
        allowed_calls = self.max_calls_per_minute * (
            (current_time - self.start_time_calls) / self.time_window
        )
        if self.calls_made > allowed_calls:
            call_delay = (self.calls_made - allowed_calls) / (
                self.max_calls_per_minute / self.time_window
            )

        # Sleep for the maximum required delay
        if token_delay > 0 or call_delay > 0:
            time.sleep(max(token_delay, call_delay))

rate_limiter = RateLimiter(max_tokens_per_minute=4_000_000, max_calls_per_minute=4_000) # for gemini-flash-8b

# Scenic value assignment to stops

# Running with asyncio

In [13]:
import json
import pandas as pd
from pydantic import BaseModel
from openai import OpenAI
import instructor
import os
import numpy as np
from dotenv import load_dotenv
import pickle
from tqdm import tqdm
load_dotenv()

client = OpenAI(
    api_key=os.environ["GEMINI_API_KEY"], 
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

class ScenicValue(BaseModel):
    scenic_value: float

def assign_scenic_description(stop_name, season="summer"):
    result = client.beta.chat.completions.parse(
        model="gemini-1.5-flash-8b",
        messages=[
            {
                "role": "system",
                "content": "Respond using a list."
            },
            {
                "role": "user",
                "content": f"""
                Context: '{system_prompt1}'
                
                Meta-data: The stop name is '{stop_name}'. The current season is '{season}'.
                """
            },
        ],
        max_tokens=500,
        temperature=0,
    )
    description = result.choices[0].message.content
    tokens_used = result.usage.total_tokens
    return description, tokens_used


def assign_scenic_value(descriptions):
    result = client.beta.chat.completions.parse(
        model="gemini-1.5-flash-8b",
        messages=[
            {"role": "system", "content": "Respond with one number with one decimal."},
            {"role": "user", "content": f"""
            Instructions:
            <instructions>
            {system_prompt2}
            </instructions>
            
            Description:
            <descriptions>
            {descriptions}
            </descriptions>
            """},
        ],
        temperature=0, 
        response_format=ScenicValue,
    )
    scenic_value = json.loads(result.choices[0].message.content)["scenic_value"]
    tokens_used = result.usage.total_tokens
    return scenic_value, tokens_used

with open('graph_with_coordinates_and_neighbors.pickle', 'rb') as file:
    graph = pickle.load(file)

def sub_graph(data, n=2):
    keys = list(data.keys())
    sliced_keys = keys[:n]
    return {key: data[key] for key in sliced_keys}

graph = sub_graph(graph, n=3) # seet n = len(data) for the full dataset
stop_names = list(graph.keys()) # A list of stops represneted as strings
stop_scenic_values = []

# Assigns scenic value for each stop
for i, stop_name in tqdm(enumerate(stop_names), total=len(stop_names), desc="Assigning scenic values to stops:"):
    descriptions, tokens_used = assign_scenic_description(stop_name)
    scenic_value = assign_scenic_value(descriptions)
    #scenic_value = np.random.randint(0, 10)
    stop_scenic_values.append({stop_name: scenic_value})
    rate_limiter.check_limit(tokens_used=tokens_used)

# Mapping the scenic values to each stops and its corresponding neighbors
scenic_values_dict = {}
for scenic_value in stop_scenic_values:
    for stop, value in scenic_value.items():
        scenic_values_dict[stop] = value

for stop, info in graph.items():
    if stop in scenic_values_dict:
        info['scenic_value'] = scenic_values_dict[stop]
    for neighbor, neighbor_info in info.get('neighbors', {}).items():
        if neighbor in scenic_values_dict:
            neighbor_info['scenic_value'] = scenic_values_dict[neighbor]

print(graph["Campus Roslagen"])
with open('graph_with_scenic_values.pickle', 'wb') as file:
    pickle.dump(graph, file)

Assigning scenic values to stops:: 100%|██████████| 3/3 [00:14<00:00,  4.73s/it]

{'neighbors': {'Gustavslund': {'longitude': 18.685677, 'latitude': 59.748096, 'transport_mode': 'bus'}, 'Astrid Lindgrens gata': {'longitude': 18.685677, 'latitude': 59.748096, 'transport_mode': 'bus'}, 'Backtorp': {'longitude': 18.685677, 'latitude': 59.748096, 'transport_mode': 'bus'}, 'Norrtälje busstation': {'longitude': 18.685677, 'latitude': 59.748096, 'transport_mode': 'bus'}, 'Malsta vägskäl': {'longitude': 18.685677, 'latitude': 59.748096, 'transport_mode': 'bus'}, 'Stockholmsvägen': {'longitude': 18.685677, 'latitude': 59.748096, 'transport_mode': 'bus', 'scenic_value': (6.5, 962)}, 'Södra Lohärad': {'longitude': 18.685677, 'latitude': 59.748096, 'transport_mode': 'bus'}, 'Rösa trafikplats': {'longitude': 18.685677, 'latitude': 59.748096, 'transport_mode': 'bus'}}, 'latitude': 59.748096, 'longitude': 18.685677, 'scenic_value': (6.5, 984)}





In [None]:
import asyncio
import google.generativeai as genai
import os
import json
import pickle
import math
from tqdm.asyncio import tqdm_asyncio
from pydantic import BaseModel
from time import perf_counter, sleep
from dotenv import load_dotenv
load_dotenv()

genai.configure(api_key=os.environ["GEMINI_API_KEY"])
model = genai.GenerativeModel("gemini-1.5-flash-8b")
        
class ScenicValue(BaseModel):
    scenic_value: float

async def assign_scenic_description(stop_name, season="summer"):
    result = await model.generate_content_async(
        contents=[
            "Respond using a list.",
            f"Instructions: '{system_prompt1}'",
            f"Context: The stop name is '{stop_name}'. The current season is '{season}'."
        ],
        generation_config=genai.GenerationConfig(
            max_output_tokens=500,
            temperature=0,
        )
    )
    description = result.text
    return description

async def assign_scenic_value(description):
    result = await model.generate_content_async(
        contents=[
            "Respond with one number with one decimal.", 
            f"Instructions: {system_prompt2}", 
            f"Context: {description}"
        ],
        generation_config=genai.GenerationConfig(
            response_mime_type='application/json',
            response_schema=ScenicValue,
            max_output_tokens=50,
            temperature=0,
        )
    )
    scenic_value = json.loads(result.text)["scenic_value"]
    return scenic_value

async def evaluate(stop_name):
    description = await assign_scenic_description(stop_name)
    scenic_value = await assign_scenic_value(description)
    return {stop_name: scenic_value}

async def dummy_evaluate(stop_name):
    scenic_value = np.random.randint(0, 10)
    return {stop_name: scenic_value}

def split_list_into_chunks(original_list, max_chunk_size=2000):
    length = len(original_list)
    num_chunks = math.ceil(length / max_chunk_size)
    base_chunk_size = length // num_chunks
    remainder = length % num_chunks

    chunks = []
    start = 0
    for i in range(num_chunks):
        end = start + base_chunk_size + (1 if i < remainder else 0)
        chunks.append(original_list[start:end])
        start = end

    return chunks

with open('graph_with_coordinates_and_neighbors.pickle', 'rb') as file:
    graph = pickle.load(file)

def sub_graph(data, n=2):
    keys = list(data.keys())
    sliced_keys = keys[:n]
    return {key: data[key] for key in sliced_keys}

graph = sub_graph(graph, n=len(graph)) # set n = len(data) for the full dataset
stop_names = list(graph.keys()) # A list of stops represented as strings
stop_name_chunks = split_list_into_chunks(stop_names)
stop_scenic_values = []

# Creates a list of dictionaries [{stop_name: scenic_value}]
for stop_name_chunk in stop_name_chunks:
    tasks = [dummevaluate(stop_name) for stop_name in stop_name_chunk]
    stop_scenic_values_chunk = await tqdm_asyncio.gather(*tasks)
    stop_scenic_values += stop_scenic_values_chunk
    # Saving
    #with open('stop_scenic_values.json', 'w') as file:
    #    json.dump(stop_scenic_values, file)
    sleep(60)

# Mapping the scenic values to each stops and its corresponding neighbors
scenic_values_dict = {}
for scenic_value in stop_scenic_values:
    for stop, value in scenic_value.items():
        scenic_values_dict[stop] = value

for stop, info in graph.items():
    if stop in scenic_values_dict:
        info['scenic_value'] = scenic_values_dict[stop]
    for neighbor, neighbor_info in info.get('neighbors', {}).items():
        if neighbor in scenic_values_dict:
            neighbor_info['scenic_value'] = scenic_values_dict[neighbor]

print(graph["Campus Roslagen"])
#with open('graph_with_scenic_values.pickle', 'wb') as file:
#    pickle.dump(graph, file)

In [31]:
client = OpenAI(
    api_key=os.environ["GEMINI_API_KEY"], 
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

for stop_name in ['Campus Roslagen', 'Stockholmsvägen', 'Familjens hus', 'Flygfältet', 'Ålandsgatan']:
    season="summer"
    result = client.beta.chat.completions.parse(
        model="gemini-1.5-flash-8b",
        messages=[
            {
                "role": "system",
                "content": "Respond using a list."
            },
            {
                "role": "user",
                "content": f"""
                Context: '{system_prompt1}'
                
                Meta-data: The stop name is '{stop_name}'. The current season is '{season}'.
                """
            },
        ],
        max_tokens=500,
        temperature=0,
    )
    
    descriptions = result.choices[0].message.content
    print(result.usage)

CompletionUsage(completion_tokens=357, prompt_tokens=334, total_tokens=691, completion_tokens_details=None, prompt_tokens_details=None)
CompletionUsage(completion_tokens=352, prompt_tokens=334, total_tokens=686, completion_tokens_details=None, prompt_tokens_details=None)
CompletionUsage(completion_tokens=324, prompt_tokens=335, total_tokens=659, completion_tokens_details=None, prompt_tokens_details=None)
CompletionUsage(completion_tokens=382, prompt_tokens=335, total_tokens=717, completion_tokens_details=None, prompt_tokens_details=None)
CompletionUsage(completion_tokens=372, prompt_tokens=335, total_tokens=707, completion_tokens_details=None, prompt_tokens_details=None)


In [41]:
client = OpenAI(
    api_key=os.environ["GEMINI_API_KEY"], 
    base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
)

for _ in range(5):
    result = client.beta.chat.completions.parse(
        model="gemini-1.5-flash-8b",
        messages=[
            {"role": "system", "content": "Respond with one number with one decimal."},
            {"role": "user", "content": f"""
            Instructions:
            <instructions>
            {system_prompt2}
            </instructions>
            
            Description:
            <descriptions>
            {descriptions}
            </descriptions>
            """},
        ],
        temperature=0, 
        max_tokens=50,
        response_format=ScenicValue,
    )
    
    print(result.usage)

CompletionUsage(completion_tokens=14, prompt_tokens=983, total_tokens=997, completion_tokens_details=None, prompt_tokens_details=None)
CompletionUsage(completion_tokens=14, prompt_tokens=983, total_tokens=997, completion_tokens_details=None, prompt_tokens_details=None)
CompletionUsage(completion_tokens=14, prompt_tokens=983, total_tokens=997, completion_tokens_details=None, prompt_tokens_details=None)
CompletionUsage(completion_tokens=14, prompt_tokens=983, total_tokens=997, completion_tokens_details=None, prompt_tokens_details=None)
CompletionUsage(completion_tokens=14, prompt_tokens=983, total_tokens=997, completion_tokens_details=None, prompt_tokens_details=None)


In [200]:
iterations = 1432 #4295 
input_tokens = (1000 + 350) * iterations
output_tokens = (50 + 500) * iterations
total_tokens = input_tokens + output_tokens
input_price = 0.0375 * input_tokens / 1_000_000
output_price = 0.15 * output_tokens / 1_000_000
dollars_to_sek = 10.9
#dollars_to_sek * (input_price + output_price)
total_tokens

2720800

# A star search

In [11]:
import math
import heapq
import plotly.graph_objects as go
import numpy as np
import pickle

def haversine_distance(longitude1, latitude1, longitude2, latitude2):
    """
    Calculates the Haversine distance between two points.

    Args:
    longitude1 (float): The longitude of the first point.
    latitude1 (float): The latitude of the first point.
    longitude2 (float): The longitude of the second point.
    latitude2 (float): The latitude of the second point.

    Returns:
    float: The Haversine distance between the two points.
    """
    R = 6371  # Radius of the Earth
    d_longitude = math.radians(longitude2 - longitude1)
    d_latitude = math.radians(latitude2 - latitude1)
    a = math.sin(d_latitude / 2) ** 2 + math.cos(math.radians(latitude1)) * math.cos(math.radians(latitude2)) * math.sin(d_longitude / 2) ** 2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    distance = R * c
    return distance

def scenic_cost(scenic_value, C=0.1, lam=0.5):
    return C * math.exp(-lam * scenic_value)

def a_star_search(graph, start, goal):
    open_list = []
    heapq.heappush(open_list, (0, start))
    came_from = {start: None}
    cost_so_far = {start: 0}

    while open_list:
        current_cost, current_stop = heapq.heappop(open_list)

        if current_stop == goal:
            break

        for neighbor, neighbor_info in graph[current_stop].get('neighbors', {}).items():
            current_scenic_value = graph[current_stop].get('scenic_value', 0)
            neighbor_scenic_value = neighbor_info.get('scenic_value', 0)
            effective_scenic_value = (current_scenic_value + neighbor_scenic_value) / 2
            distance = haversine_distance(
                graph[current_stop]['longitude'], 
                graph[current_stop]['latitude'], 
                graph[neighbor]['longitude'], 
                graph[neighbor]['latitude']
            )
            new_cost = cost_so_far[current_stop] + distance + scenic_cost(effective_scenic_value)
            
            if neighbor not in cost_so_far or new_cost < cost_so_far[neighbor]:
                cost_so_far[neighbor] = new_cost
                priority = new_cost + haversine_distance(graph[neighbor]['longitude'], graph[neighbor]['latitude'], graph[goal]['longitude'], graph[goal]['latitude'])
                heapq.heappush(open_list, (priority, neighbor))
                came_from[neighbor] = current_stop

    # Reconstruct the path
    current_stop = goal
    path = []
    while current_stop is not None:
        path.append(current_stop)
        current_stop = came_from.get(current_stop)
    path.reverse()
    return path

with open('graph_with_scenic_values.pickle', 'rb') as file:
    graph = pickle.load(file)

stop_names = list(graph.keys()) # A list of stops represented as strings
start_stop = 'Ropsten' #np.random.choice(stop_names) #'Ropsten'
goal_stop = 'Nybroplan' #np.random.choice(stop_names) #'Nybroplan'
shortest_path = a_star_search(graph, start_stop, goal_stop)
print(shortest_path)

def visualize_path(graph, path):
    # Extract all stops' data
    all_longitudes = [graph[stop]['longitude'] for stop in graph]
    all_latitudes = [graph[stop]['latitude'] for stop in graph]
    all_stop_names = list(graph.keys())
    all_scenic_values = [graph[stop].get('scenic_value', 0) for stop in graph]

    # Extract the longitude, latitude, and scenic values of stops in the critical path
    path_longitudes = [graph[stop]['longitude'] for stop in path]
    path_latitudes = [graph[stop]['latitude'] for stop in path]
    path_scenic_values = [graph[stop].get('scenic_value', 0) for stop in path]
    path_stop_names = path

    # Calculate the average of the start and end stop coordinates for centering the map
    start_longitude = graph[path[0]]['longitude']
    start_latitude = graph[path[0]]['latitude']
    end_longitude = graph[path[-1]]['longitude']
    end_latitude = graph[path[-1]]['latitude']
    avg_longitude = (start_longitude + end_longitude) / 2
    avg_latitude = (start_latitude + end_latitude) / 2

    # Create a Plotly figure
    fig = go.Figure()

    # Add all stops as markers
    fig.add_trace(go.Scattermapbox(
        lat=all_latitudes,
        lon=all_longitudes,
        mode='markers',
        marker=go.scattermapbox.Marker(
            size=8,
            color='blue',  # Color for all stops
            opacity=0.7
        ),
        hoverinfo='text',
        hovertext=[f'Stop: {stop_name}<br>Scenic Value: {scenic_value}' 
                   for stop_name, scenic_value in zip(all_stop_names, all_scenic_values)],
        name='All Stops'
    ))

    # Add critical path as a line with highlighted markers
    fig.add_trace(go.Scattermapbox(
        lat=path_latitudes,
        lon=path_longitudes,
        mode='markers+lines',
        marker=go.scattermapbox.Marker(
            size=10,
            color='red',  # Color for critical path stops
            opacity=1.0
        ),
        line=go.scattermapbox.Line(
            color='red',  # Line color for critical path
            width=3
        ),
        hoverinfo='text',
        hovertext=[f'Stop: {stop_name}<br>Scenic Value: {scenic_value}' 
                   for stop_name, scenic_value in zip(path_stop_names, path_scenic_values)],
        name='Critical Path'
    ))

    # Update the layout
    fig.update_layout(
        mapbox_style="open-street-map",
        mapbox_zoom=10,
        mapbox_center_lat=avg_latitude,
        mapbox_center_lon=avg_longitude,
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        height=800
    )

    # Show the figure
    fig.show()

# Example usage:
visualize_path(graph, shortest_path)

EOFError: Ran out of input