In [None]:
import pandas as pd

DAY = 24*60*60

In [None]:
df = pd.read_csv('./connection_graph.csv', index_col=0, low_memory=False)
df

In [None]:
# rm the first column and duplicate rows
# df = df.drop(df.columns[0], axis=1)
# df.drop_duplicates(inplace=True)
df
# save to file

In [None]:
# df.to_csv('./connection_graph.csv')

In [None]:
def get_sec_from_str(time_str):
    time_str = time_str.split(':')
    return (int(time_str[0]) * 3600 + int(time_str[1]) * 60 + int(time_str[2])) % DAY

df['departure_time_sec'] = df['departure_time'].apply(get_sec_from_str)
df['arrival_time_sec'] = df['arrival_time'].apply(get_sec_from_str)
df

In [None]:
stops = df['start_stop'].unique()
stops, stops.shape

In [None]:
import numpy as np

import networkx as nx

graph = nx.DiGraph()
for stop in stops:
    graph.add_node(stop)

for number, edge in df.iterrows():
  if not graph.has_edge(edge['start_stop'], edge['end_stop']):
    graph.add_edge(edge['start_stop'], edge['end_stop'], schedule=[])

  graph[edge['start_stop']] [edge['end_stop']] ['schedule'].append({
    'dep': edge['departure_time_sec'],
    'line': edge['line'],
    'end_stop_lat': edge['end_stop_lat'],
    'end_stop_lon': edge['end_stop_lon'],
    'arrival': edge['arrival_time_sec'],
    'id': number
    })


def convert_to_dataframe(graph):
  for start, stop in graph.edges:
    graph[start][stop]['schedule'] = pd.DataFrame(graph[start][stop]['schedule'])

convert_to_dataframe(graph)

In [None]:
def print_graph(graph):
  for start, stop in graph.edges:
    print(start, '->', stop)
    print(graph[start][stop]['schedule'])
# print_graph(graph)

In [81]:
from typing import Dict


class MinProperties:
  def reset(self):
    self.min = float('inf')
    self.min_diff = float('inf')
    self.min_idx = 0

min_properties = MinProperties()  

def difference(row, departure, additional_cost=0):
    global min_properties
    arrival = row['arrival']
    new_departure = row['dep']

    if departure > new_departure > 0 and departure - new_departure < 10 * 60 * 60:
        return row
    
    value = (arrival - departure + DAY) % DAY + additional_cost

    if value < min_properties.min_diff:
        min_properties.min = arrival
        min_properties.min_diff = value
        min_properties.min_idx = row['id']
    return row

class NodeFeatures:
  def __init__(self, id, name: str, time: float, prev):
    self.id = id
    self.name = name
    self.time = time # departue time
    self.prev = prev

  def set_time(self, departure, edge: float, prev):
    time, id = self.get_best_time(edge, departure)

    if time < self.time:
      self.prev = prev
      self.id = id
      self.time = time

  def get_best_time(self, edge, departure):
    schedule = edge['schedule']

    min_properties.reset()
    schedule.apply(difference, axis=1, args=(departure,))

    return min_properties.min, min_properties.min_idx

def get_best_path(visited: Dict[str, NodeFeatures], destination: str):
  start = visited[destination]
  path = [start]

  while start.prev:
    start = start.prev
    path.append(start)

  return path[::-1]


np.seterr(divide='ignore', invalid='ignore')

def djikstra(graph: nx.Graph, start: str, destination: str, starting_time: int):
  unvisited: Dict[str, NodeFeatures] = {node: NodeFeatures(-1, node, np.inf, None) for node in graph.nodes}
  visited: Dict[str, NodeFeatures] = {}

  start_stop = unvisited.pop(start)
  start_stop.time = starting_time

  while unvisited:
    neighbours = graph[start_stop.name]
    best_neighbour = NodeFeatures(-1, "", np.inf, None)
    for neigbour in neighbours:
      if not neigbour in unvisited:
        continue

      unvisited[neigbour].set_time(start_stop.time, neighbours[neigbour], start_stop)

      # ex 23:50 > 00:10 ... and best_neighbour.time - unvisited[neigbour].time < 10 * 60 * 60
      if best_neighbour.time > unvisited[neigbour].time:
        best_neighbour = unvisited[neigbour]

    if best_neighbour.time == np.inf:
      start_stop = unvisited.popitem()[1]
      continue

    visited[start_stop.name] = start_stop
    start_stop = unvisited.pop(best_neighbour.name)

  return get_best_path(visited, destination)


visited = djikstra(graph, 'DWORZEC AUTOBUSOWY', 'FAT', 23 * 60 * 60 + 30 * 60)
visited

[<__main__.NodeFeatures at 0x7c1d3245b9d0>,
 <__main__.NodeFeatures at 0x7c1d3245b390>,
 <__main__.NodeFeatures at 0x7c1d356ea390>,
 <__main__.NodeFeatures at 0x7c1d33182d10>,
 <__main__.NodeFeatures at 0x7c1d30022f10>,
 <__main__.NodeFeatures at 0x7c1d30022fd0>,
 <__main__.NodeFeatures at 0x7c1d30021510>,
 <__main__.NodeFeatures at 0x7c1d30023150>,
 <__main__.NodeFeatures at 0x7c1d30022210>,
 <__main__.NodeFeatures at 0x7c1d30023bd0>,
 <__main__.NodeFeatures at 0x7c1d30023b50>,
 <__main__.NodeFeatures at 0x7c1d33183750>]

In [82]:
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

node = None
for idx, node in enumerate(visited):
  # print(node.name, node.time)
  try:
    row = df.loc[node.id]
    # display only the most important columns: line, start_stop, end_stop, departure_time, arrival_time
    print(row[['line', 'start_stop', 'end_stop', 'departure_time', 'arrival_time']].values)

  except:...
    # print(row[['line', 'start_stop', 'end_stop', 'departure_time', 'arrival_time']].columns.values)
  print()


['243' 'DWORZEC AUTOBUSOWY' 'EPI' '23:33:00' '23:35:00']

['250' 'EPI' 'DWORZEC GŁÓWNY' '24:04:00' '24:05:00']

['247' 'DWORZEC GŁÓWNY' 'Arkady (Capitol)' '24:21:00' '24:23:00']

['251' 'Arkady (Capitol)' 'pl. Legionów' '24:37:00' '24:40:00']

['251' 'pl. Legionów' 'Grabiszyńska' '24:40:00' '24:42:00']

['251' 'Grabiszyńska' 'Pereca' '24:42:00' '24:43:00']

['251' 'Pereca' 'Stalowa' '24:43:00' '24:44:00']

['251' 'Stalowa' 'pl. Srebrny' '24:44:00' '24:44:00']

['251' 'pl. Srebrny' 'Bzowa (Centrum Historii Zajezdnia)' '24:44:00'
 '24:45:00']

['251' 'Bzowa (Centrum Historii Zajezdnia)' 'Hutmen' '24:45:00' '24:46:00']

['251' 'Hutmen' 'FAT' '24:46:00' '24:48:00']



In [None]:
departure = 34900

df2 = pd.DataFrame(0, index=np.arange(df.shape[0]), columns=['diff'])
df2['diff'] = (df['departure_time_sec'] + DAY - departure) % DAY

# df2['diff'].min() find min and index of min
df2[df2['diff'] > 0].idxmin().iloc[0]

In [84]:
from typing import Dict

min_properties = MinProperties()

class NodeFeatures:
  def __init__(self, id, name: str, time: float, prev):
    self.id = id
    self.name = name
    self.time = time # departue time
    self.prev = prev

  def set_time(self, departure, edge: float, prev):
    time, id = self.get_best_time(edge, departure)

    print("time", time)
    print("self.time", self.time)
    if time < self.time:
      self.prev = prev
      self.id = id
      self.time = time
      return id

  def get_best_time(self, edge, departure):
    schedule = edge['schedule']
    # times = schedule[['arrival']]

    distance_cost = calculate_manhatan(schedule['end_stop_lat'], schedule['end_stop_lon'])
    min_properties.reset()
    schedule.apply(difference, axis=1, args=(departure, distance_cost))

    return min_properties.min, min_properties.min_idx
    # best_time = times[times['arrival'] - distance_cost > departure].min().iloc[0]

    # try:
    #   idx = times[times['arrival'] - distance_cost > departure].idxmin()
    #   row = schedule['id'][idx]
    # except:
    #   row = None

    # return best_time, row

def get_best_path(visited: Dict[str, NodeFeatures], destination: str):
  start = visited[destination]
  path = [start]

  while start.prev:
    start = start.prev
    path.append(start)

  return path[::-1]

def calculate_manhatan(start_lat, start_long):
  global target_lat, target_long, TRAM_SPEED
  return (abs(start_lat - target_lat) + abs(start_long - target_long)) / TRAM_SPEED

np.seterr(divide='ignore', invalid='ignore')

TRAM_SPEED = 16 * 1000 / 3600
target_lat = None
target_lat = None

def a_star(graph: nx.Graph, start: str, destination: str, starting_time: int):
  global target_long, target_lat
  target_lat = df[df['start_stop'] == destination].iloc[0]['start_stop_lat']
  target_long = df[df['start_stop'] == destination].iloc[0]['start_stop_lon']

  unvisited: Dict[str, NodeFeatures] = {node: NodeFeatures(-1, node, np.inf, None) for node in graph.nodes}
  visited: Dict[str, NodeFeatures] = {}

  start_stop = unvisited.pop(start)
  start_stop.time = starting_time

  while unvisited:
    neighbours = graph[start_stop.name]
    best_neighbour = NodeFeatures(-1, "", np.inf, None)
    for neigbour in neighbours:
      if not neigbour in unvisited:
        continue

      unvisited[neigbour].set_time(start_stop.time, neighbours[neigbour], start_stop)

      if best_neighbour.time > unvisited[neigbour].time:
        best_neighbour = unvisited[neigbour]

    if best_neighbour.time == np.inf:
      start_stop = unvisited.popitem()[1]
      continue

    visited[start_stop.name] = start_stop
    start_stop = unvisited.pop(best_neighbour.name)

  return get_best_path(visited, destination)


visited = a_star(graph, 'DWORZEC AUTOBUSOWY', 'FAT', 34900)
visited

ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().

In [None]:
for idx, node in enumerate(visited):
  # print(node.name, node.time)
  try:
    row = df.loc[node.id]
    # display only the most important columns: line, start_stop, end_stop, departure_time, arrival_time
    print(row[['line', 'start_stop', 'end_stop', 'departure_time', 'arrival_time']].values)

  except:...
    # print(row[['line', 'start_stop', 'end_stop', 'departure_time', 'arrival_time']].columns.values)
  print()

In [None]:

class NodeFeatures3:
  def __init__(self, id, name: str, time: float, prev):
    self.id = id
    self.name = name
    self.line = ""
    self.time = time # departue time
    self.prev = prev

  def set_time(self, departure, edge: float, prev):
    time, id = self.get_best_time(edge, departure)

    if time < self.time:
      self.prev = prev
      self.id = id['id']
      self.line = id['line']
      self.time = time
      return id

  def get_best_time(self, edge, departure):
    schedule = edge['schedule']
    try:
      connection = schedule[calculate_cost(schedule, departure)][0]
      best_time = departure.time
    except:
      connection = schedule.iloc[0]
      best_time = departure.time + 1.0

    print(connection)
    return best_time, connection

def calculate_cost(schedule, departure):
  return schedule['line'] == departure.line

def get_best_path(visited: Dict[str, NodeFeatures3], destination: str):
  start = visited[destination]
  path = [start]

  while start.prev:
    start = start.prev
    path.append(start)

  return path[::-1]

def calculate_manhatan(start_lat, start_long):
  global target_lat, target_long, TRAM_SPEED
  return (abs(start_lat - target_lat) + abs(start_long - target_long)) / TRAM_SPEED

np.seterr(divide='ignore', invalid='ignore')

TRAM_SPEED = 16 * 1000 / 3600
target_lat = None
target_lat = None

def a_star_changings(graph: nx.Graph, start: str, destination: str, starting_time: int):
  global target_long, target_lat
  target_lat = df[df['start_stop'] == destination].iloc[0]['start_stop_lat']
  target_long = df[df['start_stop'] == destination].iloc[0]['start_stop_lon']

  unvisited: Dict[str, NodeFeatures3] = {node: NodeFeatures3(-1, node, np.inf, None) for node in graph.nodes}
  visited: Dict[str, NodeFeatures3] = {}

  start_stop = unvisited.pop(start)
  start_stop.time = 0

  while unvisited:
    neighbours = graph[start_stop.name]
    best_neighbour = NodeFeatures3(-1, "", np.inf, None)
    for neigbour in neighbours:
      if not neigbour in unvisited:
        continue

      unvisited[neigbour].set_time(start_stop, neighbours[neigbour], start_stop)

      if best_neighbour.time > unvisited[neigbour].time:
        best_neighbour = unvisited[neigbour]

    if best_neighbour.time == np.inf:
      start_stop = unvisited.popitem()[1]
      continue

    visited[start_stop.name] = start_stop
    start_stop = unvisited.pop(best_neighbour.name)

  return get_best_path(visited, destination)


visited = a_star_changings(graph, 'DWORZEC AUTOBUSOWY', 'FAT', 34900)
visited

In [None]:
for idx, node in enumerate(visited):
  # print(node.name, node.time)
  print(node.line, node.name, node.time)
  try:
    row = df.loc[node.id]
    # display only the most important columns: line, start_stop, end_stop, departure_time, arrival_time
    print(row[['line', 'start_stop', 'end_stop', 'departure_time', 'arrival_time']].values)

  except:...
    # print(row[['line', 'start_stop', 'end_stop', 'departure_time', 'arrival_time']].columns.values)
  print()

In [None]:

# times[['departure_sec']].max()


In [None]:

times = df[df['start_stop'] == 'DWORZEC AUTOBUSOWY'][df['end_stop'] == 'FAT']
best_time = times[times['arrival'] > departure].min().iloc[0]

idx = times[times['arrival'] > departure].idxmin()

In [None]:
min = float('inf')
min_diff = float('inf')
min_idx = 0
def difference(row, departure):
    global min, min_idx, min_diff
    arrival = row['arrival_time_sec']
    new_departure = row['departure_time_sec']

    if departure > new_departure > 0 and departure - new_departure < 10 * 60 * 60:
        return row
    
    value = (arrival - departure + DAY) % DAY

    if value < min_diff:
        min = arrival
        min_diff = value
        min_idx = row.name
    return row


departure = 86290
times = df[df['start_stop'] == 'DWORZEC AUTOBUSOWY']

times.apply(difference, axis=1, args=(departure,))
min, min_idx

In [None]:
df.loc[min_idx]

In [None]:
df[df['start_stop'] == 'DWORZEC AUTOBUSOWY'].sort_values(by='arrival_time_sec', ascending=False)
