# Assignment 6
## Group Members:
* ### Nils Dunlop, e-mail: gusdunlni@student.gu.se
* ### Francisco Alejandro Erazo Piza, e-mail: guserafr@student.gu.se

## Problem 1

In [9]:
import heapq
import math
from collections import deque

class Graph:
    """Weighted directed graph with single-source shortest-path helpers.

    ``gdict`` maps every vertex to its outgoing edges. The shortest-path
    methods expect the edges as a mapping ``{neighbour: weight}`` and expect
    every neighbour to be a key of ``gdict`` as well.

    After ``dijkstra`` or ``dag_shortest_path`` has run, ``distance`` holds
    the best known cost from the source to each vertex (``math.inf`` when
    unreachable) and ``predecessor`` holds the previous vertex on that path
    (``None`` for the source and for unreachable vertices).
    """

    def __init__(self, gdict=None):
        # Build a fresh dict per instance: a literal ``{}`` default would be
        # one object shared by every Graph created without an argument.
        self.gdict = {} if gdict is None else gdict
        self.distance = {}
        self.colour = {}
        self.predecessor = {}
        self.finish = {}

    def get_vertices(self):
        """Return the vertices (the keys of ``gdict``) as a list."""
        return list(self.gdict)

    def get_edges(self):
        """Return each distinct ``(vertex, neighbour)`` pair once, in order."""
        edges = []
        seen = set()  # O(1) duplicate check instead of scanning ``edges``
        for vertex, neighbours in self.gdict.items():
            for next_vertex in neighbours:
                edge = (vertex, next_vertex)
                if edge not in seen:
                    seen.add(edge)
                    edges.append(edge)
        return edges

    def print_path(self, s, v):
        """Print the recorded path from ``s`` to ``v``, one vertex per line.

        Relies on ``predecessor`` filled in by a previous shortest-path run
        from ``s``. Prints a message instead when ``v`` is not a vertex or
        when no predecessor is recorded for it.
        """
        if v not in self.gdict:
            print("Node with key", v, "is not in the graph.")
        elif v == s:
            print(s)
        elif self.predecessor.get(v) is None:
            # ``.get`` also covers the case where no search has been run yet.
            print("There is no path from", s, "to", v, "exists.")
        else:
            self.print_path(s, self.predecessor[v])
            print(v)

    def initialise_single_source(self, s):
        """Reset all distances to infinity, all predecessors to None, and set
        the distance of the source ``s`` to 0."""
        for v in self.get_vertices():
            self.distance[v] = math.inf
            self.predecessor[v] = None
        self.distance[s] = 0

    def get_weight(self, u, v):
        """Return the weight stored for the edge ``u -> v``."""
        return self.gdict[u][v]

    def relax(self, u, v):
        """Lower ``distance[v]`` if going through ``u`` is strictly cheaper."""
        candidate = self.distance[u] + self.get_weight(u, v)
        if self.distance[v] > candidate:
            self.distance[v] = candidate
            self.predecessor[v] = u

    def dijkstra(self, s):
        """Compute shortest paths from ``s`` using a binary-heap priority queue.

        Results are stored in ``distance`` and ``predecessor``.
        """
        self.initialise_single_source(s)

        # Heap entries are (cost, insertion number, vertex). The insertion
        # number breaks cost ties so vertices themselves are never compared.
        insertion = 0
        priority_queue = [(0, insertion, s)]

        while priority_queue:
            current_cost, _, current_node = heapq.heappop(priority_queue)

            # A cheaper entry for this vertex was pushed later; this one is
            # out of date and expanding it again cannot improve anything.
            if current_cost > self.distance[current_node]:
                continue

            for neighbor in self.gdict[current_node]:
                old_distance = self.distance[neighbor]
                self.relax(current_node, neighbor)
                if self.distance[neighbor] != old_distance:
                    insertion += 1
                    heapq.heappush(
                        priority_queue,
                        (self.distance[neighbor], insertion, neighbor),
                    )

    def dag_shortest_path(self, s):
        """Compute shortest paths from ``s`` by relaxing every edge once, with
        the vertices taken in the order given by ``topological_sort``.

        Results are stored in ``distance`` and ``predecessor``.
        """
        self.initialise_single_source(s)
        sorted_nodes = self.topological_sort()

        for node in sorted_nodes:
            for neighbor in self.gdict[node]:
                self.relax(node, neighbor)

    def topological_sort(self):
        """Return the vertices as a deque in reverse depth-first finishing order."""
        sorted_nodes = deque()
        visited = set()

        def dfs(node):
            visited.add(node)
            for neighbor in self.gdict[node]:
                if neighbor not in visited:
                    dfs(neighbor)
            # A vertex is placed in front only after everything reachable
            # from it has been placed, so it precedes all its descendants.
            sorted_nodes.appendleft(node)

        for node in self.gdict:
            if node not in visited:
                dfs(node)

        return sorted_nodes

    def shortest_path_dijkstra(self, s, v):
        """Run ``dijkstra`` from ``s`` and print the path to ``v``."""
        self.dijkstra(s)
        print("Dijkstra:")
        self.print_path(s, v)

    def shortest_path_dag(self, s, v):
        """Run ``dag_shortest_path`` from ``s`` and print the path to ``v``."""
        self.dag_shortest_path(s)
        print("DAG:")
        self.print_path(s, v)

if __name__ == "__main__":
    # Graph containing a cycle (s -> t -> x -> z -> s) with non-negative
    # weights only; searched with Dijkstra.
    adjacency2 = {
        "s": {"t": 5, "y": 10},
        "t": {"x": 1, "y": 2},
        "x": {"z": 4},
        "y": {"x": 9, "t": 3, "z": 2},
        "z": {"s": 7, "x": 6},
    }
    graph = Graph(adjacency2)
    graph.shortest_path_dijkstra("s", "z")

    # Acyclic graph that includes negative weights; searched with the
    # topological-order algorithm.
    adjacency = {
        "r": {"s": 5, "t": 3},
        "s": {"x": 6, "t": 2},
        "t": {"x": 7, "y": 4, "z": 2},
        "x": {"y": -1, "z": 1},
        "y": {"z": -2},
        "z": {},
    }
    graph2 = Graph(adjacency)
    graph2.shortest_path_dag("s", "z")

Dijkstra:
s
t
y
z
DAG:
s
x
y
z


## Problem 2

In [10]:
import tarfile

def extract_tar_to_dict(tar_file_path):
    """Read the tram line files from a gzip-compressed tar archive.

    Only regular members whose name contains ``tram`` and ends in ``.txt``
    are read; members starting with ``Data/._`` are skipped. Every non-blank
    line of such a file must look like ``<stop name>, <integer>``.

    Args:
        tar_file_path: Path of the ``.tar.gz`` archive.

    Returns:
        A dict mapping the member name (with ``Data/`` and ``.txt`` removed)
        to a dict ``{lowercased stop name: integer}`` in file order. The dict
        is empty when the archive is missing or unreadable; a message is
        printed in that case.

    Raises:
        ValueError: If a non-blank line has no comma or its value is not an
            integer.
    """
    extracted_files = {}

    try:
        with tarfile.open(tar_file_path, 'r:gz') as tar:
            for item in tar:
                if item.name.startswith('Data/._'):
                    continue
                if not (item.isfile() and 'tram' in item.name and item.name.endswith('.txt')):
                    continue

                file = tar.extractfile(item)
                if file is None:
                    continue

                tram_dict = {}
                with file:  # close the member stream once it has been read
                    for raw_line in file:
                        line = raw_line.decode('utf-8', errors='ignore').strip().lower()
                        if not line:
                            # Blank lines (e.g. a trailing newline) carry no stop.
                            continue

                        # Split on the LAST comma and strip both parts, so the
                        # amount of whitespace around the comma does not matter.
                        tram_stop, separator, minutes = line.rpartition(',')
                        if not separator:
                            raise ValueError(
                                f"Malformed line in {item.name}: {line!r}"
                            )
                        tram_dict[tram_stop.strip()] = int(minutes)

                tram_number = item.name.replace('Data/', '').replace('.txt', '')
                extracted_files[tram_number] = tram_dict
    except FileNotFoundError:
        print(f"{tar_file_path} not found.")
    except tarfile.ReadError:
        print(f"{tar_file_path} is not a tar file.")

    return extracted_files


# Parse every tram line from the archive and show the stops read for 'tram1'.
tar_file_path = 'Data_A6.tar.gz'
extracted_dict = extract_tar_to_dict(tar_file_path)
print(extracted_dict['tram1'])

{'opaltorget': 1, 'smaragdgatan': 2, 'briljantgatan': 2, 'frölunda torg spårvagn': 1, 'positivgatan': 1, 'musikvägen': 1, 'nymilsgatan': 1, 'lantmilsgatan': 2, 'axel dahlströms torg': 1, 'marklandsgatan': 3, 'botaniska trädgården': 2, 'linnéplatsen': 1, 'olivedalsgatan': 2, 'prinsgatan': 2, 'järntorget': 2, 'stenpiren': 3, 'brunnsparken': 2, 'centralstationen': 3, 'ullevi norra': 2, 'svingeln': 1, 'olskrokstorget': 2, 'redbergsplatsen': 2, 'stockholmsgatan': 1, 'härlanda': 2, 'munkebäckstorget': 0, 'ättehögsgatan': 1, 'kaggeledstorget': 2, 'tingvallsvägen': 2, 'östra sjukhuset': 0}


In [11]:
def get_complete_tram_data(extracted_dict):
    """Add a ``<line>_reverse`` entry for every tram line.

    The reverse entry lists the stops of the line in the opposite order.
    Each stop keeps the value it has in the forward direction, except the
    first stop of the forward line (the last stop of the reverse line),
    whose value becomes 0. A line without stops gets an empty reverse entry.

    NOTE(review): values are copied, not shifted by one stop. If a value
    means "time to the next stop", confirm this is the intended reverse
    timing.

    Args:
        extracted_dict: Mapping of line name to ``{stop: value}`` in travel
            order.

    Returns:
        A tuple ``(complete, reverse_dict)`` where ``reverse_dict`` holds
        only the ``*_reverse`` lines and ``complete`` holds the original
        lines followed by the reverse lines.
    """
    reverse_dict = {}

    for tram, stops in extracted_dict.items():
        ordered = list(stops.items())

        # Every stop but the first, walked back to front, keeps its own value.
        reversed_stops = dict(reversed(ordered[1:]))

        # The first forward stop ends the reverse line; guard against an
        # empty line so no ``None`` stop is invented.
        if ordered:
            reversed_stops[ordered[0][0]] = 0

        reverse_dict[f"{tram}_reverse"] = reversed_stops

    return {**extracted_dict, **reverse_dict}, reverse_dict


def get_tram_hubs(complete_tram_data, extracted_dict):
    """Return the stops that take part in at least three line connections.

    For every pair of consecutive stops on every line of ``extracted_dict``
    both stops are credited with one connection. A stop is a hub when its
    total is 3 or more. Stop names are compared in lowercase.

    Args:
        complete_tram_data: Mapping of line name to ``{stop: value}``; its
            stops are registered with a count of zero.
        extracted_dict: Mapping of line name to ``{stop: value}`` in travel
            order; these lines are the ones that are counted.

    Returns:
        A set of lowercased stop names.
    """
    connections_count = {}
    for line_stops in complete_tram_data.values():
        for stop in line_stops:
            connections_count[stop.lower()] = 0

    for tram_line in extracted_dict.values():
        # Lowercase here too, so the counter keys always match the names
        # being counted regardless of how the caller capitalised them.
        stops = [stop.lower() for stop in tram_line]
        for current_stop, next_stop in zip(stops, stops[1:]):
            for endpoint in (current_stop, next_stop):
                # ``.get`` tolerates a stop missing from complete_tram_data.
                connections_count[endpoint] = connections_count.get(endpoint, 0) + 1

    return {stop for stop, count in connections_count.items() if count >= 3}


# Using the functions: load the lines, add the reverse direction of each
# line, then print the stops identified as hubs.
extracted_dict = extract_tar_to_dict('Data_A6.tar.gz')
complete_tram_data, reverse_dict = get_complete_tram_data(extracted_dict)
tram_hubs = get_tram_hubs(complete_tram_data, extracted_dict)

print(tram_hubs)

{'kungsportsplatsen', 'önskevädersgatan', 'nymånegatan', 'linnéplatsen', 'ostindiegatan', 'kviberg', 'marklandsgatan', 'lantmilsgatan', 'kungssten', 'stenpiren', 'hinsholmen', 'munkebäckstorget', 'chapmans torg', 'vasaplatsen', 'nya varvsallén', 'järntorget', 'jaegerdorffsplatsen', 'lackarebäck', 'storås', 'olivedalsgatan', 'solrosgatan', 'wavrinskys plats', 'ättehögsgatan', 'mölndals sjukhus', 'hagakyrkan', 'sahlgrenska huvudentré', 'långedrag', 'tranered', 'hjällbo', 'brunnsparken', 'lana', 'varbergsgatan', 'frihamnen', 'kapellplatsen', 'korsvägen', 'sandarna', 'galileis gata', 'tingvallsvägen', 'chalmers', 'elisedal', 'olskrokstorget', 'hjalmar brantingsplatsen', 'medicinaregatan', 'kortedala torg', 'sannaplan', 'musikvägen', 'ullevi södra', 'kaptensgatan', 'friskväderstorget', 'svingeln', 'krokslätts fabriker', 'bellevue', 'teleskopgatan', 'berzeliigatan', 'eketrägatan', 'godhemsgatan', 'axel dahlströms torg', 'rambergsvallen', 'runstavsgatan', 'stigbergstorget', 'hammarkullen', 'm

In [12]:
def get_terminal_stops(extracted_dict):
    """Return the last stop of every tram line, lowercased.

    Only the final stop in each line's order is reported; the first stop is
    not. Lines without any stops are skipped.

    Args:
        extracted_dict: Mapping of line name to ``{stop: value}`` in travel
            order.

    Returns:
        A list with one lowercased stop name per non-empty line, in the
        order of ``extracted_dict``. A stop ending several lines appears
        once for each of them.
    """
    return [
        list(stops)[-1].lower()
        for stops in extracted_dict.values()
        if stops  # an empty line has no terminal stop
    ]

# Combine the hubs with the last stop of every line into one alphabetically
# sorted list; these "special" stops become the nodes of the tram network.
terminal_stops = get_terminal_stops(extracted_dict)
all_special_stops = sorted(list(tram_hubs.union({stop.lower() for stop in terminal_stops})))

def create_tram_network(extracted_dict, all_special_stops, terminal_stops):
    tram_network = {stop: {} for stop in all_special_stops}

    for tram, stops in extracted_dict.items():
        stops_list = list(stops.items())

        idx = 0
        while idx < len(stops_list) - 1:
            # If this stop is a special stop
            if stops_list[idx][0] in all_special_stops:
                next_idx = idx + 1
                total_time = 0

                # Accumulate the time until the next special stop is reached
                while next_idx < len(stops_list) and stops_list[next_idx][0] not in all_special_stops:
                    total_time += stops_list[next_idx][1]
                    next_idx += 1

                # If the next stop is also a special stop, add to the network
                if stops_list[next_idx][0] in all_special_stops:
                    # Add the time of the next special stop to the total time
                    total_time += stops_list[next_idx][1]
                    tram_network[stops_list[idx][0]][stops_list[next_idx][0]] = total_time

                    # Compare the forward and reverse direction times
                    forward_time = tram_network.get(stops_list[idx][0], {}).get(stops_list[next_idx][0], float('inf'))
                    reverse_time = tram_network.get(stops_list[next_idx][0], {}).get(stops_list[idx][0], float('inf'))

                    if forward_time != reverse_time:
                        tram_network[stops_list[next_idx][0]][stops_list[idx][0]] = stops_list[idx][1]
                    elif stops_list[next_idx][0] in terminal_stops:
                        tram_network[stops_list[next_idx][0]][stops_list[idx][0]] = stops_list[idx][1]
                    else:
                        tram_network[stops_list[next_idx][0]][stops_list[idx][0]] = total_time

                # Update the current index
                idx = next_idx
            else:
                idx += 1

    return tram_network

# Build the network over the special stops and print its adjacency dict.
tram_network = create_tram_network(extracted_dict, all_special_stops, terminal_stops)
print(tram_network)

{'allhelgonakyrkan': {'kortedala torg': 1, 'aprilgatan': 0, 'januarigatan': 2}, 'almedal': {'liseberg södra': 2, 'elisedal': 2}, 'angered centrum': {'storås': 3}, 'aprilgatan': {'allhelgonakyrkan': 1}, 'axel dahlströms torg': {'lantmilsgatan': 2, 'marklandsgatan': 3}, 'bellevue': {'skf': 2, 'kviberg': 1}, 'berzeliigatan': {'korsvägen': 1, 'valand': 2}, 'beväringsgatan': {'kviberg': 1, 'nymånegatan': 1}, 'botaniska trädgården': {'marklandsgatan': 3, 'linnéplatsen': 1, 'sahlgrenska huvudentré': 1}, 'briljantgatan': {'smaragdgatan': 1, 'frölunda torg spårvagn': 1}, 'brunnsparken': {'stenpiren': 3, 'centralstationen': 6, 'kungsportsplatsen': 3, 'domkyrkan': 3, 'frihamnen': 5, 'lilla bommen': 1}, 'centralstationen': {'brunnsparken': 2, 'ullevi norra': 2, 'ullevi södra': 4, 'gamlestads torg': 6}, 'chalmers': {'wavrinskys plats': 1, 'korsvägen': 1, 'kapellplatsen': 2}, 'chapmans torg': {'jaegerdorffsplatsen': 1, 'kaptensgatan': 2}, 'doktor sydows gata': {'wavrinskys plats': 1}, 'domkyrkan': {

### Observations
- Several connections in the graph, such as the one between 'gamlestads torg' and 'ejdergatan', are served by more than one tram line, and the travel time is shorter on one line than on the other. In these cases we handled this by taking the shortest time between the two stops.
- The tram stop names are capitalised inconsistently, so we converted every name to lowercase to ensure that each stop appears only once.
- When comparing the connections A->B and B->A, we found that the travel time was not always the same. In these cases we used the B->A time for the reverse direction.