# Assignment 6
## Group Members:
* ### Nils Dunlop, e-mail: gusdunlni@student.gu.se
* ### Francisco Alejandro Erazo Piza, e-mail: guserafr@student.gu.se

## Problem 1

In [95]:
import heapq
import math
from collections import deque

class Graph:
    """Weighted directed graph stored as a dict of adjacency dicts.

    ``gdict`` maps every vertex to a dict ``{neighbour: edge weight}``.
    The single-source shortest-path methods store their results on the
    instance: ``distance[v]`` is the best known distance from the source
    and ``predecessor[v]`` the previous vertex on that path (``None`` when
    there is none).
    """

    def __init__(self, gdict=None):
        """Create a graph from ``gdict`` (an empty graph when omitted)."""
        # Build a fresh dict per instance; a mutable default argument would
        # be shared by every Graph created without an explicit gdict.
        self.gdict = {} if gdict is None else gdict
        self.distance = {}  # Distance from source s to each vertex
        self.colour = {}  # Colour of each vertex (not used by the methods below)
        self.predecessor = {}  # Predecessor of each vertex
        self.finish = {}  # Finish time of each vertex (not used by the methods below)

    def get_vertices(self):
        """Return all vertices (the keys of ``gdict``) as a list."""
        return list(self.gdict)

    def get_edges(self):
        """Return all edges as a list of ``(vertex, neighbour)`` tuples."""
        # Dict keys are unique, so every (vertex, neighbour) pair appears
        # exactly once and no duplicate check is needed.
        return [
            (vertex, next_vertex)
            for vertex, neighbours in self.gdict.items()
            for next_vertex in neighbours
        ]

    def print_path(self, s, v):
        """Print the path from ``s`` to ``v`` stored in ``predecessor``.

        One vertex is printed per line. A message is printed instead when
        ``v`` is not a vertex of the graph or no predecessor chain leads
        back to ``s``. A shortest-path method must have been run from
        ``s`` beforehand so that ``predecessor`` is populated.
        """
        if v not in self.gdict:
            print("Node with key", v, "is not in the graph.")
        elif v == s:
            print(s)
        elif self.predecessor[v] is None:
            print("There is no path from", s, "to", v, "exists.")
        else:
            # Print everything up to the predecessor first, then v itself.
            self.print_path(s, self.predecessor[v])
            print(v)

    def initialise_single_source(self, s):
        """Reset ``distance`` to infinity and ``predecessor`` to ``None``
        for every vertex, then set the distance of the source ``s`` to 0."""
        for v in self.get_vertices():
            self.distance[v] = math.inf
            self.predecessor[v] = None
        self.distance[s] = 0

    def get_weight(self, u, v):
        """Return the weight of the edge from ``u`` to ``v``."""
        return self.gdict[u][v]

    def relax(self, u, v):
        """Update ``v`` if going through ``u`` gives a shorter path.

        Returns:
            ``True`` when ``distance[v]`` was lowered, ``False`` otherwise.
        """
        candidate = self.distance[u] + self.get_weight(u, v)
        if self.distance[v] > candidate:
            self.distance[v] = candidate
            self.predecessor[v] = u
            return True
        return False

    def dijkstra(self, s):
        """Compute shortest paths from ``s`` with Dijkstra's algorithm.

        Fills ``distance`` and ``predecessor``. Raises ``KeyError`` if
        ``s`` is not a vertex of the graph.
        """
        self.initialise_single_source(s)

        # Heap entries are (distance, tie_breaker, vertex). The tie breaker
        # is the negated insertion number, so among equal distances the
        # most recently queued vertex is taken first and the vertices
        # themselves are never compared.
        priority_queue = [(0, 0, s)]
        pushed = 0

        while priority_queue:
            current_cost, _, current_node = heapq.heappop(priority_queue)

            # A cheaper entry for this vertex was already processed; its
            # edges have been relaxed with the final distance.
            if current_cost > self.distance[current_node]:
                continue

            for neighbor in self.gdict[current_node]:
                # Queue the neighbour only when its distance improved.
                if self.relax(current_node, neighbor):
                    pushed += 1
                    heapq.heappush(
                        priority_queue,
                        (self.distance[neighbor], -pushed, neighbor),
                    )

    def dag_shortest_path(self, s):
        """Compute shortest paths from ``s`` by relaxing edges in
        topological order.

        The result is only correct when the graph is acyclic; no cycle
        detection is performed.
        """
        self.initialise_single_source(s)

        for node in self.topological_sort():
            for neighbor in self.gdict[node]:
                self.relax(node, neighbor)

    def topological_sort(self):
        """Return the vertices as a ``deque`` in depth-first finishing
        order, last finished first.

        For an acyclic graph this is a topological order. The search is
        iterative, so long chains do not exhaust the recursion limit.
        """
        sorted_nodes = deque()
        visited = set()

        # Start a search from each unvisited vertex so all are processed.
        for root in self.gdict:
            if root in visited:
                continue
            visited.add(root)
            # Each stack frame holds a vertex and an iterator over the
            # neighbours that still have to be looked at.
            stack = [(root, iter(self.gdict[root]))]
            while stack:
                node, neighbours = stack[-1]
                for neighbor in neighbours:
                    if neighbor not in visited:
                        visited.add(neighbor)
                        stack.append((neighbor, iter(self.gdict[neighbor])))
                        break
                else:
                    # All neighbours are done: the vertex is finished and
                    # goes to the front of the result.
                    stack.pop()
                    sorted_nodes.appendleft(node)

        return sorted_nodes

    def shortest_path_dijkstra(self, s, v):
        """Run Dijkstra from ``s``, then print the path to ``v`` and its
        total time."""
        self.dijkstra(s)
        self.print_path(s, v)
        print(f"Total time: {self.distance[v]} minutes\n")

    def shortest_path_dag(self, s, v):
        """Run the DAG shortest-path algorithm from ``s``, then print the
        path to ``v`` and its total time."""
        self.dag_shortest_path(s)
        self.print_path(s, v)
        print(f"Total time: {self.distance[v]} minutes\n")

# Example inputs for Problem 1, written as {vertex: {neighbour: edge weight}}.

# Acyclic graph with some negative edge weights (used with the DAG algorithm).
adjacency = {
    "r": {
        "s": 5,
        "t": 3,
    },
    "s": {
        "x": 6,
        "t": 2,
    },
    "t": {
        "x": 7,
        "y": 4,
        "z": 2,
    },
    "x": {
        "y": -1,
        "z": 1,
    },
    "y": {
        "z": -2,
    },
    "z": {},
}

# Graph with cycles and non-negative edge weights (used with Dijkstra).
adjacency2 = {
    "s": {
        "t": 5,
        "y": 10,
    },
    "t": {
        "x": 1,
        "y": 2,
    },
    "x": {
        "z": 4,
    },
    "y": {
        "x": 9,
        "t": 3,
        "z": 2,
    },
    "z": {
        "s": 7,
        "x": 6,
    },
}

# Shortest path s -> z in the cyclic graph, found with Dijkstra.
graph = Graph(adjacency2)
graph.shortest_path_dijkstra("s", "z")

# Shortest path s -> z in the acyclic graph, found in topological order.
graph2 = Graph(adjacency)
graph2.shortest_path_dag("s", "z")

s
t
y
z
Total time: 9 minutes

s
x
y
z
Total time: 3 minutes


## Problem 2

In [88]:
import tarfile

def extract_tar_to_dict(tar_file_path):
    """Read the tram line files from a gzip-compressed tar archive.

    Every regular member whose name contains ``tram`` and ends in ``.txt``
    is parsed line by line as ``<stop name>, <integer time>``. Members
    named ``Data/._*`` are skipped (presumably macOS metadata files).
    Blank lines are ignored.

    Args:
        tar_file_path: Path of the ``.tar.gz`` archive.

    Returns:
        A dict mapping the line name (member name without ``Data/`` and
        ``.txt``) to a dict ``{lower-cased stop name: int time}`` in file
        order. If the archive is missing or cannot be read, a message is
        printed and whatever was extracted so far (possibly nothing) is
        returned.

    Raises:
        ValueError: If a non-blank line has no comma or its time is not
            an integer.
    """
    extracted_files = {}

    try:
        with tarfile.open(tar_file_path, 'r:gz') as tar:
            for item in tar:
                if item.name.startswith('Data/._'):
                    continue
                if not (item.isfile() and 'tram' in item.name and item.name.endswith('.txt')):
                    continue

                member = tar.extractfile(item)
                if member is None:
                    continue

                tram_dict = {}
                for raw_line in member:
                    line = raw_line.decode('utf-8', errors='ignore').strip().lower()
                    if not line:
                        # A trailing or stray blank line carries no stop.
                        continue

                    # Split on the last comma so the time is always the
                    # final field, with or without a following space.
                    tram_stop, separator, stop_time = line.rpartition(',')
                    if not separator:
                        raise ValueError(
                            f"Malformed line in {item.name}: {line!r}"
                        )
                    tram_dict[tram_stop.strip()] = int(stop_time)

                tram_number = item.name.replace('Data/', '').replace('.txt', '')
                extracted_files[tram_number] = tram_dict
    except FileNotFoundError:
        print(f"{tar_file_path} not found.")
    except tarfile.ReadError:
        print(f"{tar_file_path} is not a tar file.")

    return extracted_files


# Archive with the tram line files; it is opened as a gzip-compressed tar.
tar_file_path = 'Data_A6.tar.gz'
# Maps each tram line name to its {stop: time} dict.
extracted_dict = extract_tar_to_dict(tar_file_path)

In [89]:
def get_complete_tram_data(extracted_dict):
    """Add a reversed copy of every tram line.

    For each line ``name`` a line ``name_reverse`` is created that lists
    the same stops in the opposite order. Each stop keeps its own time,
    except the original first stop (now the last one), whose time is set
    to 0. A line without stops yields an empty reversed line.

    Args:
        extracted_dict: Dict ``{line name: {stop: time}}`` with the stops
            in travel order.

    Returns:
        A tuple ``(complete, reverse_dict)`` where ``reverse_dict`` holds
        only the reversed lines and ``complete`` holds the original lines
        followed by the reversed ones.
    """
    reverse_dict = {}

    for tram, stops in extracted_dict.items():
        reversed_stops = dict(reversed(list(stops.items())))
        if stops:
            # Updating an existing key keeps its position at the end.
            first_stop = next(iter(stops))
            reversed_stops[first_stop] = 0
        reverse_dict[f"{tram}_reverse"] = reversed_stops

    return {**extracted_dict, **reverse_dict}, reverse_dict


def get_tram_hubs(complete_tram_data, extracted_dict):
    """Return the stops that act as hubs of the tram network.

    For every pair of consecutive stops on a line in ``extracted_dict``
    both stops get one connection. A stop with three or more connections
    in total is a hub. Stop names are compared in lower case.

    Args:
        complete_tram_data: Dict of all lines including the reversed ones.
            Kept for backward compatibility; stops that occur only here
            have no counted connections and therefore cannot be hubs.
        extracted_dict: Dict ``{line name: {stop: time}}`` with the stops
            in travel order.

    Returns:
        A set of lower-cased hub stop names.
    """
    connections_count = {}

    for tram_line in extracted_dict.values():
        # Lower-case here as well, so counting never depends on how the
        # caller capitalised the stop names.
        stops = [stop.lower() for stop in tram_line]
        for current_stop, next_stop in zip(stops, stops[1:]):
            connections_count[current_stop] = connections_count.get(current_stop, 0) + 1
            connections_count[next_stop] = connections_count.get(next_stop, 0) + 1

    return {stop for stop, count in connections_count.items() if count >= 3}


# Using the functions:
# Read the archive again, then derive the reversed lines and the hub stops.
extracted_dict = extract_tar_to_dict('Data_A6.tar.gz')
# complete_tram_data holds the original and reversed lines, reverse_dict only the reversed ones.
complete_tram_data, reverse_dict = get_complete_tram_data(extracted_dict)
tram_hubs = get_tram_hubs(complete_tram_data, extracted_dict)

In [90]:
def get_terminal_stops(extracted_dict):
    """Return the first and last stop of every tram line.

    Args:
        extracted_dict: Dict ``{line name: {stop: time}}`` with the stops
            in travel order.

    Returns:
        A sorted list of the distinct, lower-cased terminal stop names.
        Lines without any stops are skipped.
    """
    terminal_stops = set()
    for stops in extracted_dict.values():
        if not stops:
            # Nothing to take a first or last stop from.
            continue
        stop_names = list(stops)
        terminal_stops.add(stop_names[0].lower())
        terminal_stops.add(stop_names[-1].lower())

    # Sorted so the result does not depend on set iteration order.
    return sorted(terminal_stops)


# First and last stop of every tram line, lower-cased and without duplicates.
terminal_stops = get_terminal_stops(extracted_dict)
# Hubs and terminal stops together, as one sorted list of lower-cased names.
all_special_stops = sorted(list(tram_hubs.union({stop.lower() for stop in terminal_stops})))

In [91]:
def build_tram_network_graph(reverse_dict, all_special_stops):
    """Build the adjacency dict of the tram network.

    Every stop in ``all_special_stops`` becomes a vertex. For each pair of
    consecutive stops on a line in ``reverse_dict`` whose two stops are
    both vertices, an edge is stored in both directions: the edge towards
    the later stop carries the later stop's time, the edge back carries
    the earlier stop's time. If an edge already exists, the smaller time
    is kept.

    Args:
        reverse_dict: Dict ``{line name: {stop: time}}`` with the stops in
            travel order.
        all_special_stops: Iterable of the stops to use as vertices.

    Returns:
        A dict ``{stop: {neighbour: time}}``.
    """
    network = {vertex: {} for vertex in all_special_stops}

    def connect(origin, destination, duration):
        # Keep the fastest known time for this directed connection.
        known = network[origin].get(destination, duration)
        network[origin][destination] = min(known, duration)

    for line_stops in reverse_dict.values():
        names = list(line_stops)
        for here, there in zip(names, names[1:]):
            # Only connect stops that are vertices of the network.
            if here not in network or there not in network:
                continue
            connect(here, there, line_stops[there])
            connect(there, here, line_stops[here])

    return network

# Adjacency dict {stop: {neighbour: time}} limited to the hubs and terminal stops.
tram_network_graph = build_tram_network_graph(reverse_dict, all_special_stops)

# for stop, connections in tram_network_graph.items():
#     print(stop, connections)

In [92]:
# Wrap the tram network adjacency dict in the Graph class from Problem 1.
graph = Graph(tram_network_graph)

# Shortest route from Chalmers to Centralstationen
print("Shortest route from Chalmers to Centralstationen using Dijkstra's algorithm:")
graph.shortest_path_dijkstra("chalmers", "centralstationen")

# NOTE(review): build_tram_network_graph stores every connection in both
# directions, so this graph contains cycles and has no topological order.
# The DAG algorithm's output is therefore not reliable here: for the
# Saltholmen query below it reports "no path" with an infinite time,
# although Dijkstra finds a 23 minute route.
print("Shortest route from Chalmers to Centralstationen using DAG shortest path algorithm:")
graph.shortest_path_dag("chalmers", "centralstationen")

# Shortest route from Saltholmen to Chalmers
print("Shortest route from Saltholmen to Chalmers using Dijkstra's algorithm:")
graph.shortest_path_dijkstra("saltholmen", "chalmers")

print("Shortest route from Saltholmen to Chalmers using DAG shortest path algorithm:")
graph.shortest_path_dag("saltholmen", "chalmers")

Shortest route from Chalmers to Centralstationen using Dijkstra's algorithm:
chalmers
korsvägen
scandinavium
ullevi södra
centralstationen
Total time: 4 minutes

Shortest route from Chalmers to Centralstationen using DAG shortest path algorithm:
chalmers
korsvägen
scandinavium
ullevi södra
centralstationen
Total time: 4 minutes

Shortest route from Saltholmen to Chalmers using Dijkstra's algorithm:
saltholmen
roddföreningen
långedrag
hinsholmen
käringberget
tranered
hagen
nya varvsallén
kungssten
sandarna
sannaplan
mariaplan
marklandsgatan
botaniska trädgården
sahlgrenska huvudentré
medicinaregatan
wavrinskys plats
chalmers
Total time: 23 minutes

Shortest route from Saltholmen to Chalmers using DAG shortest path algorithm:
There is no path from saltholmen to chalmers exists.
Total time: inf minutes


### Observations
- Several connections, such as the one between 'gamlestads torg' and 'ejdergatan', are served by more than one tram line, and the travel time for the same connection can differ between lines. We handled this by keeping the shortest time between the two stops.
- The capitalisation of the stop names is not consistent across the tram files, so we converted all names to lowercase to make sure that each stop appears only once.
- When considering the connections A->B and B->A, we found that the travel time was not always the same in both directions. In these cases we used the B->A time for the reverse direction.