# Import data from text 

## Goal 
Read the graph data from a text file and load it into Python.

## Shape of the text data

### Nodes
Each node has 
* id 
* label
* pos (longitudine / longitude: the first `pos` value, e.g. 13.33)
* pos (latitudine / latitude: the second `pos` value, e.g. 46.01)
* tipologia -> deposito

Moreover, the bus stop nodes have
* *id*
* *label*
* *pos (longitudine)*
* *pos (latitudine)*
* *tipologia -> fermata*
* locazione (via)
* città (? id of the city)
* ragazzi (number of children for the first cluster)
* ragazzi (number of children for the second cluster)
* ragazzi (number of children for the third cluster)
* ragazzi (number of children for the fourth cluster)

Then, the cluster nodes have
* id 
* label
* pos
* pos
* tipologia -> cluster 
* città

### Edges 
Each edge has the following data   
* source (from where the edge starts)
* target (to where the edge arrives)
* weight (for instance 731.3711208488347; presumably the distance from the source to the target)
* time (for instance 24.379037361627823, the time needed from the source to the target)

In [1]:
import networkx as nx
import pandas as pd

# Import complete data

## First version

In [3]:
def parse_graph(file_content):
    """Parse the textual graph description into node and edge records.

    The text is scanned line by line.  A line whose first token is ``node``
    or ``edge`` opens a record, a line consisting of ``]`` closes the open
    record, and every other line is read as ``key value``.

    Recognised node keys: ``id`` and ``citta`` (int), ``label``,
    ``tipologia`` and ``locazione`` (text, surrounding double quotes
    removed), ``pos`` (float) and ``ragazzi`` (int).  ``pos`` and ``ragazzi``
    may repeat; their values are collected in a list in file order.
    Recognised edge keys: ``source`` and ``target`` (int), ``weight`` and
    ``time`` (float).  Unknown keys, and keys found outside the kind of
    record they belong to, are ignored.

    Args:
        file_content: Full text of the graph file.

    Returns:
        A ``(nodes, edges)`` tuple.  ``nodes`` maps each node id to its
        attribute dict; ``edges`` is a list of edge attribute dicts in file
        order.

    Raises:
        ValueError: If a numeric value is missing or cannot be converted.
        KeyError: If a node record is closed without an ``id``.
    """

    def as_text(raw):
        # Keep the whole remainder of the line so that quoted values
        # containing spaces are not cut at the first space.
        return raw.strip().strip('"')

    def as_int(raw):
        return int(raw.split()[0])

    def as_float(raw):
        return float(raw.split()[0])

    # Keys are matched exactly (not by prefix), so an unrelated key that
    # merely starts with "id" or "pos" cannot be mistaken for a known one.
    node_single = {
        'id': as_int,
        'label': as_text,
        'tipologia': as_text,
        'locazione': as_text,
        'citta': as_int,
    }
    node_repeated = {'pos': as_float, 'ragazzi': as_int}
    edge_single = {
        'source': as_int,
        'target': as_int,
        'weight': as_float,
        'time': as_float,
    }

    nodes = {}
    edges = []
    current_node = None
    current_edge = None

    for line_no, raw_line in enumerate(file_content.splitlines(), start=1):
        line = raw_line.strip()
        if not line:
            continue

        if line == "]":
            # Close whichever record is open; a "]" with nothing open
            # (e.g. the end of the enclosing block) is a no-op.
            if current_node is not None:
                nodes[current_node['id']] = current_node
                current_node = None
            if current_edge is not None:
                edges.append(current_edge)
                current_edge = None
            continue

        tokens = line.split(None, 1)
        key = tokens[0].rstrip("[")  # accept both "node [" and "node["
        raw_value = tokens[1] if len(tokens) > 1 else ""

        try:
            if key == "node":
                current_node = {}
            elif key == "edge":
                current_edge = {}
            elif current_node is not None and key in node_single:
                current_node[key] = node_single[key](raw_value)
            elif current_node is not None and key in node_repeated:
                converted = node_repeated[key](raw_value)
                current_node.setdefault(key, []).append(converted)
            elif current_edge is not None and key in edge_single:
                current_edge[key] = edge_single[key](raw_value)
            # Anything else (graph-level attributes, unknown keys) is skipped.
        except (ValueError, IndexError) as err:
            raise ValueError(
                "line {}: cannot parse {!r}".format(line_no, line)
            ) from err

    return nodes, edges

def create_graph(nodes, edges):
    """Build a directed NetworkX graph from parsed node and edge records.

    Args:
        nodes: Mapping of node id to that node's attribute dict; every
            entry of the dict is stored as a node attribute.
        edges: Iterable of dicts providing 'source', 'target', 'weight'
            and 'time'.

    Returns:
        An ``nx.DiGraph`` holding all nodes, plus one edge per record
        carrying ``weight`` and ``time`` attributes.
    """
    graph = nx.DiGraph()

    # (node, attribute-dict) pairs are accepted directly by add_nodes_from.
    graph.add_nodes_from(nodes.items())

    graph.add_edges_from(
        (
            record['source'],
            record['target'],
            {'weight': record['weight'], 'time': record['time']},
        )
        for record in edges
    )

    return graph

# Read the whole graph description file into memory as a single string.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running this cell on another computer.
file_path = r'C:\Users\Samsung\Desktop\22_Internships\ProgettoTesi-main\Dati\BUTTRIO\matriceDistanze\Grafo_singolo'
with open(file_path, 'r') as file:
    file_content = file.read()

# Turn the text into node/edge records, then build the directed graph.
nodes, edges = parse_graph(file_content)
G = create_graph(nodes, edges)

# Dump every node and every edge together with its attribute dictionary.
print(G.nodes(data=True))
print(G.edges(data=True))


[(0, {'id': 0, 'label': '0', 'pos': [13.33352521881336, 46.010049138194574], 'tipologia': 'deposito'}), (1, {'id': 1, 'label': '1', 'pos': [13.333070548499375, 46.011592135492116], 'tipologia': 'fermata', 'locazione': 'VIAIPPOLITONIEVO21', 'ragazzi': [2, 0]}), (2, {'id': 2, 'label': '2', 'pos': [13.332481034239262, 46.00685608068721], 'tipologia': 'fermata', 'locazione': 'VIABELTRAME23', 'ragazzi': [0, 17]}), (3, {'id': 3, 'label': '3', 'pos': [13.332006889588357, 46.011779665523946], 'tipologia': 'fermata', 'locazione': 'VIAGORIZIA17', 'ragazzi': [7, 4]}), (4, {'id': 4, 'label': '4', 'pos': [13.339888208550311, 46.01912388511622], 'tipologia': 'fermata', 'locazione': 'VIALIPPE,FERMATAN.7', 'ragazzi': [0, 2]}), (5, {'id': 5, 'label': '5', 'pos': [13.333202605873304, 46.018368509384324], 'tipologia': 'fermata', 'locazione': 'VIADEGANUTTI&#10;FERMATAN3', 'ragazzi': [0, 4]}), (6, {'id': 6, 'label': '6', 'pos': [13.340102597322604, 45.99918361441391], 'tipologia': 'fermata', 'locazione': '

## Second version (CURRENT)

In [16]:
def parse_graph(file_content):
    """Parse the textual graph description into node and edge records.

    The text is scanned line by line.  A line whose first token is ``node``
    or ``edge`` opens a record, a line consisting of ``]`` closes the open
    record, and every other line is read as ``key value``.

    Recognised node keys: ``id`` and ``citta`` (int), ``label``,
    ``tipologia`` and ``locazione`` (text, surrounding double quotes
    removed), ``pos`` (float) and ``ragazzi`` (int).  ``pos`` and ``ragazzi``
    may repeat; their values are collected in a list in file order.  When a
    node has a ``ragazzi`` list shorter than four entries it is padded with
    zeros up to four entries.
    Recognised edge keys: ``source`` and ``target`` (int), ``weight`` and
    ``time`` (float).  Unknown keys, and keys found outside the kind of
    record they belong to, are ignored.

    Args:
        file_content: Full text of the graph file.

    Returns:
        A ``(nodes, edges)`` tuple.  ``nodes`` maps each node id to its
        attribute dict; ``edges`` is a list of edge attribute dicts in file
        order.

    Raises:
        ValueError: If a numeric value is missing or cannot be converted.
        KeyError: If a node record is closed without an ``id``.
    """
    ragazzi_slots = 4  # one count per cluster

    def as_text(raw):
        # Keep the whole remainder of the line so that quoted values
        # containing spaces are not cut at the first space.
        return raw.strip().strip('"')

    def as_int(raw):
        return int(raw.split()[0])

    def as_float(raw):
        return float(raw.split()[0])

    # Keys are matched exactly (not by prefix), so an unrelated key that
    # merely starts with "id" or "pos" cannot be mistaken for a known one.
    node_single = {
        'id': as_int,
        'label': as_text,
        'tipologia': as_text,
        'locazione': as_text,
        'citta': as_int,
    }
    node_repeated = {'pos': as_float, 'ragazzi': as_int}
    edge_single = {
        'source': as_int,
        'target': as_int,
        'weight': as_float,
        'time': as_float,
    }

    nodes = {}
    edges = []
    current_node = None
    current_edge = None

    for line_no, raw_line in enumerate(file_content.splitlines(), start=1):
        line = raw_line.strip()
        if not line:
            continue

        if line == "]":
            # Close whichever record is open; a "]" with nothing open
            # (e.g. the end of the enclosing block) is a no-op.
            if current_node is not None:
                ragazzi = current_node.get('ragazzi')
                if ragazzi is not None:
                    # Pad with zeros; a non-positive count adds nothing.
                    ragazzi.extend([0] * (ragazzi_slots - len(ragazzi)))
                nodes[current_node['id']] = current_node
                current_node = None
            if current_edge is not None:
                edges.append(current_edge)
                current_edge = None
            continue

        tokens = line.split(None, 1)
        key = tokens[0].rstrip("[")  # accept both "node [" and "node["
        raw_value = tokens[1] if len(tokens) > 1 else ""

        try:
            if key == "node":
                current_node = {}
            elif key == "edge":
                current_edge = {}
            elif current_node is not None and key in node_single:
                current_node[key] = node_single[key](raw_value)
            elif current_node is not None and key in node_repeated:
                converted = node_repeated[key](raw_value)
                current_node.setdefault(key, []).append(converted)
            elif current_edge is not None and key in edge_single:
                current_edge[key] = edge_single[key](raw_value)
            # Anything else (graph-level attributes, unknown keys) is skipped.
        except (ValueError, IndexError) as err:
            raise ValueError(
                "line {}: cannot parse {!r}".format(line_no, line)
            ) from err

    return nodes, edges

def create_graph(nodes, edges):
    """Assemble a directed NetworkX graph from parsed records.

    Args:
        nodes: Mapping of node id to that node's attribute dict; each
            entry of the dict becomes a node attribute.
        edges: Iterable of dicts with 'source', 'target', 'weight' and
            'time' entries.

    Returns:
        An ``nx.DiGraph`` with one node per mapping entry and one edge per
        record, the edges carrying ``weight`` and ``time`` attributes.
    """
    digraph = nx.DiGraph()

    for identifier in nodes:
        attributes = nodes[identifier]
        digraph.add_node(identifier, **attributes)

    for record in edges:
        origin = record['source']
        destination = record['target']
        digraph.add_edge(
            origin,
            destination,
            weight=record['weight'],
            time=record['time'],
        )

    return digraph
    

In [35]:
# Read the whole graph description file into memory as a single string.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running this cell on another computer.
file_path = r'C:\Users\Samsung\Desktop\22_Internships\ProgettoTesi-main\Dati\BUTTRIO\matriceDistanze\Grafo_singolo_real_testo.txt'
with open(file_path, 'r') as file:
    file_content = file.read()

# Turn the text into node/edge records, then build the directed graph.
# `nodes`, `edges` and `G` stay available to the cells that follow.
nodes, edges = parse_graph(file_content)
G = create_graph(nodes, edges)

In [36]:
# Print a small sample instead of the full node/edge dump:
# nodes whose id is below 25, then the edges leaving node 0 towards
# targets whose id is below 10.

for n in G.nodes(data=True):
    if n[0] >= 25:
        continue
    print(n)

print()
for e in G.edges(data=True):
    if e[0] != 0 or e[1] >= 10:
        continue
    print(e)

(0, {'id': 0, 'label': '0', 'pos': [13.33352521881336, 46.010049138194574], 'tipologia': 'deposito'})
(1, {'id': 1, 'label': '1', 'pos': [13.333070548499375, 46.011592135492116], 'tipologia': 'fermata', 'locazione': 'VIAIPPOLITONIEVO21', 'ragazzi': [2, 0, 0, 0]})
(2, {'id': 2, 'label': '2', 'pos': [13.332481034239262, 46.00685608068721], 'tipologia': 'fermata', 'locazione': 'VIABELTRAME23', 'ragazzi': [0, 17, 0, 0]})
(3, {'id': 3, 'label': '3', 'pos': [13.332006889588357, 46.011779665523946], 'tipologia': 'fermata', 'locazione': 'VIAGORIZIA17', 'ragazzi': [7, 4, 0, 0]})
(4, {'id': 4, 'label': '4', 'pos': [13.339888208550311, 46.01912388511622], 'tipologia': 'fermata', 'locazione': 'VIALIPPE,FERMATAN.7', 'ragazzi': [0, 2, 0, 0]})
(5, {'id': 5, 'label': '5', 'pos': [13.333202605873304, 46.018368509384324], 'tipologia': 'fermata', 'locazione': 'VIADEGANUTTI&#10;FERMATAN3', 'ragazzi': [0, 4, 0, 0]})
(6, {'id': 6, 'label': '6', 'pos': [13.340102597322604, 45.99918361441391], 'tipologia': 'f

# Import distance and time matrices

In [8]:
def read_excel_matrix(file_path, header=None, index_col=None):
    """Load an Excel sheet and return its cells as a list of row lists.

    Args:
        file_path: Path of the Excel workbook to read.
        header: Forwarded to ``pandas.read_excel``.  With the default
            ``None`` every row is treated as data, so a row of column
            labels in the sheet shows up as the first row of the result.
            Pass ``0`` to have that row consumed as labels instead.
        index_col: Forwarded to ``pandas.read_excel``.  With the default
            ``None`` every column is treated as data, so a column of row
            labels shows up as the first element of each row.  Pass ``0``
            to have that column consumed as the index instead.

    Returns:
        The sheet as a 2D list (one inner list per row), or ``None`` when
        the workbook could not be read; in that case the error is printed.
    """
    try:
        # Read the Excel file into a DataFrame
        df = pd.read_excel(file_path, header=header, index_col=index_col)
    except Exception as e:
        # Deliberate best-effort: report the problem and hand back None
        # explicitly so callers can test for it.
        print("Error:", e)
        return None

    # Convert the DataFrame to a 2D list
    return df.values.tolist()


In [9]:
# Application: load the distance matrix workbook and print it row by row.
# NOTE(review): read_excel_matrix only prints the error and yields None when
# the file cannot be read, in which case the loop below raises TypeError.
# NOTE(review): judging by the printed output, the first row and the first
# element of every row look like index labels rather than distances; confirm
# before indexing into the matrix.
file_path = r'C:\Users\Samsung\Desktop\22_Internships\ProgettoTesi-main\Dati\BUTTRIO\matriceDistanze\DistanceMatrix.xlsx'
distance_matrix = read_excel_matrix(file_path)

for r in distance_matrix:
    print(r)

[nan, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0]
[0.0, 0.0, 360.4, 866.2, 164.9, 1053.0, 775.6, 2103.2, 253.6, 950.8, 1999.2, 965.2, 230.0, 1576.8, 868.4, 407.4, 480.6, 1896.7, 772.3]
[1.0, 314.8, 0.0, 1110.2, 149.9, 1235.5, 958.0, 2347.2, 497.7, 1133.3, 2243.3, 1147.6, 474.0, 1759.2, 1050.9, 589.8, 724.6, 2140.7, 954.8]
[2.0, 804.0, 1048.0, 0.0, 898.1, 1857.0, 1579.6, 1502.6, 681.0, 1754.8, 1398.6, 1769.2, 574.0, 2380.8, 1553.9, 1211.3, 907.9, 1296.0, 1576.3]
[3.0, 164.9, 149.9, 960.3, 0.0, 1217.9, 940.5, 2197.3, 347.8, 1115.7, 2093.4, 1130.1, 324.1, 1741.7, 1033.3, 572.3, 574.7, 1990.8, 937.3]
[4.0, 1053.0, 1235.5, 1919.2, 1217.9, 0.0, 673.0, 3156.2, 1306.7, 401.5, 3052.2, 1297.6, 1283.0, 523.8, 1320.2, 1112.4, 1533.6, 2949.7, 280.7]
[5.0, 842.1, 1024.5, 1708.2, 1007.0, 673.0, 0.0, 2945.2, 1095.7, 385.8, 2841.3, 607.0, 1072.0, 939.0, 857.8, 800.5, 1322.6, 2738.7, 590.2]
[6.0, 2041.0, 2285.0, 1502.6, 2135.2, 3094.0, 2816.6, 0.0, 1

In [10]:
# Application: load the time matrix workbook and print it row by row.
# NOTE(review): the result is stored in `distance_matrix`, which rebinds the
# name used for the distance matrix in the previous cell. A separate name
# such as `time_matrix` would keep both; confirm no later cell relies on
# the current name before changing it.
# NOTE(review): read_excel_matrix only prints the error and yields None when
# the file cannot be read, in which case the loop below raises TypeError.
file_path = r'C:\Users\Samsung\Desktop\22_Internships\ProgettoTesi-main\Dati\BUTTRIO\matriceDistanze\TimeMatrix.xlsx'
distance_matrix = read_excel_matrix(file_path)

for r in distance_matrix:
    print(r)

[nan, 0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 17.0]
[0.0, 0.0, 49.2, 80.0, 24.7, 93.2, 78.4, 204.3, 30.0, 98.6, 181.7, 87.5, 20.7, 174.2, 90.1, 44.1, 62.7, 168.2, 69.8]
[1.0, 48.8, 0.0, 128.7, 26.3, 126.4, 111.6, 253.0, 78.7, 131.8, 230.4, 120.7, 69.4, 207.4, 123.3, 77.3, 111.4, 216.9, 103.0]
[2.0, 80.8, 128.9, 0.0, 100.5, 174.0, 159.2, 167.6, 76.9, 179.4, 145.0, 168.3, 60.1, 255.0, 151.7, 124.9, 109.6, 131.5, 150.6]
[3.0, 22.5, 28.4, 102.4, 0.0, 115.7, 100.9, 226.7, 52.4, 121.1, 204.1, 110.0, 43.1, 196.7, 112.6, 66.6, 85.1, 190.6, 92.3]
[4.0, 93.3, 129.8, 173.3, 118.0, 0.0, 63.9, 297.6, 123.3, 39.8, 275.0, 120.6, 114.0, 81.0, 123.7, 98.4, 156.0, 261.5, 23.1]
[5.0, 95.4, 131.9, 175.4, 120.1, 67.2, 0.0, 299.7, 125.4, 49.7, 277.1, 57.8, 116.1, 138.6, 82.8, 83.0, 158.1, 263.6, 63.1]
[6.0, 202.4, 250.5, 169.6, 222.1, 295.6, 280.8, 0.0, 198.5, 301.0, 21.0, 289.9, 181.7, 376.6, 273.3, 246.5, 199.1, 47.2, 272.2]
[7.0, 28.4, 76.5, 76.2, 48.1,