# Traval Order Resolver
. = git repo root

## Data preparation

### Clean data
* Clean train station names
* Remove useless data

In [1]:

import pandas as pd
import pprint

# Data path
path = "./data/nonan_timetables.csv"

# Read data from local file
df = pd.read_csv(path,sep="\t")

# Debugg raw overview dataframe
df.head(5)

# Get all locations
traj = list(df["trajet"])

# Origins array
origin = []

# Destination array
destination = []

# Feed origin and destination arrays
for i in traj:
    splitted = i.split(" - ")
    origin.append(splitted[0].replace("Gare de ","").replace("Gare du ",""))
    destination.append(splitted[len(splitted)-1].replace("Gare de ","").replace("Gare du ",""))


# Final dataframe
endDF = pd.DataFrame()
endDF["origin"] = origin
endDF["destination"] = destination
endDF["length"] = list(df["duree"])

endDF.to_csv("./data/clean_timetables.csv",index=False)

### Build matrix

In [10]:
from scipy.sparse import csr_matrix
import numpy as np

path_cleaned = "./data/clean_timetables.csv"

# Get data
df = pd.read_csv(path_cleaned)

# Get all unique train station name
unique_gares = list(set(np.concatenate((list(df["destination"]), list(df["origin"])), axis=None)))

# Indexes for each train station
indexes_gares = list(range(0,len(unique_gares)))

print(unique_gares)

print(indexes_gares)

print(df)
# Replace every train station name with its index in the dataframe
df.replace(unique_gares,indexes_gares,inplace=True)

df.dropna(inplace=True)

df = df.astype("int64")

# Debugg - Association station name -> index
# for i in range(0,len(unique_gares)):
#     print("{} -> {}".format(unique_gares[i],indexes_gares[i]))

# Set up for the matrix
data = np.array(list(df["length"]))
row = np.array(list(df["origin"]))
col = np.array(list(df["destination"]))

# Debugg - see set up arrays
print(data,row,col)

# Matrix construction
matrix = csr_matrix((data, (row,col)))

# Export ids and length to csv
df.to_csv("./data/matricable_timetables.csv",index=False)

# Export indexes
indexes = pd.DataFrame()
indexes["unique_gares"] = unique_gares
indexes["indexes_gares"] = indexes_gares
indexes.to_csv("./data/matrixIndexes.csv",index=False)


print (matrix)

# Calling csvToMatrix()
print(matrix.toarray())
print(matrix)
print(matrix.shape)

['Sélestat-(Schweisguth)', 'Vézelise Beauregard', 'Frohmuhl- (Lavoir)', 'Nevers', 'Bas-Monistrol', 'Oissel', 'Hagondange', 'Mende', 'Romorantin-BA-G.R.', 'St-Pierre-de-Chandieu-R', 'Wissembourg', 'Pau', 'Lunel', 'Sens', 'Rouen-Rive-Droite', 'Granville', 'Cosne-sur-Loire', 'Callac', 'Libourne', 'Ambert', 'Mont-de-Marsan', 'Calais Fréthun', 'St-Denis-près-Martel', "Bois-d'Oingt-Centre", 'St Quentin', 'Le Mont St Michel', 'Amiens', 'Nogent-le-Rotrou', 'Hyères', 'Brou', 'La Souterraine', 'Le Puy-en-Velay', 'Lunéville', 'Ancizes-St-Georges-Bour', 'Latour de Carol', 'Chamberry-Jacob', 'Droué (Centre)', 'Laveline-d-Bruy. Mairie', 'Wesserling', 'Reipertswiller-SP.', 'Busigny', 'Plouaret-Trégor', 'Etang', 'Eymoutiers-Vassiviere', 'Neuchâtel', 'Chedde', 'Bourg-en-Bresse Gare R', 'Toury', 'Annemasse', 'Laon', 'Blois-Chambord', 'Bourg-St-Maurice', 'St-Gilles-Croix-de-Vie', 'Lozanne', 'Laqueuille', 'Poitiers-Gare-Routière', 'Paris Nord', 'Riquewihr-Poste', 'Egletons', 'Annonay-Gare-Route', 'Issoire

## Raw Data Visualization

In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from pyvis.network import Network
import numpy as np 

df = pd.read_csv("data/clean_timetables.csv")


net=Network()
i=0
dict_gare={}
unique_gares = list(set(np.concatenate((list(df["destination"]), list(df["origin"])), axis=None)))
for gare in unique_gares :
    dict_gare[gare]=i
    net.add_node(i,label=gare)
    i+=1

for i in df.index:
    dep=dict_gare[df['origin'][i]]
    arr=dict_gare[df['destination'][i]]
    w=int(df['length'][i])
    net.add_edge(dep,arr,value=w)

net.repulsion(node_distance=400, spring_length=400)
net.show_buttons(filter_=True)
net.show('dataviz/edges_with_weights.html')



## Speech Recognition

In [None]:
import speech_recognition as sr

speech = './tests/Céline-Paris.wav'

r = sr.Recognizer()

with sr.AudioFile(speech) as source:
    # listen for the data (load audio to memory)
    audio_data = r.record(source)
    # recognize (convert from speech to text)
    text = r.recognize_google(audio_data, language="fr-FR")
    print(text)

## Retrieve Departure & Destination

### Get cities names

In [None]:
txt = "Je veux aller de Paris a Besançon en passant par Lyon Elon Musk"
import spacy

nlp = spacy.load("fr_core_news_sm")

doc = nlp(txt)

for i in doc.ents:
    print(i.text, i.start_char, i.end_char, i.label_)

### Get departure & destination

## Shortest Path

[592 492 215 ... 471 685 684]
The shortest distance of  a  from the source vertex a is: 0
The shortest distance of  b  from the source vertex a is: 3
The shortest distance of  c  from the source vertex a is: 3.5
The shortest distance of  d  from the source vertex a is: 4.5


## Result(s) Visualization

# TODOS 
## DEALINE 24/12
* Sylvain -> Dijsktra matrice + add doc here
* Samantha -> Doc vulgarisée + data viz des resultats
* Aymerick -> Fleche de diretions
* Paul -> main.py + fleches de directions + finir clean data (enlever nan values)

# Question pédago
* Application ?
