In [1]:
import os
import ssl
from pathlib import Path

import pandas
import requests
from requests.exceptions import HTTPError
import pandas as pd
import xmltodict
import json
import time
from json import dumps
from datetime import datetime
from kafka import KafkaProducer

In [2]:
def _direction_from_trip_code(code):
    """Return the parity (0 or 1) of the two trailing characters of a mission code.

    NaN is returned when ``code`` cannot be sliced (for example a missing
    value) or when its last two characters do not form an integer.
    """
    try:
        return int(code[-2:]) % 2
    except (TypeError, ValueError):
        return float('nan')


def _with_train_list(station):
    """Wrap a lone ``train`` mapping in a list so it explodes to exactly one row."""
    if isinstance(station, dict) and isinstance(station.get('train'), dict):
        return {**station, 'train': [station['train']]}
    return station


def make_df(js: dict) -> pd.DataFrame:
    """Flatten one parsed station response into a one-row-per-train DataFrame.

    Parameters
    ----------
    js : dict
        Mapping whose values are station mappings. A station mapping is
        expected to carry a ``train`` entry holding either a list of train
        mappings or a single train mapping (both are accepted).

    Returns
    -------
    pandas.DataFrame
        When a ``train`` column is available: one row per train with the
        columns ``timestamp``, ``gare``, ``trip_short``, ``trip_headsign``,
        ``destination``, ``date.mode``, ``heure_arrive``, ``direction`` and
        ``etat``. Columns absent from the input are filled with NaN.
        ``direction`` is the parity of the last two characters of
        ``trip_short``, or NaN for rows where it cannot be derived.
        When there is no ``train`` column, the un-normalised frame built
        from ``js`` is returned unchanged.
    """
    columns = ['timestamp', 'gare', 'num', 'miss', 'term', 'date.mode', 'date.text', 'direction', 'etat']
    columns_renamed = {
        "miss": "trip_headsign",
        "term": "destination",
        "date.text": "heure_arrive",
        "num": "trip_short",
    }

    # A station with exactly one train yields a mapping instead of a list;
    # exploding a mapping would iterate over its keys, so normalise it first.
    stations = {key: _with_train_list(value) for key, value in js.items()}
    df = pd.DataFrame.from_dict(stations, orient='index')

    if 'train' not in df.columns:
        # No train is present in the response.
        print("No train value available")
        return df

    df = df.explode('train').reset_index(drop=True)
    df = df.join(pd.json_normalize(df['train'])).drop('train', axis=1)
    df = df.reindex(columns=columns).rename(columns=columns_renamed)

    # Derive the direction row by row so that one unusable mission code
    # (missing, or without a numeric suffix) no longer aborts or blanks
    # the whole column.
    df['direction'] = df['trip_short'].map(_direction_from_trip_code)
    if df['direction'].isna().any():
        # Happens when trains other than RER B ones stop at the station.
        print("Error processing direction (maybe its void)")

    return df

In [3]:
def main():
    """Fetch the upcoming departures of a fixed list of stations as one DataFrame.

    Every station URL is queried in turn; the XML answers are parsed,
    flattened with ``make_df``, concatenated, reduced to the rows whose
    mission code starts with a non-digit character (the RER B filter), and
    stamped with the current local time truncated to the second.

    Side effects
    ------------
    Performs one HTTP GET per station and creates the directory
    ``../../data/processed/<month>/<day>`` if it does not exist. No file is
    written.

    Returns
    -------
    pandas.DataFrame or int
        The filtered departures, or ``0`` as soon as any request fails
        (the error is printed).
    """
    # Credentials are not stored in the notebook. The value after "Basic " is
    # read from the TRANSILIEN_BASIC_AUTH environment variable; when it is
    # unset the header is 'Basic ' exactly as before.
    headers = {
        'Authorization': 'Basic ' + os.environ.get('TRANSILIEN_BASIC_AUTH', '')
    }

    url = [
        "https://api.transilien.com/gare/87001479/depart",  # Charles de Gaulle 2
        "https://api.transilien.com/gare/87271460/depart",  # Charles de Gaulle 1
        "https://api.transilien.com/gare/87271486/depart",  # Parc des Expositions
        "https://api.transilien.com/gare/87271452/depart",  # Villepinte
        "https://api.transilien.com/gare/87271445/depart",  # Sevran Beaudottes

        "https://api.transilien.com/gare/87271528/depart",  # Mitry-Claye
        "https://api.transilien.com/gare/87271510/depart",  # Villeparisis Mitry-le-Neuf
        "https://api.transilien.com/gare/87271437/depart",  # Vert Galant
        "https://api.transilien.com/gare/87271429/depart",  # Sevran Livry

        "https://api.transilien.com/gare/87271411/depart",  # Aulnay-sous-Bois
        "https://api.transilien.com/gare/87271478/depart",  # Le Blanc-Mesnil
        "https://api.transilien.com/gare/87271403/depart",  # Drancy
        "https://api.transilien.com/gare/87271395/depart",  # Le Bourget
        "https://api.transilien.com/gare/87271304/depart",  # La Courneuve - Aubervilliers
        "https://api.transilien.com/gare/87164798/depart",  # La Plaine Stade-de-France
        "https://api.transilien.com/gare/87271007/depart",  # Paris Gare-du-Nord
    ]

    response = []
    for u in url:
        try:
            # A timeout keeps an unresponsive server from blocking the cell
            # forever; a timeout error takes the generic branch below.
            feed = requests.get(u, headers=headers, timeout=30)
            feed.raise_for_status()
        except HTTPError as http_err:
            print(f'HTTP error occurred: {http_err}')
            return 0
        except Exception as err:
            print(f'Other error occurred: {err}')
            return 0
        else:
            response.append(feed)

    # Parse each XML answer into a dictionary and flatten it into a DataFrame.
    df_array = []
    for feed in response:
        # Let the parser emit plain key names ("gare", "mode", "text") instead
        # of stripping '@' and '#' from the serialised text afterwards, which
        # also altered values and broke on apostrophes. force_list keeps
        # "train" a list even when a station has a single departure.
        as_dict = xmltodict.parse(
            feed.content,
            attr_prefix='',
            cdata_key='text',
            force_list=('train',),
        )
        df_array.append(make_df(as_dict))

    df = pd.concat(df_array, ignore_index=True)

    pattern = r'^\D'  # Keeps only the RER B mission codes (they start with a non-digit)
    if 'trip_short' in df.columns:
        codes = df['trip_short']
        # Rows without a mission code (stations with no train) are dropped
        # explicitly: a NaN in the mask used to raise, and the swallowed
        # error left the frame unfiltered.
        df = df[codes.notna() & codes.astype(str).str.contains(pattern)]
    else:
        print("Error at trip short not containing pattern (No train available maybe)")

    # Current local time, truncated to the second.
    datetime_obj = datetime.now().replace(microsecond=0)
    df = df.assign(timestamp=datetime_obj)

    # Destination directory of the daily CSV export. The export itself is
    # currently disabled, only the directory is prepared.
    output_dir = Path('../../data/processed') / str(datetime_obj.month) / str(datetime_obj.day)
    output_dir.mkdir(parents=True, exist_ok=True)
    return df

In [4]:
# First snapshot of the departures feed. This performs live HTTP requests;
# main() returns 0 instead of a DataFrame when a request fails.
jdf = main()

In [8]:
# Second snapshot of the same feed, taken by a separate call to main() and
# therefore carrying its own timestamp.
jdf_2 = main()

In [9]:
# Plain-text dump of the second snapshot (pandas truncates long frames).
print(jdf_2)

              timestamp      gare trip_short trip_headsign destination  \
0   2022-06-04 10:45:20  87001479     KARI86          KARI    87393579   
1   2022-06-04 10:45:20  87001479     PIER90          PIER    87758896   
2   2022-06-04 10:45:20  87001479     KARI92          KARI    87393579   
3   2022-06-04 10:45:20  87001479     PIER96          PIER    87758896   
4   2022-06-04 10:45:20  87001479     KARI98          KARI    87393579   
..                  ...       ...        ...           ...         ...   
387 2022-06-04 10:45:20  87271007     PIER84          PIER    87758896   
389 2022-06-04 10:45:20  87271007     ILOT81          ILOT    87271528   
391 2022-06-04 10:45:20  87271007     KARI86          KARI    87393579   
392 2022-06-04 10:45:20  87271007     ERIC83          ERIC    87001479   
394 2022-06-04 10:45:20  87271007     SORI88          SORI    87758722   

    date.mode      heure_arrive  direction  etat  
0           R  04/06/2022 10:51          0   NaN  
1        

In [None]:
def get_traval_time(df1: pd.DataFrame, df2: pd.DataFrame) -> pd.DataFrame:
    """Placeholder for a computation combining two departure snapshots.

    The function had a signature but no body, which made the cell a syntax
    error. It now parses and fails explicitly until the logic is written.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Two frames; the notebook passes two results of ``main()``.
        NOTE(review): the name suggests a travel-time computation between
        the two snapshots - confirm the intended semantics before
        implementing.

    Raises
    ------
    NotImplementedError
        Always, until the computation is implemented.
    """
    raise NotImplementedError("get_traval_time has not been implemented yet")


In [None]:
# Combine the two snapshots. get_traval_time is still a stub, so this cell
# cannot produce a result until that function is implemented.
df_final = get_traval_time(jdf, jdf_2)

In [6]:
# Round-trip the first snapshot through pandas' JSON encoder to obtain plain
# Python records (a list with one dict per row).
parsed = json.loads(jdf.to_json(orient="records"))

In [7]:
# Compact JSON encoding of the records: the separators carry no whitespace.
# The resulting string is the value displayed by the cell.
json.dumps(parsed,separators=(',', ':'))

'[{"timestamp":1654339396000,"gare":"87001479","trip_short":"PIER84","trip_headsign":"PIER","destination":"87758896","date.mode":"R","heure_arrive":"04/06/2022 10:44","direction":0,"etat":null},{"timestamp":1654339396000,"gare":"87001479","trip_short":"KARI86","trip_headsign":"KARI","destination":"87393579","date.mode":"R","heure_arrive":"04/06/2022 10:51","direction":0,"etat":null},{"timestamp":1654339396000,"gare":"87001479","trip_short":"PIER90","trip_headsign":"PIER","destination":"87758896","date.mode":"R","heure_arrive":"04/06/2022 10:59","direction":0,"etat":null},{"timestamp":1654339396000,"gare":"87001479","trip_short":"KARI92","trip_headsign":"KARI","destination":"87393579","date.mode":"R","heure_arrive":"04/06/2022 11:06","direction":0,"etat":null},{"timestamp":1654339396000,"gare":"87001479","trip_short":"PIER96","trip_headsign":"PIER","destination":"87758896","date.mode":"R","heure_arrive":"04/06/2022 11:14","direction":0,"etat":null},{"timestamp":1654339396000,"gare":"870

In [34]:
# Serialise the first snapshot directly with pandas, one JSON object per row.
# In the recorded output the timestamp is an integer and "/" is escaped.
jdf.to_json(orient="records")

'[{"timestamp":1653730047000,"gare":"87001479","trip_short":"AFFY32","trip_headsign":"AFFY","destination":"87271031","date.mode":"R","heure_arrive":"28\\/05\\/2022 09:36","direction":0,"etat":null},{"timestamp":1653730047000,"gare":"87001479","trip_short":"AFFY34","trip_headsign":"AFFY","destination":"87271031","date.mode":"R","heure_arrive":"28\\/05\\/2022 09:54","direction":0,"etat":null},{"timestamp":1653730047000,"gare":"87001479","trip_short":"AFFY36","trip_headsign":"AFFY","destination":"87271031","date.mode":"R","heure_arrive":"28\\/05\\/2022 10:06","direction":0,"etat":null},{"timestamp":1653730047000,"gare":"87001479","trip_short":"AFFY38","trip_headsign":"AFFY","destination":"87271031","date.mode":"R","heure_arrive":"28\\/05\\/2022 10:24","direction":0,"etat":null},{"timestamp":1653730047000,"gare":"87001479","trip_short":"AFFY40","trip_headsign":"AFFY","destination":"87271031","date.mode":"R","heure_arrive":"28\\/05\\/2022 10:36","direction":0,"etat":null},{"timestamp":16537