# **Conversion des fichiers RDF**

### Chicago (RDF) : Transformation du format RDF vers CSV via un script Python.

In [20]:
import pandas as pd
from rdflib import Graph,Namespace

##### Conversion de `CTAChicago-Ridership-BusRoutes-DailyTypeAverages&Totals.rdf` en CSV

In [21]:
# Convert the CTA daily bus-ridership RDF/XML export into a flat CSV.
# One CSV row is produced per RDF subject that carries a `route` predicate.

# Load the RDF/XML file into an in-memory graph.
g = Graph()
g.parse("../data/raw/CTAChicago-Ridership-BusRoutes-DailyTypeAverages&Totals.rdf", format="xml")

# Namespace holding the dataset-specific predicates (route, date, daytype, rides).
DS = Namespace("https://data.cityofchicago.org/resource/jyb9-n7fm/")

# Output columns in CSV order. Passing them to the DataFrame constructor keeps
# the header row even when no observation is collected.
csv_columns = ["route", "date", "day_type", "rides"]

data = []
skipped = 0  # observations dropped because at least one field is missing

# Walk every observation, i.e. every subject that has a `route` triple.
for obs in g.subjects(predicate=DS.route):
    route = g.value(obs, DS.route)
    date = g.value(obs, DS.date)
    daytype = g.value(obs, DS.daytype)
    rides = g.value(obs, DS.rides)

    # A field is "missing" when the triple is absent (None) or its lexical
    # form is empty. Do not rely on truthiness here: an rdflib Literal takes
    # its truth value from the converted Python value, so a datatyped 0 would
    # be treated as missing and a legitimate zero-ridership row would be lost.
    fields = (route, date, daytype, rides)
    if any(field is None or str(field) == "" for field in fields):
        skipped += 1
        continue

    data.append({
        "route": str(route),
        "date": str(date)[:10],   # keep only the YYYY-MM-DD prefix
        "day_type": str(daytype),
        "rides": int(rides)
    })

# Build the DataFrame with an explicit column list.
df = pd.DataFrame(data, columns=csv_columns)

# Export to CSV without the positional index.
df.to_csv("../data/raw/chicago_ridership_BusRoutes-DailyTypeAverages_Totals.csv", index=False)

print("Conversion RDF → CSV réussie")
if skipped:
    # Make dropped rows visible instead of discarding them silently.
    print(f"{skipped} observation(s) incomplète(s) ignorée(s)")
print(df.head())

Conversion RDF → CSV réussie
  route        date day_type  rides
0     3  2001-01-01        U   7354
1     3  2001-01-02        W  16697
2     3  2001-01-03        W  18195
3     3  2001-01-04        W  19424
4     3  2001-01-05        W  21221


##### Conversion de `CTAChicago-Ridership-BusRoutes-MonthlyDay-TypeAverages&Totals.rdf` en CSV

In [22]:
# Convert the CTA monthly bus-ridership RDF/XML export into a flat CSV.
# One CSV row is produced per RDF subject that carries a `route` predicate.

# Load the RDF/XML file into an in-memory graph.
g = Graph()
g.parse("../data/raw/CTAChicago-Ridership-BusRoutes-MonthlyDay-TypeAverages&Totals.rdf", format="xml")

# Namespace holding the dataset-specific predicates read below.
DS = Namespace("https://data.cityofchicago.org/resource/bynn-gwxy/")

# Output columns in CSV order. Passing them to the DataFrame constructor keeps
# the header row even when no observation is collected.
csv_columns = [
    "route",
    "routename",
    "month_beginning",
    "avg_weekday_rides",
    "avg_saturday_rides",
    "avg_sunday_holiday_rides",
    "monthtotal",
]

data = []
skipped = 0  # observations dropped because at least one field is missing

# Walk every observation, i.e. every subject that has a `route` triple.
for obs in g.subjects(predicate=DS.route):
    route = g.value(obs, DS.route)
    routename = g.value(obs, DS.routename)
    month_beginning = g.value(obs, DS.month_beginning)
    avg_weekday_rides = g.value(obs, DS.avg_weekday_rides)
    avg_saturday_rides = g.value(obs, DS.avg_saturday_rides)
    avg_sunday_holiday_rides = g.value(obs, DS.avg_sunday_holiday_rides)
    monthtotal = g.value(obs, DS.monthtotal)

    # A field is "missing" when the triple is absent (None) or its lexical
    # form is empty. Do not rely on truthiness here: an rdflib Literal takes
    # its truth value from the converted Python value, so a datatyped 0 / 0.0
    # (e.g. a route with no Saturday service) would be treated as missing and
    # the whole row would be lost.
    fields = (
        route,
        routename,
        month_beginning,
        avg_weekday_rides,
        avg_saturday_rides,
        avg_sunday_holiday_rides,
        monthtotal,
    )
    if any(field is None or str(field) == "" for field in fields):
        skipped += 1
        continue

    data.append({
        "route": str(route),
        "routename": str(routename),
        "month_beginning": str(month_beginning)[:10],   # keep only the YYYY-MM-DD prefix
        "avg_weekday_rides": float(avg_weekday_rides),
        "avg_saturday_rides": float(avg_saturday_rides),
        "avg_sunday_holiday_rides": float(avg_sunday_holiday_rides),
        "monthtotal": int(monthtotal),
    })

# Build the DataFrame with an explicit column list.
df = pd.DataFrame(data, columns=csv_columns)

# Export to CSV without the positional index.
df.to_csv("../data/raw/chicago_ridership-BusRoutes-MonthlyDay-TypeAverages_Totals.csv", index=False)

print("Conversion RDF → CSV réussie")
if skipped:
    # Make dropped rows visible instead of discarding them silently.
    print(f"{skipped} observation(s) incomplète(s) ignorée(s)")
print(df.head())

Conversion RDF → CSV réussie
  route          routename month_beginning  avg_weekday_rides  \
0     1  Indiana/Hyde Park      2001-01-01             6982.6   
1     1  Indiana/Hyde Park      2001-02-01             7401.4   
2     1  Indiana/Hyde Park      2001-03-01             7571.8   
3     1  Indiana/Hyde Park      2001-04-01             6875.1   
4     2  Hyde Park Express      2001-01-01             1000.0   

   avg_saturday_rides  avg_sunday_holiday_rides  monthtotal  
0                 0.0                       0.0      153617  
1                 0.0                       0.0      148028  
2                 0.0                       0.0      166579  
3                 0.0                       0.0      144377  
4                 0.0                       0.0       22001  
