In [189]:
import pandas as pd
import numpy as np
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
import scipy.stats as st
from sklearn.linear_model import LinearRegression
from numpy.linalg import inv
from numpy.random import normal as rnorm
from statsmodels.stats.outliers_influence import OLSInfluence

In [190]:
blablacar_data = pd.read_csv('blablacar_data.csv')
blablacar_data['duration'] = blablacar_data['duration']/60
blablacar_data['distance_in_meters'] = blablacar_data['distance_in_meters']/1000
blablacar_data.rename(columns={'price': 'blablacar_price','duration': 'blablacar_duration_in_min' , 'distance_in_meters':'blablacar_distance_in_km' }, inplace=True)
blablacar_data = blablacar_data.assign(blablacar_price_per_km=(blablacar_data['blablacar_price'] / blablacar_data['blablacar_distance_in_km'] ))
blablacar_data = blablacar_data.assign(route=(blablacar_data['dep_city']+','+blablacar_data['arr_city'] ))

In [191]:
blablacar_data.head()

Unnamed: 0,region,dep_city,dep_date,dep_time_slot,arr_city,arr_time_slot,blablacar_price,blablacar_distance_in_km,blablacar_duration_in_min,blablacar_price_per_km,route
0,0.0,Paris,20-01-23,4.0,Nantes,4.0,32.0,370.348,230.0,0.086405,"Paris,Nantes"
1,0.0,Paris,20-01-23,1.0,Nantes,1.0,25.0,380.185,240.0,0.065757,"Paris,Nantes"
2,0.0,Paris,20-01-23,1.0,Nantes,2.0,29.0,396.878,290.0,0.07307,"Paris,Nantes"
3,0.0,Paris,20-01-23,1.0,Nantes,2.0,27.0,360.571,220.0,0.074881,"Paris,Nantes"
4,0.0,Paris,20-01-23,1.0,Nantes,2.0,31.0,382.781,240.0,0.080986,"Paris,Nantes"


In [192]:
blablacar_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8889 entries, 0 to 8888
Data columns (total 11 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   region                     8889 non-null   float64
 1   dep_city                   8889 non-null   object 
 2   dep_date                   8889 non-null   object 
 3   dep_time_slot              8889 non-null   float64
 4   arr_city                   8889 non-null   object 
 5   arr_time_slot              8889 non-null   float64
 6   blablacar_price            8889 non-null   float64
 7   blablacar_distance_in_km   8889 non-null   float64
 8   blablacar_duration_in_min  8889 non-null   float64
 9   blablacar_price_per_km     8889 non-null   float64
 10  route                      8889 non-null   object 
dtypes: float64(7), object(4)
memory usage: 764.0+ KB


In [193]:
trains_data = pd.read_csv('trains_data.csv')
trains_data['duration'] = trains_data['duration']/60
trains_data['train_distance'] = trains_data['train_distance']/1000
trains_data.rename(columns={'standard_class_price': 'train_price','duration': 'train_duration_in_min' , 'train_distance':'train_distance_in_km' , 'nb_changes': 'train_nb_changes' }, inplace=True)
trains_data = trains_data.assign(train_price_per_km=(trains_data['train_price'] / trains_data['train_distance_in_km'] ))
trains_data = trains_data.assign(route=(trains_data['dep_city']+','+trains_data['arr_city'] ))

In [194]:
trains_data.head()

Unnamed: 0,region,dep_city,dep_date,dep_time_slot,arr_city,arr_time_slot,train_duration_in_min,train_nb_changes,train_name_1,train_name_2,train_price,BlablacarBus_available,train_distance_in_km,train_price_per_km,route
0,0.0,Paris,20-01-23,1.0,Nantes,1.0,142.0,0.0,TGV Inoui,,50.0,No,342.702025,0.145899,"Paris,Nantes"
1,0.0,Paris,20-01-23,1.0,Nantes,1.0,184.0,1.0,Ouigo,SNCF,37.0,No,342.702025,0.107966,"Paris,Nantes"
2,0.0,Paris,20-01-23,1.0,Nantes,1.0,173.0,1.0,TGV Inoui,SNCF,113.0,No,342.702025,0.329733,"Paris,Nantes"
3,0.0,Paris,20-01-23,1.0,Nantes,1.0,249.0,1.0,TGV Inoui,SNCF,104.0,No,342.702025,0.303471,"Paris,Nantes"
4,0.0,Paris,20-01-23,1.0,Nantes,1.0,224.0,0.0,Ouigo Classique,,19.0,No,342.702025,0.055442,"Paris,Nantes"


In [195]:
trains_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16292 entries, 0 to 16291
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   region                  16292 non-null  float64
 1   dep_city                16292 non-null  object 
 2   dep_date                16292 non-null  object 
 3   dep_time_slot           16292 non-null  float64
 4   arr_city                16292 non-null  object 
 5   arr_time_slot           16292 non-null  float64
 6   train_duration_in_min   16292 non-null  float64
 7   train_nb_changes        16292 non-null  float64
 8   train_name_1            16292 non-null  object 
 9   train_name_2            11537 non-null  object 
 10  train_price             16292 non-null  float64
 11  BlablacarBus_available  16292 non-null  object 
 12  train_distance_in_km    16292 non-null  float64
 13  train_price_per_km      16292 non-null  float64
 14  route                   16292 non-null

In [196]:
# get the lowest price train per day and time slot between 2 cities
lowest_trains=pd.DataFrame()
cities=['Paris', 'Nantes', 'Lyon', 'Lille', 'Marseille', 'Reims',
       'Bordeaux', 'Toulouse', 'Montpellier', 'Nice', 'Brest',
       'Rennes', 'Poitiers', 'Nancy', 'Strasbourg', 'Metz']
main_cities=['Paris', 'Nantes', 'Lyon', 'Lille', 'Marseille', 'Reims',
       'Bordeaux', 'Toulouse', 'Montpellier', 'Nice', 'Brest',
       'Rennes', 'Poitiers', 'Nancy', 'Strasbourg', 'Metz']
for i in [1,2,3,4]:
    for j in main_cities:
        dt=trains_data
        dt1=dt[dt["dep_city"]==j]
        dt2=dt1[dt1["dep_time_slot"]==i]
        for k in cities:
            if(k!=j):
                dt3=dt2[dt2["arr_city"]==k]
                for l in list(range(9)):
                    date="2"+str(l)+"-01-23"
                    dt4=dt3[dt3["dep_date"]==date]
                    dt5=dt4[dt4["train_price"]==dt4["train_price"].min()]
                    lowest_trains=lowest_trains.append(dt5)
lowest_trains=lowest_trains.sort_values(["dep_date","dep_time_slot"])
lowest_trains=lowest_trains.drop_duplicates(subset=["dep_city","arr_city","dep_time_slot","dep_date"])                    
lowest_trains=lowest_trains.reset_index()                


  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.append(dt5)
  lowest_trains=lowest_trains.appe

In [177]:
lowest_trains


Unnamed: 0,index,region,dep_city,dep_date,dep_time_slot,arr_city,arr_time_slot,train_duration_in_min,train_nb_changes,train_name_1,train_name_2,train_price,BlablacarBus_available,train_distance_in_km,train_price_per_km,route
0,4,0.0,Paris,20-01-23,1.0,Nantes,1.0,224.0,0.0,Ouigo Classique,,19.00,No,342.702025,0.055442,"Paris,Nantes"
1,299,0.0,Paris,20-01-23,1.0,Lyon,2.0,308.0,0.0,SNCF,,65.60,No,392.047741,0.167327,"Paris,Lyon"
2,594,0.0,Paris,20-01-23,1.0,Lille,2.0,273.0,3.0,SNCF,SNCF,37.80,No,203.376494,0.185862,"Paris,Lille"
3,826,0.0,Paris,20-01-23,1.0,Marseille,2.0,576.0,1.0,BlablacarBus,TGV Inoui,88.99,Yes,660.494275,0.134732,"Paris,Marseille"
4,2022,0.0,Nantes,20-01-23,1.0,Paris,1.0,250.0,0.0,Ouigo Classique,,22.00,No,342.702025,0.064196,"Nantes,Paris"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2336,7253,4.0,Nancy,28-01-23,4.0,Strasbourg,4.0,83.0,0.0,SNCF,,10.00,No,115.366401,0.086680,"Nancy,Strasbourg"
2337,7569,4.0,Nancy,28-01-23,4.0,Metz,4.0,41.0,0.0,SNCF,,12.60,No,47.449478,0.265546,"Nancy,Metz"
2338,8024,4.0,Strasbourg,28-01-23,4.0,Nancy,4.0,191.0,1.0,Ouigo,SNCF,15.00,No,115.366401,0.130021,"Strasbourg,Nancy"
2339,8282,4.0,Strasbourg,28-01-23,4.0,Metz,4.0,50.0,0.0,Ouigo,,10.00,No,129.331581,0.077321,"Strasbourg,Metz"


In [197]:
sum(lowest_trains['dep_date'].value_counts())

2341

In [198]:
lowest_trains = lowest_trains.drop("index", axis=1)

In [208]:
lowest_trains.to_csv('lowest_trains.csv')

In [199]:
lowest_trains.head()

Unnamed: 0,region,dep_city,dep_date,dep_time_slot,arr_city,arr_time_slot,train_duration_in_min,train_nb_changes,train_name_1,train_name_2,train_price,BlablacarBus_available,train_distance_in_km,train_price_per_km,route
0,0.0,Paris,20-01-23,1.0,Nantes,1.0,224.0,0.0,Ouigo Classique,,19.0,No,342.702025,0.055442,"Paris,Nantes"
1,0.0,Paris,20-01-23,1.0,Lyon,2.0,308.0,0.0,SNCF,,65.6,No,392.047741,0.167327,"Paris,Lyon"
2,0.0,Paris,20-01-23,1.0,Lille,2.0,273.0,3.0,SNCF,SNCF,37.8,No,203.376494,0.185862,"Paris,Lille"
3,0.0,Paris,20-01-23,1.0,Marseille,2.0,576.0,1.0,BlablacarBus,TGV Inoui,88.99,Yes,660.494275,0.134732,"Paris,Marseille"
4,0.0,Nantes,20-01-23,1.0,Paris,1.0,250.0,0.0,Ouigo Classique,,22.0,No,342.702025,0.064196,"Nantes,Paris"


In [200]:
blablacar_data.head()

Unnamed: 0,region,dep_city,dep_date,dep_time_slot,arr_city,arr_time_slot,blablacar_price,blablacar_distance_in_km,blablacar_duration_in_min,blablacar_price_per_km,route
0,0.0,Paris,20-01-23,4.0,Nantes,4.0,32.0,370.348,230.0,0.086405,"Paris,Nantes"
1,0.0,Paris,20-01-23,1.0,Nantes,1.0,25.0,380.185,240.0,0.065757,"Paris,Nantes"
2,0.0,Paris,20-01-23,1.0,Nantes,2.0,29.0,396.878,290.0,0.07307,"Paris,Nantes"
3,0.0,Paris,20-01-23,1.0,Nantes,2.0,27.0,360.571,220.0,0.074881,"Paris,Nantes"
4,0.0,Paris,20-01-23,1.0,Nantes,2.0,31.0,382.781,240.0,0.080986,"Paris,Nantes"


In [203]:
blablacar_trips_matched_with_cheapest_replacing_train.head()

Unnamed: 0,region_x,dep_city,dep_date,dep_time_slot,arr_city,arr_time_slot_x,blablacar_price,blablacar_distance_in_km,blablacar_duration_in_min,blablacar_price_per_km,...,arr_time_slot_y,train_duration_in_min,train_nb_changes,train_name_1,train_name_2,train_price,BlablacarBus_available,train_distance_in_km,train_price_per_km,route_y
0,0.0,Paris,20-01-23,1.0,Nantes,1.0,25.0,380.185,240.0,0.065757,...,1.0,224.0,0.0,Ouigo Classique,,19.0,No,342.702025,0.055442,"Paris,Nantes"
1,0.0,Paris,20-01-23,1.0,Nantes,2.0,29.0,396.878,290.0,0.07307,...,1.0,224.0,0.0,Ouigo Classique,,19.0,No,342.702025,0.055442,"Paris,Nantes"
2,0.0,Paris,20-01-23,1.0,Nantes,2.0,27.0,360.571,220.0,0.074881,...,1.0,224.0,0.0,Ouigo Classique,,19.0,No,342.702025,0.055442,"Paris,Nantes"
3,0.0,Paris,20-01-23,1.0,Nantes,2.0,31.0,382.781,240.0,0.080986,...,1.0,224.0,0.0,Ouigo Classique,,19.0,No,342.702025,0.055442,"Paris,Nantes"
4,0.0,Paris,20-01-23,1.0,Nantes,2.0,32.0,370.929,230.0,0.08627,...,1.0,224.0,0.0,Ouigo Classique,,19.0,No,342.702025,0.055442,"Paris,Nantes"


In [206]:
blablacar_trips_matched_with_cheapest_replacing_train = pd.merge(blablacar_data, lowest_trains, on=['dep_city', 'arr_city','dep_date','dep_time_slot'],how='left')


In [218]:
blablacar_trips_matched_with_cheapest_replacing_train.to_csv('blablacar_trips_matched_with_cheapest_replacing_train.csv')

In [None]:
trains_data = trains_data.assign(route=(trains_data['dep_city']+','+trains_data['arr_city'] ))

In [216]:
blablacar_trips_matched_with_cheapest_replacing_train['train_available'] = np.where(blablacar_trips_matched_with_cheapest_replacing_train["train_name_1"].isnull(), 'No', 'Yes')


In [217]:
blablacar_trips_matched_with_cheapest_replacing_train['Paris_is_dep_city'] = np.where(blablacar_trips_matched_with_cheapest_replacing_train["dep_city"]=='Paris', 'Yes', 'No')