# Blablattractivity - Local and tests -

## Data collection

We start with the basic imports needed to use the BlaBlaCar web API.

In [1]:
#%pip install pandas

In [2]:
#%pip install fsspec

In [3]:
#%pip install google-cloud-storage

In [4]:
#%pip install matplotlib

In [5]:
#%pip install seaborn

In [1]:
import json
import requests
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import AutoMinorLocator
import matplotlib.dates as mdates
import seaborn as sns
from google.cloud import storage
import datetime 
from math import ceil, floor

In [2]:
# Load the origin/destination pairs and keep only our testing sample:
# the last row among the first 23 rows of the file, re-indexed from 0.
origin_destination_df = (
    pd.read_csv("./origin_destination.csv", sep=";")
    .head(23)
    .tail(1)
    .reset_index(drop=True)
)
origin_destination_df

Unnamed: 0,Origin,Destination,From_coordinate,To_coordinate
0,Paris,Marseille,48.856614%7C2.352221,43.296482%2C5.36978


Convert the dataframe to a dictionary:

In [3]:
# One entry per dataframe row: {row index: {column name: value}}.
route_dict = origin_destination_df.to_dict(orient="index")
route_dict

{0: {'Origin': 'Paris',
  'Destination': 'Marseille',
  'From_coordinate': '48.856614%7C2.352221',
  'To_coordinate': '43.296482%2C5.36978'}}

As soon as we get access to the API, we work from this documentation: https://dev.blablacar.com/hc/en-us/articles/360012064820--Search-V3-API-Documentation

In [4]:
# Read the API key from a local file so that it never appears in the notebook.
# The context manager closes the file handle, and strip() removes any trailing
# newline or spaces that would otherwise end up inside the query URL.
with open("./api.txt", "r") as token_file:
    api_token = token_file.read().strip()
api_url = "https://public-api.blablacar.com"

These are the basic parameters of our API call:

In [5]:
# All values are kept as strings because they are inserted as text into the query URL.

# Departure and arrival countries (ISO 3166-1 codes).
from_country = to_country = "FR"

# Locale in which the results are displayed:
# https://dev.blablacar.com/hc/en-us/articles/360008930020
locale = "fr-FR"

# ISO 4217 currency code.
currency = "EUR"

# Number of trips to return: from 1 to 100, the default is 10.
number_of_trips_we_want_returned = "100"

# Minimum number of seats requested, the default is 1.
requested_seats = "1"

# Radius (in meters) of the circle around the coordinates in which trips are
# searched. The default is 5000; 6000 was chosen because 5000 is okay for
# cities but a margin is wanted for people meeting a bit away from them.
radius_in_meters = "6000"

The date of the trip is different every time we run the code: it always has to be the upcoming Friday (or today, if today is a Friday), so we compute it here:

In [6]:
# Find the upcoming Friday (weekday() == 4). The modulo keeps the offset in
# the range 0..6, so a run on a Friday selects that same day.
today = datetime.date.today()
days_until_friday = (4 - today.weekday()) % 7
friday = today + datetime.timedelta(days=days_until_friday)

# The API expects a local date-time; we search from midnight of that day.
next_friday_datetime = friday.isoformat() + "T00:00:00"
start_date_local = next_friday_datetime

Create the dictionary that is going to hold our information:

In [17]:
# Column-oriented store for the collected trips: each key is a future
# dataframe column and starts with its own empty list.
new_dict = {
    column: []
    for column in (
        'trip_id',
        'price_of_the_trip',
        'date',
        'origin',
        'destination',
        'count_trips_on_this_route',
    )
}

Collect the data:

In [18]:
# Query the search API once per route and record every returned trip in new_dict.
for index in route_dict:
    current_origin = route_dict[index]['Origin']
    current_destination = route_dict[index]['Destination']
    from_coordinate = route_dict[index]['From_coordinate'] # string : latitude, longitude - current_origin
    to_coordinate = route_dict[index]['To_coordinate'] # string : latitude, longitude - current_destination

    # Values are joined as-is, without extra URL encoding, because the
    # coordinates read from the CSV are already percent-encoded.
    query_parameters = [
        ("key", api_token),
        ("from_coordinate", from_coordinate),
        ("from_country", from_country),
        ("to_coordinate", to_coordinate),
        ("to_country", to_country),
        ("locale", locale),
        ("currency", currency),
        ("count", number_of_trips_we_want_returned),
        # Configured with the other parameters but previously never sent.
        ("requested_seats", requested_seats),
        ("start_date_local", start_date_local),
        ("radius_in_meters", radius_in_meters),
        ("sort", "price:asc"),
    ]
    blabla_query = api_url + "/api/v3/trips?" + "&".join(
        f"{name}={value}" for name, value in query_parameters
    )

    # The timeout prevents the cell from hanging forever on a stalled connection.
    response = requests.get(blabla_query, timeout=30)
    # Fail loudly on any HTTP error (bad key, rate limit, ...) instead of
    # hitting a confusing KeyError while parsing an error payload.
    response.raise_for_status()

    json_trips = response.json() # it's a dictionary
    number_of_trips = json_trips["search_info"]["count"]
    # Tolerate a response without a "trips" key by treating it as no trips.
    for dict_trip in json_trips.get("trips", []):
        # The trip id sits in the link, between "id=" and the first "-".
        trip_id = dict_trip["link"].split("id=")[1].split("-")[0]
        price_of_the_trip = dict_trip["price"]["amount"]
        new_dict['trip_id'].append(trip_id)
        new_dict['price_of_the_trip'].append(price_of_the_trip)
        # Keep only the date part (YYYY-MM-DD) of the requested start date-time.
        new_dict['date'].append(start_date_local[:10])
        new_dict['origin'].append(current_origin)
        new_dict['destination'].append(current_destination)
        new_dict['count_trips_on_this_route'].append(number_of_trips)

In [19]:
# Display the dictionary filled by the collection loop.
new_dict

{'trip_id': ['2033843926', '2029583051'],
 'price_of_the_trip': ['52.50', '57.50'],
 'date': ['2020-08-28', '2020-08-28'],
 'origin': ['Paris', 'Paris'],
 'destination': ['Marseille', 'Marseille'],
 'count_trips_on_this_route': [2, 2]}

Load it into a dataframe:

In [14]:
# Build the dataframe from the collected columns; prices arrive as text
# (e.g. '52.50'), so that column is converted to float.
new_df = pd.DataFrame(new_dict).astype({'price_of_the_trip': 'float'})

Append the new rows to the CSV file stored in the current folder:

In [15]:
# Load the trips saved by previous runs, add the newly collected ones at the
# end, and write the combined table back to the same file.
old_df = pd.read_csv("./blablacar_rennes_brest.csv", index_col="Unnamed: 0")
# pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
df_trips = pd.concat([old_df, new_df], ignore_index=True, sort=False)
df_trips.to_csv("./blablacar_rennes_brest.csv")