### OCI Data Science - pull data
This notebook provides tools and techniques to pull the data required for the project.

##### Use the FastF1 API
* Pull Lap, weather, car, results, and position data 




#### This notebook uses `formula1conda`, which is a custom conda environment
The fastf1 package requires python>=3.8, while the OCI prebuilt packages at this time mostly come with python==3.7.

#### Steps to build and publish a custom conda:
1. Create a YAML file and list your packages.
2. In the terminal, execute: `odsc conda create -f environment.yaml -n my-conda-env`

In [1]:
# Absolute location of the project's notebooks directory. It is made the
# working directory right after the imports, so the relative data paths used
# later are resolved from here.
# NOTE(review): hard-coded for one environment - adjust when running elsewhere.
path = '/home/datascience/WorkSpace/RedBull-Racining-TimeToPit/notebooks'

In [2]:
import os
# Switch to the notebooks directory so that the relative '../../RedBull/data'
# paths used in this notebook resolve from a known location.
os.chdir(path)
import pandas as pd
import logging
import json
import pickle
import requests
import numpy as np
import fastf1
import matplotlib.pyplot as plt
import pickle
# Point fastf1's cache at the shared data directory so repeated session loads
# can reuse previously stored data.
fastf1.Cache.enable_cache('../../RedBull/data')

In [3]:
# schedule = fastf1.get_event_schedule(2022).to_dict()
# schedule = pd.DataFrame.from_dict(schedule)
# schedule.head(3)

In [4]:
# EventName = list(schedule['EventName'][(schedule.index<13) & (schedule.index>1)])

In [5]:
def get_lap_data(session, schedule, evnt, ses):
    '''Return the lap table of a session as a dataframe tagged with event info.

    session  : object whose ``laps`` attribute provides ``to_dict()``
    schedule : dataframe with ``EventName``, ``Country`` and ``EventDate`` columns
    evnt     : event name to look up in ``schedule['EventName']``
    ses      : session label written to the ``session`` column

    The returned frame holds the lap columns followed by ``EventName``,
    ``country``, ``session`` and ``EventDate``; the country and date are
    taken from the first schedule row matching ``evnt``.
    '''
    # Rebuild the laps as a plain pandas DataFrame via a dict round-trip.
    laps_df = pd.DataFrame.from_dict(session.laps.to_dict())

    # Boolean mask selecting the schedule row(s) of this event.
    is_event = schedule['EventName'] == evnt

    laps_df['EventName'] = evnt
    laps_df['country'] = schedule.loc[is_event, 'Country'].values[0]
    laps_df['session'] = ses
    laps_df['EventDate'] = schedule.loc[is_event, 'EventDate'].values[0]
    return laps_df

In [6]:
def get_weather_data(session, schedule, evnt, ses):
    '''Return the weather table of a session as a dataframe tagged with event info.

    session  : object whose ``weather_data`` attribute provides ``to_dict()``
    schedule : dataframe with ``EventName``, ``Country`` and ``EventDate`` columns
    evnt     : event name to look up in ``schedule['EventName']``
    ses      : session label written to the ``session`` column

    The weather columns are followed by ``EventName``, ``country``,
    ``session`` and ``EventDate``; the country and date come from the first
    schedule row matching ``evnt``.
    '''
    # Schedule rows describing the requested event (first match is used).
    event_rows = schedule['EventName'] == evnt
    event_country = schedule['Country'][event_rows].values[0]
    event_date = schedule['EventDate'][event_rows].values[0]

    # Rebuild the weather samples as a plain pandas DataFrame.
    weather_df = pd.DataFrame.from_dict(session.weather_data.to_dict())
    weather_df['EventName'] = evnt
    weather_df['country'] = event_country
    weather_df['session'] = ses
    weather_df['EventDate'] = event_date
    return weather_df

In [7]:
def get_car_data(session, schedule, evnt, ses):
    '''Get the car data of every driver in a session as a single dataframe.

    Each driver's table is tagged with a ``driver`` column and all tables are
    stacked, so the result covers the whole session rather than only the
    last driver iterated.

    Parameters
    ----------
    session : object whose ``car_data`` maps a driver identifier to a table
        exposing ``to_dict()``.
    schedule : pandas.DataFrame with ``EventName``, ``Country`` and
        ``EventDate`` columns.
    evnt : event name to look up in ``schedule['EventName']``.
    ses : session label written to the ``session`` column.

    Returns
    -------
    pandas.DataFrame
        The stacked per-driver rows (fresh 0..n-1 index) with the extra
        columns ``driver``, ``EventName``, ``country``, ``session`` and
        ``EventDate``. Empty (but with those columns) when the session has
        no car data.

    Raises
    ------
    IndexError
        If ``evnt`` does not appear in ``schedule['EventName']``.
    '''
    per_driver = []
    for ii in session.car_data:
        # Rebuild each driver's table as a plain pandas DataFrame.
        driver_data = pd.DataFrame.from_dict(session.car_data[ii].to_dict())
        driver_data['driver'] = ii
        per_driver.append(driver_data)

    if per_driver:
        # ignore_index avoids duplicated row labels across drivers.
        car_data = pd.concat(per_driver, ignore_index=True)
    else:
        # No drivers: return an empty frame instead of failing on an
        # unassigned variable.
        car_data = pd.DataFrame({'driver': []})

    is_event = schedule['EventName'] == evnt
    car_data['EventName'] = evnt
    car_data['country'] = schedule['Country'][is_event].values[0]
    car_data['session'] = ses
    car_data['EventDate'] = schedule['EventDate'][is_event].values[0]
    return car_data

In [8]:
def get_position_data(session, schedule, evnt, ses):
    '''Get the position data of every driver in a session as a single dataframe.

    Each driver's table is tagged with a ``driver`` column and all tables are
    stacked, so the result covers the whole session rather than only the
    last driver iterated.

    Parameters
    ----------
    session : object whose ``pos_data`` maps a driver identifier to a table
        exposing ``to_dict()``.
    schedule : pandas.DataFrame with ``EventName``, ``Country`` and
        ``EventDate`` columns.
    evnt : event name to look up in ``schedule['EventName']``.
    ses : session label written to the ``session`` column.

    Returns
    -------
    pandas.DataFrame
        The stacked per-driver rows (fresh 0..n-1 index) with the extra
        columns ``driver``, ``EventName``, ``country``, ``session`` and
        ``EventDate``. Empty (but with those columns) when the session has
        no position data.

    Raises
    ------
    IndexError
        If ``evnt`` does not appear in ``schedule['EventName']``.
    '''
    per_driver = []
    for ii in session.pos_data:
        # Rebuild each driver's table as a plain pandas DataFrame.
        driver_positions = pd.DataFrame.from_dict(session.pos_data[ii].to_dict())
        driver_positions['driver'] = ii
        per_driver.append(driver_positions)

    if per_driver:
        # ignore_index avoids duplicated row labels across drivers.
        position = pd.concat(per_driver, ignore_index=True)
    else:
        # No drivers: return an empty frame instead of failing on an
        # unassigned variable.
        position = pd.DataFrame({'driver': []})

    is_event = schedule['EventName'] == evnt
    position['EventName'] = evnt
    position['country'] = schedule['Country'][is_event].values[0]
    position['session'] = ses
    position['EventDate'] = schedule['EventDate'][is_event].values[0]
    return position

In [9]:
def get_results(session, schedule, evnt, ses):
    '''Return the results table of a session as a dataframe tagged with event info.

    session  : object whose ``results`` attribute provides ``to_dict()``
    schedule : dataframe with ``EventName``, ``Country`` and ``EventDate`` columns
    evnt     : event name to look up in ``schedule['EventName']``
    ses      : session label written to the ``session`` column

    The original row labels of the results are moved into a leading
    ``index`` column; ``EventName``, ``country``, ``session`` and
    ``EventDate`` are appended after the result columns.
    '''
    # Plain DataFrame copy of the results, with the row labels as a column.
    results_dict = session.results.to_dict()
    results_df = pd.DataFrame.from_dict(results_dict)
    results_df = results_df.reset_index()

    # Schedule entries for this event; the first match supplies the metadata.
    matching = schedule.loc[schedule['EventName'] == evnt]

    results_df['EventName'] = evnt
    results_df['country'] = matching['Country'].values[0]
    results_df['session'] = ses
    results_df['EventDate'] = matching['EventDate'].values[0]
    return results_df

In [None]:
%%time
# Pull every Qualifying and Race session of each season and store the
# per-session dataframes as one pickle file per data kind and year.
for year in [2021,2020, 2019]:

    # Per-year accumulators: one dataframe per loaded session.
    laps =[]
    weathers =[]
    results = []
    car_data_ses =[]
    all_positions =[]

    # Season schedule rebuilt as a plain pandas DataFrame.
    sch = fastf1.get_event_schedule(year).to_dict()
    sch = pd.DataFrame.from_dict(sch)

    # Keep only events whose name contains "Grand"; entries such as
    # 'Pre-Season Test' are therefore skipped.
    EventName = [s for s in sch['EventName'] if "Grand" in s]

    for event in EventName:
        for session_type in ['Qualifying','Race']:
            session = fastf1.get_session(year, event, session_type)
            session.load()

            ## get lap data for a session
            laps.append(get_lap_data(session, sch, event, session_type))

            ## get weather data for a session
            weathers.append(get_weather_data(session, sch, event, session_type))

            ## get car_data for a session
            car_data_ses.append(get_car_data(session, sch, event, session_type))

            ## get positions for a session
            all_positions.append(get_position_data(session, sch, event, session_type))

            ## get results for a session
            results.append(get_results(session, sch, event, session_type))

    ## save all extracted session data in a year
    # Each file is written inside a `with` block so the handle is flushed and
    # closed before the next one is opened.
    yearly_outputs = {
        'laps': laps,
        'weathers': weathers,
        'results': results,
        'car_data_ses': car_data_ses,
        'all_positions': all_positions,
    }
    for name, frames in yearly_outputs.items():
        with open('../../RedBull/data/' + name + '_' + str(year) + '.pkl', 'wb') as file:
            pickle.dump(frames, file)

core           INFO 	Loading data for Australian Grand Prix - Qualifying [v2.2.8]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for timing_data
api            INFO 	Using cached data for timing_app_data
core           INFO 	Processing timing data...
api            INFO 	Using cached data for session_status_data
api            INFO 	Using cached data for track_status_data
api            INFO 	Using cached data for car_data
api            INFO 	Using cached data for position_data
api            INFO 	Using cached data for weather_data
api            INFO 	Using cached data for race_control_messages
core           INFO 	Finished loading data for 20 drivers: ['44', '77', '5', '33', '16', '8', '20', '4', '7', '11', '27', '3', '23', '99', '26', '18', '10', '55', '63', '88']
core           INFO 	Loading data for Australian Grand Prix - Race [v2.2.8]
api            INFO 	Using cached data for driver_info
api            INFO 	Using cached data for