## Ingest constructors.json file


In [0]:
%run ../includes/configuration

In [0]:
%run ../includes/common_functions

In [0]:
# Notebook parameter: declare the 'p_data_source' text widget (empty default)
# and read its current value. The value is later stamped onto every row as
# the 'data_source' column.
dbutils.widgets.text('p_data_source', '')
v_data_source = dbutils.widgets.get('p_data_source')

In [0]:
# Notebook parameter: declare the 'p_file_date' text widget (defaulting to
# '2021-03-21') and read its current value. The value selects the dated
# sub-folder the JSON file is read from and is also stored as the
# 'file_date' column.
dbutils.widgets.text('p_file_date', '2021-03-21')
v_file_date = dbutils.widgets.get('p_file_date')

## Step 1.- Read the JSON file using the spark dataframe reader

In [0]:
# The schema is declared as a DDL string, one literal per column.
# Adjacent string literals are concatenated by Python, so the reader still
# receives a single comma-separated DDL definition.
constructor_schema = (
    'constructorId INT, '
    'constructorRef STRING, '
    'name STRING, '
    'nationality STRING, '
    'url STRING'
)

In [0]:
# Load constructors.json from the dated sub-folder of the raw layer, applying
# the explicit schema rather than letting Spark infer one.
# NOTE(review): raw_folder_path is not defined in this notebook; presumably it
# comes from the ../includes/configuration notebook run above - confirm.
constructor_df = (
    spark.read
    .schema(constructor_schema)
    .json(f'{raw_folder_path}/{v_file_date}/constructors.json')
)

## Step 2.- Drop unwanted columns from the dataframe

In [0]:
# One way to drop the column: pass its name as a string.
constructor_dropped_df = constructor_df.drop('url')

# Another way is to reference the column through the dataframe itself:
# constructor_dropped_df = constructor_df.drop(constructor_df.url)

# A third way uses the col function:
# from pyspark.sql.functions import col
# constructor_dropped_df = constructor_df.drop(col('url'))

## Step 3.- Rename columns and add ingestion date, data source and file date

In [0]:
from pyspark.sql.functions import lit

In [0]:
# Build the final dataframe:
#   1. add_ingestion_date(...) is applied first
#      (NOTE(review): helper is not defined here; presumably it comes from
#      ../includes/common_functions and appends an ingestion timestamp - confirm),
#   2. the camelCase id/ref columns are renamed to snake_case,
#   3. the widget values are attached as constant 'data_source' and
#      'file_date' columns.
constructor_final_df = (
    add_ingestion_date(constructor_dropped_df)
    .withColumnRenamed('constructorId', 'constructor_id')
    .withColumnRenamed('constructorRef', 'constructor_ref')
    .withColumn('data_source', lit(v_data_source))
    .withColumn('file_date', lit(v_file_date))
)

## Step 4.- Write output to a Delta table

In [0]:

# Persist the result as the managed table f1_processed.constructors in Delta
# format; 'overwrite' replaces whatever the table held before this run.
(
    constructor_final_df.write
    .mode('overwrite')
    .format('delta')
    .saveAsTable('f1_processed.constructors')
)

In [0]:
%sql
-- Preview the rows just written to the processed constructors table.
SELECT *
FROM f1_processed.constructors

In [0]:
# End the notebook run and hand back 'Success' as its exit value.
# NOTE(review): presumably read by a calling/orchestrating notebook - confirm.
dbutils.notebook.exit('Success')