### Parameters in Notebooks

In [0]:
# Print the built-in help text for the dbutils.widgets utility.
dbutils.widgets.help()

In [0]:
# Create a text widget named "p_data_source" with an empty default value;
# once this cell has run, the parameter shows up in the widget bar at the top of the notebook.
dbutils.widgets.text("p_data_source","")
# Read the widget's current value into a Python variable.
v_data_source = dbutils.widgets.get("p_data_source")

# Echo the value: whatever was entered in the text widget above is displayed as this cell's output.
v_data_source

Out[3]: 'test_value'

#### Run the configuration notebook

In [0]:
%run "../includes/configuration"

In [0]:
# List the shared includes folder to confirm the notebooks referenced by the %run cells exist.
! ls /Workspace/Formula1/includes/

common_functions  configuration


Out[4]: [FileInfo(path='dbfs:/mnt/formularacedata/presentation/', name='presentation/', size=0, modificationTime=0),
 FileInfo(path='dbfs:/mnt/formularacedata/processed/', name='processed/', size=0, modificationTime=0),
 FileInfo(path='dbfs:/mnt/formularacedata/raw/', name='raw/', size=0, modificationTime=0)]

In [0]:
%run "/Formula1/includes/common_functions"

In [0]:
# Render the list of DBFS mount points as a table.
display(dbutils.fs.mounts())

mountPoint,source,encryptionType
/databricks-datasets,databricks-datasets,
/mnt/formularacedata/raw,abfss://raw@formularacedata.dfs.core.windows.net,
/databricks/mlflow-tracking,databricks/mlflow-tracking,
/mnt/formularacedata/presentation,abfss://presentation@formularacedata.dfs.core.windows.net,
/databricks-results,databricks-results,
/mnt/formularacedata/processed,abfss://processed@formularacedata.dfs.core.windows.net,
/databricks/mlflow-registry,databricks/mlflow-registry,
/,DatabricksRoot,


In [0]:
# Explicit DDL schema for circuits.csv, so Spark does not have to infer column types.
schema = (
    "circuit_id bigInt, circuit_ref String, name String, "
    "location String, country String, "
    "lat float, lng float, alt int, url String"
)

# raw_folder_path is expected to be defined by the configuration notebook included earlier.
file_path = f'{raw_folder_path}/circuits.csv'

# Read the CSV with the declared schema, treating the first line as a header row.
df = spark.read.csv(file_path, schema=schema, header=True)

# Quick look at the first three rows.
df.show(n=3)


+----------+-----------+--------------------+------------+---------+--------+-------+---+--------------------+
|circuit_id|circuit_ref|                name|    location|  country|     lat|    lng|alt|                 url|
+----------+-----------+--------------------+------------+---------+--------+-------+---+--------------------+
|         1|albert_park|Albert Park Grand...|   Melbourne|Australia|-37.8497|144.968| 10|http://en.wikiped...|
|         2|     sepang|Sepang Internatio...|Kuala Lumpur| Malaysia| 2.76083|101.738| 18|http://en.wikiped...|
|         3|    bahrain|Bahrain Internati...|      Sakhir|  Bahrain| 26.0325|50.5106|  7|http://en.wikiped...|
+----------+-----------+--------------------+------------+---------+--------+-------+---+--------------------+
only showing top 3 rows



In [0]:
# Print the DataFrame's column names and types to check them against the declared schema.
df.printSchema()

root
 |-- circuit_id: long (nullable = true)
 |-- circuit_ref: string (nullable = true)
 |-- name: string (nullable = true)
 |-- location: string (nullable = true)
 |-- country: string (nullable = true)
 |-- lat: float (nullable = true)
 |-- lng: float (nullable = true)
 |-- alt: integer (nullable = true)
 |-- url: string (nullable = true)



In [0]:
from pyspark.sql.functions import lit

# Wrap the current value of the "p_data_source" widget as a literal column,
# then attach it to every row under the column name "test".
data_source_literal = lit(dbutils.widgets.get("p_data_source"))
df_new = df.withColumn("test", data_source_literal)

In [0]:
# Pass the frame through the add_ingestion_date helper.
# NOTE(review): the helper is not defined in this notebook — presumably it comes from the
# common_functions include; the output below shows an added ingestion_date column.
df_with_time = add_ingestion_date(df_new)

# Print five rows without truncating long column values.
df_with_time.show(n=5, truncate=False)

+----------+-----------+------------------------------+------------+---------+--------+-------+---+-----------------------------------------------------------+----------+-----------------------+
|circuit_id|circuit_ref|name                          |location    |country  |lat     |lng    |alt|url                                                        |test      |ingestion_date         |
+----------+-----------+------------------------------+------------+---------+--------+-------+---+-----------------------------------------------------------+----------+-----------------------+
|1         |albert_park|Albert Park Grand Prix Circuit|Melbourne   |Australia|-37.8497|144.968|10 |http://en.wikipedia.org/wiki/Melbourne_Grand_Prix_Circuit  |test_value|2023-06-29 18:55:35.989|
|2         |sepang     |Sepang International Circuit  |Kuala Lumpur|Malaysia |2.76083 |101.738|18 |http://en.wikipedia.org/wiki/Sepang_International_Circuit  |test_value|2023-06-29 18:55:35.989|
|3         |bahrain    |B

In [0]:
# Persist the result as the Delta table f1_processed.circuits, replacing any existing contents.
df_with_time.write.saveAsTable(
    "f1_processed.circuits",
    format="delta",
    mode="overwrite",
)

In [0]:
%sql
-- Preview up to five rows of f1_processed.circuits; there is no ORDER BY, so which rows come back is not guaranteed.
SELECT * FROM f1_processed.circuits LIMIT 5

circuit_id,circuit_ref,name,location,country,lat,lng,alt,url,test,ingestion_date
1,albert_park,Albert Park Grand Prix Circuit,Melbourne,Australia,-37.8497,144.968,10,http://en.wikipedia.org/wiki/Melbourne_Grand_Prix_Circuit,test_value,2023-06-29T18:55:38.334+0000
2,sepang,Sepang International Circuit,Kuala Lumpur,Malaysia,2.76083,101.738,18,http://en.wikipedia.org/wiki/Sepang_International_Circuit,test_value,2023-06-29T18:55:38.334+0000
3,bahrain,Bahrain International Circuit,Sakhir,Bahrain,26.0325,50.5106,7,http://en.wikipedia.org/wiki/Bahrain_International_Circuit,test_value,2023-06-29T18:55:38.334+0000
4,catalunya,Circuit de Barcelona-Catalunya,Montmeló,Spain,41.57,2.26111,109,http://en.wikipedia.org/wiki/Circuit_de_Barcelona-Catalunya,test_value,2023-06-29T18:55:38.334+0000
5,istanbul,Istanbul Park,Istanbul,Turkey,40.9517,29.405,130,http://en.wikipedia.org/wiki/Istanbul_Park,test_value,2023-06-29T18:55:38.334+0000


In [0]:
# End the notebook run here and hand the string below back as the notebook's exit value.
# NOTE(review): "sucess" looks like a typo for "success". It is left unchanged because any caller
# that compares the returned value would need to be updated at the same time — confirm before fixing.
dbutils.notebook.exit("sucess")

sucess