### Function to fetch weather data from the OpenWeather API for loading into the bronze table

In [None]:
@logger
def extract_weather(cities_df=None):
    """Fetch current weather for each city and return it as a raw DataFrame.

    For every row of ``cities_df`` the OpenWeather "current weather" endpoint
    is queried with the row's ``id`` column, and the JSON response is stored
    in a single struct column named ``result``.

    Args:
        cities_df: Spark DataFrame with an ``id`` column holding the city ids
            to query. When ``None``, the first five rows of
            ``dim_city_table`` are used.

    Returns:
        A tuple ``(weather_df, start, end)``:

        * ``weather_df`` -- DataFrame with one ``result`` struct column; the
          struct is null for cities whose request failed. The DataFrame is
          cached so the API is not called again on every action; the caller
          may ``unpersist()`` it when done.
        * ``start`` / ``end`` -- naive local-time ``datetime`` objects built
          from the smallest / largest ``result.dt`` value, or ``None`` when
          no request returned data.
    """
    # Imports stay function-local, matching the notebook-cell style.
    from pyspark.sql.types import StructType, StructField, IntegerType, FloatType, StringType, ArrayType
    from pyspark.sql.functions import col, udf
    import requests
    from datetime import datetime
    import os

    # Schema of the raw (bronze) record, mirroring the API's JSON layout.
    weather_data_schema = StructType([
        StructField('visibility', IntegerType(), True),
        StructField('timezone', IntegerType(), True),
        StructField('main', StructType([
            StructField('temp', FloatType(), True),
            StructField('feels_like', FloatType(), True),
            StructField('temp_min', FloatType(), True),
            StructField('temp_max', FloatType(), True),
            StructField('pressure', IntegerType(), True),
            StructField('humidity', IntegerType(), True),
            StructField('sea_level', IntegerType(), True),
            StructField('grnd_level', IntegerType(), True)
        ])),
        StructField('clouds', StructType([
            StructField('all', FloatType(), True)
        ])),
        StructField('sys', StructType([
            StructField('country', StringType(), True),
            StructField('sunrise', IntegerType(), True),
            StructField('sunset', IntegerType(), True)
        ])),
        StructField('dt', IntegerType(), True),
        StructField('coord', StructType([
            StructField('lon', FloatType(), True),
            StructField('lat', FloatType(), True)
        ])),
        StructField('name', StringType(), True),
        StructField('weather', ArrayType(StructType([
            StructField('id', IntegerType(), True),
            StructField('main', StringType(), True),
            StructField('description', StringType(), True),
            StructField('icon', StringType(), True)
        ]), True)),
        StructField('cod', IntegerType(), True),
        StructField('id', IntegerType(), True),
        StructField('wind', StructType([
            # NOTE(review): the API appears to report fractional wind speeds;
            # with IntegerType such values would come out null. Left as-is
            # because changing it alters the returned schema -- confirm.
            StructField('speed', IntegerType(), True),
            StructField('deg', IntegerType(), True),
            StructField('gust', FloatType(), True)
        ])),
        StructField('base', StringType(), True)
    ])

    api_endpoint = "https://api.openweathermap.org/data/2.5/weather?"
    # Read the key on the driver so it is captured in the UDF closure; falls
    # back to the original placeholder when the "API" variable is not set.
    api_key = os.getenv("API", "<API_KEY>")
    # Seconds to wait for the API; without a timeout a stalled connection
    # would hang the Spark task indefinitely.
    request_timeout = 10

    def _ints_to_floats(value, data_type):
        """Recursively turn ints into floats wherever the schema says FloatType.

        Spark nulls a Python int placed in a FloatType field instead of
        casting it, so whole-number readings would otherwise be lost.
        """
        if value is None:
            return None
        if isinstance(data_type, StructType):
            if not isinstance(value, dict):
                return value
            return {
                field.name: _ints_to_floats(value.get(field.name), field.dataType)
                for field in data_type.fields
            }
        if isinstance(data_type, ArrayType):
            if not isinstance(value, list):
                return value
            return [_ints_to_floats(item, data_type.elementType) for item in value]
        # bool is a subclass of int; leave it alone.
        if (isinstance(data_type, FloatType)
                and isinstance(value, int)
                and not isinstance(value, bool)):
            return float(value)
        return value

    def fetch_weather_data(cityID):
        """Return the schema-aligned weather JSON for one city, or None on failure."""
        request_url = f"{api_endpoint}id={cityID}&appid={api_key}&units=metric"
        try:
            response = requests.get(request_url, timeout=request_timeout)
            if response.status_code != 200:
                return None
            payload = response.json()
        except (requests.RequestException, ValueError):
            # One unreachable or malformed response must not fail the whole
            # Spark job; the row simply gets a null result.
            return None
        return _ints_to_floats(payload, weather_data_schema)

    fetch_weather_udf = udf(fetch_weather_data, weather_data_schema)

    if cities_df is None:
        cities_df = spark.sql("SELECT * FROM dim_city_table LIMIT 5;")

    # Cache so that the aggregation below and the caller's later actions
    # reuse one set of API responses instead of re-invoking the UDF.
    weather_df = (
        cities_df
        .withColumn('result', fetch_weather_udf(col('id')))
        .select('result')
        .cache()
    )

    # A single aggregation yields both bounds; it always returns one row,
    # whose values are None when no request succeeded.
    bounds = weather_df.selectExpr("min(result.dt)", "max(result.dt)").first()
    start = datetime.fromtimestamp(bounds[0]) if bounds[0] is not None else None
    end = datetime.fromtimestamp(bounds[1]) if bounds[1] is not None else None

    return weather_df, start, end

  File <command-288248581474616>:64
    request_url = f"{api_endpoint}id={cityID}&appid="{API_KEY}"&units=metric"
                                                     ^
SyntaxError: invalid syntax
