In [1]:
!pip install requests pandas python-dotenv


Collecting python-dotenv
  Downloading python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)
Downloading python_dotenv-1.0.1-py3-none-any.whl (19 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.0.1


In [4]:
import nest_asyncio
import os
import requests
import pandas as pd
import asyncio
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, dayofweek, hour, to_timestamp
from google.colab import drive
from dotenv import load_dotenv

# Mount Google Drive at /content/drive; the secrets file loaded later lives under MyDrive.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [5]:
# Load environment variables (including the OpenWeatherMap API key) from the secrets
# file on the mounted Drive. The path uses a single leading slash: a leading "//" is
# implementation-defined on POSIX and only happens to resolve to "/" on Linux.
load_dotenv('/content/drive/MyDrive/Big Data Engineering/2024/Project 2/Final/secrets.env')

True

In [10]:
# Patch asyncio so the event loop can be re-entered from inside the notebook
# (run_until_complete is called further down while a loop is already active).
nest_asyncio.apply()

# Create (or reuse) the Spark session used to build the weather DataFrames.
spark = (
    SparkSession.builder
    .appName("WeatherDataProcessing")
    .getOrCreate()
)

# The API key is read from the process environment rather than hard-coded.
OPENWEATHER_API_KEY = os.getenv('OPENWEATHER_API_KEY')

# Report whether the key was found; a missing or empty key is only reported, not fatal.
if OPENWEATHER_API_KEY:
    print("API key loaded successfully.")
else:
    print("Error: API key not found in environment variables.")


def get_weather(city, timeout=10):
    """Fetch the current weather for ``city`` from the OpenWeatherMap API.

    Args:
        city: Location name passed as the ``q`` query parameter.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The decoded JSON response (a dict) on HTTP 200, otherwise ``None``.
        Errors are printed rather than raised, so callers only need to check
        for ``None``.
    """
    # HTTPS keeps the API key (sent in the query string) off the wire in clear text.
    url = "https://api.openweathermap.org/data/2.5/weather"
    # Passing the query via ``params`` lets requests URL-encode the values, so city
    # names containing spaces, '&', '#' or non-ASCII characters are sent correctly.
    params = {"q": city, "appid": OPENWEATHER_API_KEY, "units": "metric"}
    try:
        # Without a timeout a stalled connection would block the caller indefinitely.
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code == 200:
            return response.json()
        print(f"Error fetching weather data for {city}: {response.status_code}")
        return None
    except Exception as e:
        # Deliberately broad: this function is best-effort and must never raise.
        print(f"Error during the request: {e}")
        return None

def preprocess_weather_data(weather_data):
    """Flatten a weather API response into a one-element list of row dicts.

    The selected fields are copied out of the nested ``weather_data`` mapping
    and a 'timestamp' entry holding the current time as an ISO-8601 string is
    appended. A missing key raises ``KeyError``.
    """
    # Output field -> sequence of keys/indices to follow inside ``weather_data``.
    field_paths = (
        ('city', ('name',)),
        ('description', ('weather', 0, 'description')),
        ('temperature', ('main', 'temp')),
        ('humidity', ('main', 'humidity')),
        ('wind_speed', ('wind', 'speed')),
        ('pressure', ('main', 'pressure')),
        ('cloudiness', ('clouds', 'all')),
    )

    record = {}
    for field, path in field_paths:
        value = weather_data
        for step in path:
            value = value[step]
        record[field] = value

    # Stamp the row with the time it was processed.
    record['timestamp'] = pd.to_datetime('now').isoformat()
    return [record]


def to_spark_dataframe(data):
    """Build a Spark DataFrame from ``data`` and parse its 'timestamp' column.

    The 'timestamp' column is replaced by the result of ``to_timestamp`` applied
    to it; the resulting DataFrame is returned.
    """
    return spark.createDataFrame(data).withColumn(
        'timestamp', to_timestamp('timestamp')
    )

def add_time_features(df):
    """Return ``df`` with 'hour_of_day' and 'day_of_week' columns added.

    Both columns are derived from the 'timestamp' column using Spark's
    ``hour`` and ``dayofweek`` functions.
    """
    return (
        df
        .withColumn('hour_of_day', hour(col('timestamp')))
        .withColumn('day_of_week', dayofweek(col('timestamp')))
    )

# Coroutine that polls the weather for one city at a fixed interval
async def fetch_weather_periodically(city, interval=60, max_iterations=None):
    """Repeatedly fetch, preprocess and display the weather for ``city``.

    Args:
        city: Location name forwarded to ``get_weather``.
        interval: Seconds to sleep between two polls.
        max_iterations: Optional number of polls after which the coroutine
            returns. ``None`` (the default) polls until the task is cancelled.
    """
    loop = asyncio.get_running_loop()
    completed = 0
    while max_iterations is None or completed < max_iterations:
        # get_weather performs a blocking HTTP request; running it in the default
        # executor keeps the event loop free so other cities' tasks are not stalled.
        weather_data = await loop.run_in_executor(None, get_weather, city)
        if weather_data:
            try:
                preprocessed_data = preprocess_weather_data(weather_data)
            except (KeyError, IndexError, TypeError) as e:
                # A payload with missing fields should not end polling for every city.
                print(f"Skipping malformed weather data for {city}: {e!r}")
            else:
                df = to_spark_dataframe(preprocessed_data)
                df_with_time_features = add_time_features(df)
                # Show the DataFrame with added features
                df_with_time_features.show()
        completed += 1
        # Skip the final sleep when a bounded run has just finished its last poll.
        if max_iterations is None or completed < max_iterations:
            await asyncio.sleep(interval)

# Start one polling task per city and block until they finish or are interrupted
def start_real_time_weather_fetch(cities, interval=60):
    """Run ``fetch_weather_periodically`` concurrently for every city.

    Args:
        cities: Iterable of location names; one task is created per entry.
        interval: Seconds between polls, forwarded to each task.

    Blocks until all tasks complete. Any exception (including
    ``KeyboardInterrupt``) still propagates to the caller, but the tasks are
    cancelled first so none of them stays scheduled on the event loop and
    keeps polling in the background.
    """
    loop = asyncio.get_event_loop()
    tasks = [
        loop.create_task(fetch_weather_periodically(city, interval))
        for city in cities
    ]
    try:
        loop.run_until_complete(asyncio.gather(*tasks))
    finally:
        for task in tasks:
            task.cancel()
        if tasks:
            # Give the loop a chance to deliver the cancellations; the
            # CancelledError results are collected instead of raised.
            loop.run_until_complete(asyncio.gather(*tasks, return_exceptions=True))

# Poll the weather for each location every 30 seconds; this call blocks until interrupted.
cities = ["California", "Cape Town"]
start_real_time_weather_fetch(cities=cities, interval=30)

API key loaded successfully.
+----------+----------+-----------+--------+--------+-----------+--------------------+----------+-----------+-----------+
|      city|cloudiness|description|humidity|pressure|temperature|           timestamp|wind_speed|hour_of_day|day_of_week|
+----------+----------+-----------+--------+--------+-----------+--------------------+----------+-----------+-----------+
|California|         0|  clear sky|      82|    1027|       6.26|2024-11-10 12:11:...|      1.54|         12|          1|
+----------+----------+-----------+--------+--------+-----------+--------------------+----------+-----------+-----------+

+---------+----------+-------------+--------+--------+-----------+--------------------+----------+-----------+-----------+
|     city|cloudiness|  description|humidity|pressure|temperature|           timestamp|wind_speed|hour_of_day|day_of_week|
+---------+----------+-------------+--------+--------+-----------+--------------------+----------+-----------+----

KeyboardInterrupt: 