In [None]:
# 1. Data Acquisition:
# Use Python's asyncio library to make concurrent API calls to fetch data for multiple cities.
# Justification: Asyncio is lightweight, efficient, and suitable for IO-bound tasks like API calls.
import aiohttp
import asyncio

async def fetch_weather_data(city):
    """Fetch the current-weather payload for one city from OpenWeatherMap.

    Args:
        city: City name to look up; sent as the ``q`` query parameter.

    Returns:
        The decoded JSON body of the HTTP response.
    """
    import os  # local import so this cell does not depend on another cell

    # Let aiohttp build the query string: city names containing spaces, "&"
    # or "#" are percent-encoded instead of corrupting the URL.
    query = {
        "q": str(city),
        # Prefer a key supplied through the environment; fall back to the
        # original placeholder so existing behavior is unchanged.
        "appid": os.environ.get("OPENWEATHER_API_KEY", "YOUR_API_KEY"),
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(
            "https://api.openweathermap.org/data/2.5/weather", params=query
        ) as response:
            return await response.json()

async def main():
    """Fetch weather data for every configured city concurrently.

    Returns:
        A list with one decoded response per city, in the same order as
        ``cities``.
    """
    # Only real city names belong in this list: a literal ``...`` is the
    # Ellipsis object and would be requested as the city "Ellipsis".
    cities = ["Istanbul", "London", "Saint Petersburg"]
    tasks = [fetch_weather_data(city) for city in cities]
    responses = await asyncio.gather(*tasks)
    # Process and store responses
    return responses

# NOTE(review): asyncio.run() raises RuntimeError when an event loop is already
# running in the current thread. If this executes inside a Jupyter kernel, use
# `await main()` instead — confirm how this cell is run.
asyncio.run(main())


In [None]:
# 2. Database Setup:
# Use PostgreSQL as the RDBMS.
# Create a table weather_data with columns: id, city, country, timestamp, temperature, condition.
-- One row per weather observation fetched for a city.
-- IF NOT EXISTS keeps the statement safe to re-run (e.g. on Restart & Run All).
CREATE TABLE IF NOT EXISTS weather_data (
    id SERIAL PRIMARY KEY,      -- surrogate key
    city VARCHAR(50),
    country VARCHAR(50),
    timestamp TIMESTAMP,        -- observation time (stored without time zone)
    temperature FLOAT,
    condition VARCHAR(50)
);


In [None]:
# 3. Data Insertion:
# Insert the fetched data into the weather_data table.
# Assuming conn is a psycopg2 connection object
# Using the psycopg2 connection as a context manager wraps the insert in a
# transaction: it commits when the block succeeds and rolls back if it raises
# (the connection itself stays open), so a failed batch cannot leave the
# connection stuck in an aborted transaction.
with conn:
    with conn.cursor() as cursor:
        # Placeholders keep the values parameterized (no string-built SQL).
        insert_query = """INSERT INTO weather_data (city, country, timestamp, temperature, condition) VALUES (%s, %s, %s, %s, %s)"""
        cursor.executemany(insert_query, data_to_insert)


In [None]:
# 4. Data Analysis:
# Create SQL views or queries for the required analysis.
# 5. Scheduling:
# Set up cron jobs to run the data fetching and analysis scripts periodically.

# Fetch data every hour
# 0 * * * * /path/to/python /path/to/data_fetch_script.py

# Run analysis every day at midnight
# 0 0 * * * /path/to/python /path/to/analysis_script.py


In [None]:
6. Scalability:
The database schema and Python scripts can easily accommodate additional cities by adding more rows and adjusting the list of cities in the script.
7. Validation and Improvements:
The results from the database can be compared with simulator predictions to validate and make improvements.
Possible improvements include optimizing queries, adding indexes to the database, and fine-tuning the concurrency model.


In [None]:
Model Definition
Define a Pydantic model that represents the structure of the weather data. The model includes fields like city, country, timestamp, temperature, and condition with the appropriate data types and constraints.

from pydantic import BaseModel, Field
from datetime import datetime

class WeatherData(BaseModel):
    """Validated record of one weather observation for a single city.

    Every field is required; the three text fields are capped at 50
    characters.
    """

    # ``default=...`` (Ellipsis) marks a field as required.
    city: str = Field(default=..., max_length=50)
    country: str = Field(default=..., max_length=50)
    timestamp: datetime
    temperature: float
    condition: str = Field(default=..., max_length=50)


In [None]:
Validation
Before inserting the data into the database, create an instance of the WeatherData model using the fetched data. If the data is invalid, Pydantic will raise a ValidationError.

import aiohttp
import asyncio
from pydantic import ValidationError

async def fetch_weather_data(city):
    """Fetch, parse and validate the current weather for one city.

    Args:
        city: City name to look up; sent as the ``q`` query parameter.

    Returns:
        A ``WeatherData`` instance, or ``None`` when the request does not
        succeed, the payload lacks an expected field, or validation fails.
    """
    import os
    from datetime import datetime, timezone

    # Let aiohttp build the query string so special characters in the city
    # name are percent-encoded rather than corrupting the URL.
    # NOTE(review): no `units` parameter is sent, so the API's default
    # temperature unit applies — confirm this is what the analysis expects.
    query = {
        "q": str(city),
        "appid": os.environ.get("OPENWEATHER_API_KEY", "YOUR_API_KEY"),
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(
            "https://api.openweathermap.org/data/2.5/weather", params=query
        ) as response:
            # Error responses (unknown city, bad key, ...) do not carry the
            # fields read below; skip them instead of raising KeyError and
            # aborting the whole asyncio.gather() batch.
            if response.status != 200:
                print(f"Request failed for {city}: HTTP {response.status}")
                return None
            data = await response.json()

    # Extract relevant data
    try:
        country = data['sys']['country']
        # 'dt' is a Unix epoch; make it timezone-aware (UTC) so the stored
        # value does not depend on the local timezone of the fetching host.
        timestamp = datetime.fromtimestamp(data['dt'], tz=timezone.utc)
        temperature = data['main']['temp']
        condition = data['weather'][0]['description']
    except (KeyError, IndexError, TypeError) as e:
        print(f"Unexpected payload for {city}: {e!r}")
        return None

    # Validate data using Pydantic
    try:
        return WeatherData(
            city=city,
            country=country,
            timestamp=timestamp,
            temperature=temperature,
            condition=condition
        )
    except ValidationError as e:
        print(f"Validation error for {city}: {e}")
        return None

async def main():
    """Fetch and validate weather data for every configured city concurrently.

    Returns:
        The records that were fetched successfully; cities for which
        ``fetch_weather_data`` returned ``None`` are left out.
    """
    # Only real city names belong in this list: a literal ``...`` is the
    # Ellipsis object and would be requested as the city "Ellipsis".
    cities = ["Istanbul", "London", "Saint Petersburg"]
    tasks = [fetch_weather_data(city) for city in cities]
    responses = await asyncio.gather(*tasks)
    # Process and store valid responses
    return [record for record in responses if record is not None]

# NOTE(review): asyncio.run() raises RuntimeError when an event loop is already
# running in the current thread. If this executes inside a Jupyter kernel, use
# `await main()` instead — confirm how this cell is run.
asyncio.run(main())


In [None]:
Benefits of Validation
Data Integrity: By using Pydantic for validation, you ensure that only valid data gets inserted into the database.
Error Handling: Pydantic will raise errors for any discrepancies in data type or constraints, making it easier to identify issues early on.
Ease of Maintenance: Defining a Pydantic model provides a clear schema for the expected data, making the code easier to understand and maintain.


In [None]:
# add additional analytical information:
# Today
-- Per-city temperature extremes and spread for the current calendar day.
SELECT
    country,
    city,
    MAX(temperature)    AS max_temp_today,
    MIN(temperature)    AS min_temp_today,
    STDDEV(temperature) AS stddev_temp_today
FROM weather_data
WHERE CAST(timestamp AS DATE) = CURRENT_DATE
GROUP BY
    country,
    city;


In [None]:
# Yesterday
-- Per-city temperature extremes and spread for the previous calendar day.
SELECT
    country,
    city,
    MAX(temperature)    AS max_temp_yesterday,
    MIN(temperature)    AS min_temp_yesterday,
    STDDEV(temperature) AS stddev_temp_yesterday
FROM weather_data
WHERE CAST(timestamp AS DATE) = CURRENT_DATE - INTERVAL '1 day'
GROUP BY
    country,
    city;


In [None]:
# current week
-- Per-city temperature extremes and spread for the current week.
-- Comparing truncated timestamps (rather than EXTRACT(week ...)) pins the
-- match to this year's week; a bare week number would also match the same
-- week of every other year present in the table.
SELECT country, city,
       MAX(temperature) AS max_temp_week,
       MIN(temperature) AS min_temp_week,
       STDDEV(temperature) AS stddev_temp_week
FROM weather_data
WHERE DATE_TRUNC('week', timestamp) = DATE_TRUNC('week', LOCALTIMESTAMP)
GROUP BY country, city;


In [None]:
# Last Seven Days:
-- Per-city temperature extremes and spread over the trailing seven days.
SELECT
    country,
    city,
    MAX(temperature)    AS max_temp_7days,
    MIN(temperature)    AS min_temp_7days,
    STDDEV(temperature) AS stddev_temp_7days
FROM weather_data
WHERE timestamp >= CURRENT_TIMESTAMP - INTERVAL '7 days'
GROUP BY
    country,
    city;



In [None]:
# 2. Cities with Highest or Lowest Temperature:
-- For every hour, the city with the highest and the city with the lowest
-- temperature reading.
WITH hourly_temps AS (
  SELECT city, country, temperature,
         -- Expose the hour bucket from the CTE: the outer query cannot see
         -- the base table's timestamp column.
         DATE_TRUNC('hour', timestamp) AS hour,
         ROW_NUMBER() OVER (PARTITION BY DATE_TRUNC('hour', timestamp) ORDER BY temperature DESC) AS rn_max,
         ROW_NUMBER() OVER (PARTITION BY DATE_TRUNC('hour', timestamp) ORDER BY temperature ASC) AS rn_min
  FROM weather_data
)
SELECT city, country, temperature,
       -- A row that is both hottest and coldest in its hour (e.g. the only
       -- reading) is labelled 'Highest' because that branch is tested first.
       CASE WHEN rn_max = 1 THEN 'Highest' WHEN rn_min = 1 THEN 'Lowest' END AS indicator,
       hour
FROM hourly_temps
WHERE rn_max = 1 OR rn_min = 1;


In [None]:
Each Day:
Similar to the hourly query, you can replace DATE_TRUNC('hour', timestamp) with DATE(timestamp) to get daily data.

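Each Week:
Similar to the hourly query, you can replace DATE_TRUNC('hour', timestamp) with DATE_TRUNC('week', timestamp) to get weekly data. (Avoid EXTRACT(week FROM timestamp) for this: it yields only the week number, so the same week of different years would be grouped together.)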



In [None]:
# Number of Times it Rained:
# 
-- Number of stored observations reporting rain in the last 24 hours.
-- The condition column holds the API's free-text description (for example
-- 'light rain'), so match on the word instead of the literal 'Rainy'.
SELECT city, country, COUNT(*) AS rain_count_last_day
FROM weather_data
WHERE condition ILIKE '%rain%' AND timestamp >= NOW() - INTERVAL '1 day'
GROUP BY city, country;

-- Number of stored observations reporting rain in the last seven days.
-- The condition column holds the API's free-text description (for example
-- 'light rain'), so match on the word instead of the literal 'Rainy'.
SELECT city, country, COUNT(*) AS rain_count_last_week
FROM weather_data
WHERE condition ILIKE '%rain%' AND timestamp >= NOW() - INTERVAL '7 days'
GROUP BY city, country;
