# 1. Data Collection

Coordinates (given as `lat`/`lon` query parameters) and approximate distances of 12 German cities from the North Sea and the Baltic Sea:

#### Berlin
lat=52.52&lon=13.405\
North Sea: about 350 kilometers\
Baltic Sea: about 200 kilometers
#### Hannover
lat=52.375&lon=9.732\
North Sea: about 150 kilometers\
Baltic Sea: about 300 kilometers
#### Düsseldorf
lat=51.227&lon=6.773\
North Sea: about 40 kilometers\
Baltic Sea: about 350 kilometers
#### Stuttgart
lat=48.775&lon=9.182\
North Sea: about 400 kilometers\
Baltic Sea: about 600 kilometers
#### Nürnberg
lat=49.452&lon=11.076\
North Sea: about 350 kilometers\
Baltic Sea: about 500 kilometers
#### Würzburg
lat=49.791&lon=9.954\
North Sea: about 400 kilometers\
Baltic Sea: about 550 kilometers
#### Kaiserslautern
lat=49.444&lon=7.768\
North Sea: about 300 kilometers\
Baltic Sea: about 450 kilometers
#### Osnabrück
lat=52.279&lon=8.047\
North Sea: about 80 kilometers\
Baltic Sea: about 350 kilometers
#### Essen
lat=51.456&lon=7.012\
North Sea: about 60 kilometers\
Baltic Sea: about 350 kilometers
#### Leverkusen
lat=51.033&lon=6.983\
North Sea: about 70 kilometers\
Baltic Sea: about 360 kilometers
#### Bochum
lat=51.481&lon=7.216\
North Sea: about 80 kilometers\
Baltic Sea: about 350 kilometers
#### Gelsenkirchen
lat=51.517&lon=7.085\
North Sea: about 90 kilometers\
Baltic Sea: about 360 kilometers

In [1]:
import datetime
import json
import os
import time

import pandas as pd
import requests

In [2]:
# Static metadata for the 12 cities, kept as one row per city:
# (name, latitude, longitude, distance to North Sea in km, distance to Baltic Sea in km)
_CITY_FIELDS = ("lat", "lon", "distN", "distB")
_CITY_ROWS = [
    ("Berlin", 52.52, 13.405, 350, 200),
    ("Hannover", 52.375, 9.732, 150, 300),
    ("Düsseldorf", 51.227, 6.773, 40, 350),
    ("Stuttgart", 48.775, 9.182, 400, 600),
    ("Nürnberg", 49.452, 11.076, 350, 500),
    ("Würzburg", 49.791, 9.954, 400, 550),
    ("Kaiserslautern", 49.444, 7.768, 300, 450),
    ("Osnabrück", 52.279, 8.047, 80, 350),
    ("Essen", 51.456, 7.012, 60, 350),
    ("Leverkusen", 51.033, 6.983, 70, 360),
    ("Bochum", 51.481, 7.216, 80, 350),
    ("Gelsenkirchen", 51.517, 7.085, 90, 360),
]

# Mapping of city name -> {"lat", "lon", "distN", "distB"}, in the row order above
cities_info = {
    name: dict(zip(_CITY_FIELDS, values))
    for name, *values in _CITY_ROWS
}

In [3]:
# Define function to fetch and process weather data
OPENWEATHER_URL = "https://api.openweathermap.org/data/2.5/weather"

# NOTE(review): this key was committed in plain text and should be rotated.
# Set the OPENWEATHER_API_KEY environment variable to override it; the literal
# is kept only as a fallback so that existing runs keep working.
OPENWEATHER_API_KEY = os.environ.get(
    "OPENWEATHER_API_KEY", "a8ec937ee7eb519f4d06e0699ef1154f"
)

KELVIN_OFFSET = 273.15


def get_weather_data(city_name, city_info, timeout=10):
    """Fetch the current weather for one city and flatten it into a record.

    Parameters
    ----------
    city_name : str
        Name of the city. Not sent to the API (the query is by coordinates);
        accepted so callers can pass the key of ``cities_info`` alongside its value.
    city_info : dict
        Must contain ``"lat"``, ``"lon"``, ``"distN"`` and ``"distB"``.
        ``lat``/``lon`` select the location that is queried.
    timeout : float, optional
        Seconds to wait for the HTTP request before giving up (default 10).

    Returns
    -------
    dict
        Keys ``temp`` (API temperature minus 273.15), ``humidity``,
        ``pressure``, ``description``, ``dt`` (naive local ``datetime`` built
        from the API's Unix timestamp), ``wind_speed``, ``wind_deg``, plus
        ``distN`` and ``distB`` copied from ``city_info``.

    Raises
    ------
    requests.RequestException
        On network errors, timeouts, or a non-success HTTP status.
    KeyError
        If ``city_info`` or the API payload lacks an expected field.
    """
    # Query by this city's own coordinates. The previous version had Berlin's
    # lat/lon baked into the URL, so every city received Berlin's weather.
    query = {
        "lat": city_info["lat"],
        "lon": city_info["lon"],
        "appid": OPENWEATHER_API_KEY,
    }
    response = requests.get(OPENWEATHER_URL, params=query, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of hitting an opaque KeyError below.
    response.raise_for_status()
    city_data = response.json()

    main = city_data["main"]
    wind = city_data["wind"]
    return {
        "temp": main["temp"] - KELVIN_OFFSET,
        "humidity": main["humidity"],
        "pressure": main["pressure"],
        "description": city_data["weather"][0]["description"],
        "dt": datetime.datetime.fromtimestamp(city_data["dt"]),
        "wind_speed": wind["speed"],
        "wind_deg": wind["deg"],
        "distN": city_info["distN"],
        "distB": city_info["distB"],
    }

In [4]:
# Block until the top of the next hour so that sampling starts on a full hour
now = datetime.datetime.now()
one_hour = datetime.timedelta(hours=1)
# Truncate to the current hour, then step one hour forward
next_hour = now.replace(minute=0, second=0, microsecond=0) + one_hour
seconds_until_next_hour = (next_hour - now) / datetime.timedelta(seconds=1)
wait_message = f"Waiting {seconds_until_next_hour:.0f} seconds until the next full hour to start data collection..."
print(wait_message)
time.sleep(seconds_until_next_hour)

Waiting 17 seconds until the next full hour to start data collection...


In [None]:
# Collect one sample per city on every hour and half-hour, for 24 hours.
#
# The sampling loop is the OUTER loop so that all cities are measured in the
# same half-hour slot. With the city loop on the outside, each city would be
# sampled for a full 24 hours before the next one starts: the run would take
# 12 x 24 hours and the cities would never be measured at the same time.
N_SAMPLES = 48  # 24 hours at one sample every 30 minutes
SAMPLE_INTERVAL = datetime.timedelta(minutes=30)

weather_records = {city_name: [] for city_name in cities_info}
collection_start = datetime.datetime.now()

for sample_idx in range(N_SAMPLES):
    for city_name, city_info in cities_info.items():
        try:
            weather_records[city_name].append(get_weather_data(city_name, city_info))
        except (requests.RequestException, KeyError, ValueError) as exc:
            # One failed request must not abort a 24-hour run; report it and
            # leave a gap for this city in this slot.
            print(f"Sample {sample_idx + 1}/{N_SAMPLES}: skipping {city_name} ({exc!r})")

    # Rewrite the CSV files after every round so an interrupted run keeps
    # everything collected so far. Cities with no successful sample yet are
    # skipped rather than written as empty files.
    for city_name, city_weather_data in weather_records.items():
        if city_weather_data:
            df_city = pd.DataFrame(city_weather_data)
            df_city.to_csv(f"{city_name}.csv", index=False)

    # Sleep until the next slot, measured from the start of collection, so the
    # time spent on requests does not accumulate as drift. No wait is needed
    # after the final sample.
    if sample_idx < N_SAMPLES - 1:
        next_sample_at = collection_start + (sample_idx + 1) * SAMPLE_INTERVAL
        remaining = (next_sample_at - datetime.datetime.now()).total_seconds()
        time.sleep(max(0.0, remaining))