# Collecting weather data 
> **OpenWeatherMap API**  

In [1]:
import datetime
import os
from datetime import datetime
from urllib.parse import quote

import dotenv
import pandas as pd
import requests
from dotenv import load_dotenv
from pytz import timezone


In [3]:
# Load the variables from the .env file into os.environ before any of them is read.
load_dotenv()

True

In [4]:
# Zone name used for all timestamps; city_forecast() reads this global
# when it stamps each batch with its retrieval time.
tz = 'Europe/Berlin'
# Quick check: the current time in that zone.
datetime.now(tz=timezone(tz))

datetime.datetime(2025, 10, 15, 17, 13, 39, 384491, tzinfo=<DstTzInfo 'Europe/Berlin' CEST+2:00:00 DST>)

In [5]:
# Every setting below is required by later cells, so report all missing ones
# at once instead of letting a silent None end up inside the database URL.
_REQUIRED_ENV_VARS = ('OpenWeatherAPIKey', 'mysql_password', 'username', 'my_port', 'localhost')
_missing_env_vars = [name for name in _REQUIRED_ENV_VARS if name not in os.environ]
if _missing_env_vars:
    raise KeyError(f"Missing environment variable(s): {', '.join(_missing_env_vars)}")

# NOTE(review): `username` may collide with an OS-provided variable of the same
# name (e.g. USERNAME on Windows); if load_dotenv() does not override existing
# variables, the OS value would win. Consider a more specific key - TODO confirm.
OpenWeatherAPIKey = os.environ['OpenWeatherAPIKey']
password_mysql = os.environ['mysql_password']
my_user_mysql = os.environ['username']
my_sql_port = os.environ['my_port']
my_localhost = os.environ['localhost']

In [6]:
# Connection settings for the MySQL `gans` schema (pymysql driver).
schema = "gans"
host = my_localhost
user = my_user_mysql
password = password_mysql
port = my_sql_port
# Percent-encode the credentials: characters such as '@', ':', '/' or '%' in a
# user name or password would otherwise be parsed as URL delimiters.
# safe="" also encodes '/', and spaces become %20 rather than '+'.
connection_string = (
    f'mysql+pymysql://{quote(str(user), safe="")}:{quote(str(password), safe="")}'
    f'@{host}:{port}/{schema}'
)

## Creating the function

The function below reads each city's name and ID from the **`cities`** table of the `gans` database and fetches a forecast for every city.

In [7]:
# Read the whole `cities` table; passing a bare table name (not a query) loads every row.
cities_df_from_sql = pd.read_sql(sql="cities", con=connection_string)
cities_df_from_sql

Unnamed: 0,city_id,city_name,country
0,1,Berlin,Germany
1,2,Hamburg,Germany
2,3,Munich,Germany


In [10]:
def _forecast_record(forecast, city_id, time_retrieved):
    """Flatten one entry of the forecast ``list`` into a flat row dict."""
    main = forecast.get("main") or {}
    # "weather" may be missing or an empty list; fall back to an empty mapping
    # so the description becomes None instead of raising IndexError.
    weather = (forecast.get("weather") or [{}])[0]
    return {
        "city_id": city_id,
        "forecast_date": forecast["dt_txt"],
        "weather_desc": weather.get("main"),
        "temp": main.get("temp"),
        "feels_like": main.get("feels_like"),
        "temp_min": main.get("temp_min"),
        "temp_max": main.get("temp_max"),
        "humidity": main.get("humidity"),
        "wind_speed": (forecast.get("wind") or {}).get("speed"),
        "visibility": forecast.get("visibility"),
        "pop": forecast.get("pop"),
        # "rain" / "snow" are only read when present; their "3h" value is kept.
        "rain": (forecast.get("rain") or {}).get("3h"),
        "snow": (forecast.get("snow") or {}).get("3h"),
        "time_retrieved": time_retrieved,
    }


def city_forecast(cities_df, timeout=10):
    """Fetch OpenWeatherMap forecasts for every city in ``cities_df``.

    For each row the city name is geocoded, then the forecast endpoint is
    queried (``units=metric``) for the first match. A city whose lookup or
    forecast request fails is reported with ``print`` and skipped, so one
    failing city never aborts the whole run.

    Reads the module-level globals ``OpenWeatherAPIKey`` (API key) and ``tz``
    (zone name used for the ``time_retrieved`` stamp).

    Parameters
    ----------
    cities_df : pandas.DataFrame
        Must contain the columns ``city_name`` and ``city_id``.
    timeout : float, default 10
        Seconds to wait for each HTTP request before giving up on that city.

    Returns
    -------
    pandas.DataFrame
        One row per forecast entry (at most 40 per city) with the columns
        ``city_id``, ``forecast_date``, ``weather_desc``, ``temp``,
        ``feels_like``, ``temp_min``, ``temp_max``, ``humidity``,
        ``wind_speed``, ``visibility``, ``pop``, ``rain``, ``snow`` and
        ``time_retrieved``. Empty if no city produced a forecast.
    """
    geo_endpoint = "https://api.openweathermap.org/geo/1.0/direct"
    forecast_endpoint = "https://api.openweathermap.org/data/2.5/forecast"
    all_forecasts = []

    for _, row in cities_df.iterrows():
        city = row["city_name"]
        city_id = row["city_id"]

        # --- Geocoding: city name -> coordinates ---------------------------
        # `params=` lets requests escape the city name; HTTPS keeps the API
        # key out of plain-text traffic.
        try:
            geo_response = requests.get(
                geo_endpoint,
                params={"q": city, "limit": 5, "appid": OpenWeatherAPIKey},
                timeout=timeout,
            )
            if geo_response.status_code != 200:
                print(f"Error getting coords for {city}: {geo_response.status_code}")
                continue
            geo_json = geo_response.json()
        except (requests.RequestException, ValueError) as exc:
            # Only the exception type is printed: the full message can contain
            # the request URL, which includes the API key.
            print(f"Error getting coords for {city}: {type(exc).__name__}")
            continue

        # An error payload is a dict, an unknown city an empty list; neither
        # has a usable first match.
        if not isinstance(geo_json, list) or not geo_json:
            print(f"Error getting coords for {city}.")
            continue

        lat, lon = geo_json[0]['lat'], geo_json[0]['lon']

        # --- Forecast for those coordinates --------------------------------
        try:
            response = requests.get(
                forecast_endpoint,
                params={
                    "lat": lat,
                    "lon": lon,
                    "appid": OpenWeatherAPIKey,
                    "units": "metric",
                },
                timeout=timeout,
            )
            if response.status_code != 200:
                print(f"Error getting forecast metrics for {city}: {response.status_code}")
                continue
            weather_json = response.json()
        except (requests.RequestException, ValueError) as exc:
            print(f"Error getting forecast metrics for {city}: {type(exc).__name__}")
            continue

        # One timestamp per city so all of its rows share the same retrieval time.
        time_retrieved = datetime.now(timezone(tz))

        all_forecasts.extend(
            _forecast_record(forecast, city_id, time_retrieved)
            for forecast in weather_json.get("list", [])[:40]
        )

    return pd.DataFrame(all_forecasts)


In [11]:
# city_forecast() already returns a DataFrame, so it is used directly
# instead of being wrapped in another pd.DataFrame(...) call.
weather_df = city_forecast(cities_df_from_sql)
weather_df

Unnamed: 0,city_id,forecast_date,weather_desc,temp,feels_like,temp_min,temp_max,humidity,wind_speed,visibility,pop,rain,snow,time_retrieved
0,1,2025-10-15 18:00:00,Clouds,13.94,13.35,13.94,14.05,75,1.69,10000,0.0,,,2025-10-15 17:14:28.827111+02:00
1,1,2025-10-15 21:00:00,Clouds,13.02,12.49,12.59,13.02,81,2.57,10000,0.0,,,2025-10-15 17:14:28.827111+02:00
2,1,2025-10-16 00:00:00,Rain,10.95,10.35,10.95,10.95,86,2.94,10000,0.2,0.15,,2025-10-15 17:14:28.827111+02:00
3,1,2025-10-16 03:00:00,Rain,12.14,11.42,12.14,12.14,77,3.40,10000,0.2,0.10,,2025-10-15 17:14:28.827111+02:00
4,1,2025-10-16 06:00:00,Clouds,11.94,11.12,11.94,11.94,74,4.17,10000,0.0,,,2025-10-15 17:14:28.827111+02:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
115,3,2025-10-20 03:00:00,Clouds,4.99,4.08,4.99,4.99,85,1.39,10000,0.0,,,2025-10-15 17:14:29.366524+02:00
116,3,2025-10-20 06:00:00,Clouds,4.69,3.14,4.69,4.69,81,1.88,10000,0.0,,,2025-10-15 17:14:29.366524+02:00
117,3,2025-10-20 09:00:00,Clouds,11.87,10.47,11.87,11.87,52,1.75,10000,0.0,,,2025-10-15 17:14:29.366524+02:00
118,3,2025-10-20 12:00:00,Clouds,15.99,14.77,15.99,15.99,43,2.06,10000,0.0,,,2025-10-15 17:14:29.366524+02:00


In [12]:
# Append the fetched forecast rows to the `weather` table. The DataFrame index
# is not written. Because of if_exists='append', re-running this cell inserts
# the same rows again.
weather_df.to_sql(
    name='weather',
    con=connection_string,
    if_exists='append',
    index=False,
)

120