# Working with the OpenWeather API

##### https://openweathermap.org/


This Python notebook demonstrates a complete workflow for working with the OpenWeather API and transforming raw weather data into structured, meaningful insights. The project showcases how API data can be requested, validated, processed, and saved in a way that is both reusable and production-friendly.


In [1]:
# Importing libraries

import requests
import pandas as pd
import os

In [2]:
# Loading the API key
# The key is read from the "weather_api" environment variable instead of being hard-coded, for security purposes.
# You can get your own free API key from "https://openweathermap.org/api"

# os.getenv returns None when the variable is missing (it never raises
# KeyError), so the value has to be checked explicitly.
api_key = os.getenv("weather_api")
if not api_key:
    raise RuntimeError("API key not set. Please set the 'weather_api' environment variable.")

### 1)  Fetching current weather data

In [3]:
# base url
base_url = "https://api.openweathermap.org/data/2.5"

In [4]:
# Smoke test: request Tokyo's current weather to confirm the key and endpoint work.
# A timeout is passed so the call cannot hang indefinitely, and the query
# string is built by requests from `params` rather than by hand.
response = requests.get(
    f"{base_url}/weather",
    params={"q": "Tokyo", "appid": api_key},
    timeout=10,
)
response

<Response [200]>

#### 1.1) Fetching current weather data for single location

In [5]:
# Function to fetch real time weather data for any named city or any specific location(using geographical coordinates)

def get_current_data(city=None, lat=None, lon=None):
    """Fetch the current weather for a city or for a pair of coordinates.

    Args:
        city (str, optional): City name, e.g. "New Delhi".
        lat (float, optional): Latitude of the location.
        lon (float, optional): Longitude of the location.

    Provide either ``city`` or both ``lat`` and ``lon``; ``city`` takes
    precedence when both are given.

    Returns:
        dict or None: The decoded JSON response (requested with
        ``units=metric``), or None when the arguments are incomplete or the
        request fails. Failures are reported with print(), not raised.
    """
    if city:
        params = {"q": city}
    elif lat is not None and lon is not None:
        params = {"lat": lat, "lon": lon}
    else:
        print("Either provide a city name or latitude & longitude values.")
        return None

    # Let requests build the query string so spaces, accents and reserved
    # characters ("&", "#", ...) in the city name are escaped correctly.
    params["appid"] = api_key
    params["units"] = "metric"

    try:
        response = requests.get(f"{base_url}/weather", params=params, timeout=10)
        response.raise_for_status()
        return response.json()

    except requests.exceptions.Timeout:
        print("Error: Request timed out")

    except requests.exceptions.ConnectionError:
        print("Error: Network problem")

    except requests.exceptions.HTTPError as e:
        # Compare against None explicitly: a Response object is falsy when
        # it carries an error status.
        status = e.response.status_code if e.response is not None else None
        if status in (400, 404):
            print("Error: Invalid city name or coordinates")
        elif status == 401:
            print("Error: Invalid or missing API key")
        else:
            print(f"Error: HTTP {status} returned by the weather service")

    except Exception as e:
        print("Unexpected error:", e)

    return None

In [6]:
# Reading the raw JSON response
data = get_current_data("New Delhi")
data

{'coord': {'lon': 77.2311, 'lat': 28.6128},
 'weather': [{'id': 741, 'main': 'Fog', 'description': 'fog', 'icon': '50d'}],
 'base': 'stations',
 'main': {'temp': 10.09,
  'feels_like': 9.43,
  'temp_min': 10.09,
  'temp_max': 10.09,
  'pressure': 1017,
  'humidity': 87,
  'sea_level': 1017,
  'grnd_level': 991},
 'visibility': 800,
 'wind': {'speed': 1.54, 'deg': 240},
 'clouds': {'all': 40},
 'dt': 1766718486,
 'sys': {'type': 1,
  'id': 9165,
  'country': 'IN',
  'sunrise': 1766713306,
  'sunset': 1766750470},
 'timezone': 19800,
 'id': 1261481,
 'name': 'New Delhi',
 'cod': 200}

#### 1.2) Function to filter current weather data
- This function extracts only the useful information from the raw JSON response

In [7]:
# Keep only the fields of interest from the raw current-weather JSON, as a flat Python dictionary.

def filter_current_data(data):
    """Flatten a current-weather response into a single-level dictionary.

    Args:
        data (dict): Decoded current-weather JSON.
    Returns:
        dict: The selected fields stored under flat, descriptive keys.
    Raises:
        KeyError: If one of the selected keys is absent from ``data``.
    """
    # Each entry pairs an output key with the chain of keys/indices to
    # follow inside ``data`` to reach its value.
    field_paths = (
        ("date", ("dt",)),
        ("city", ("name",)),
        ("weather_description", ("weather", 0, "description")),
        ("temperature", ("main", "temp")),
        ("feels_like", ("main", "feels_like")),
        ("pressure", ("main", "pressure")),
        ("humidity", ("main", "humidity")),
        ("visibility", ("visibility",)),
        ("wind_speed", ("wind", "speed")),
        ("sunrise", ("sys", "sunrise")),
        ("sunset", ("sys", "sunset")),
        ("long", ("coord", "lon")),
        ("lat", ("coord", "lat")),
        ("timezone", ("timezone",)),
        ("country", ("sys", "country")),
    )

    record = {}
    for out_key, path in field_paths:
        node = data
        for step in path:
            node = node[step]
        record[out_key] = node
    return record

In [8]:
# output
filter_current_data(data)

{'date': 1766718486,
 'city': 'New Delhi',
 'weather_description': 'fog',
 'temperature': 10.09,
 'feels_like': 9.43,
 'pressure': 1017,
 'humidity': 87,
 'visibility': 800,
 'wind_speed': 1.54,
 'sunrise': 1766713306,
 'sunset': 1766750470,
 'long': 77.2311,
 'lat': 28.6128,
 'timezone': 19800,
 'country': 'IN'}

#### 1.3) Fetching current weather information for multiple locations.

In [9]:
## demo
# Getting multiple cities weather information
cities1 = ["Tokyo", "New York", "New Delhi", "Beijing", "Seoul"]
mylist = []

for i, city in enumerate(cities1):
    data = get_current_data(city)
    # get_current_data returns None when the request fails; skip that city
    # instead of handing None to filter_current_data, which would raise.
    if data is not None:
        mydict = filter_current_data(data)
        mylist.append(mydict)
    # Progress indicator: index of the city just processed.
    print(i, end=", ")

0, 1, 2, 3, 4, 

#### This function fetches current weather data for multiple cities

In [10]:
# Function for fetching current weather data for multiple cities

def get_multiple_city(cities):
    """Fetch and filter current weather data for multiple cities.

    Args:
        cities (list of str): List of city names.
    Returns:
        list of dict: Each dict contains processed weather info. Cities
        whose lookup returns no data or raises are skipped, so the result
        may be shorter than ``cities``.
    """
    mylist = []

    for i, city in enumerate(cities):
        try:
            data = get_current_data(city=city)
            if data:
                mylist.append(filter_current_data(data))
        # Bind the exception as `e`: the message below uses it, and a bare
        # `except:` would also trap KeyboardInterrupt/SystemExit.
        except Exception as e:
            print(f"Error fetching {city}: {e}")
        # Progress indicator: index of the city just processed.
        print(i, end=", ")

    return mylist

In [11]:
# output
data = get_multiple_city(["New York", "Jaipur", "New Delhi", "asdf", "Tokyo"])

0, 1, 2, Error: Invalid city name or coordinates
3, 4, 

In [12]:
pd.DataFrame(data)

Unnamed: 0,date,city,weather_description,temperature,feels_like,pressure,humidity,visibility,wind_speed,sunrise,sunset,long,lat,timezone,country
0,1766718822,New York,clear sky,-0.43,-6.33,1020,45,10000,6.69,1766665105,1766698432,-74.006,40.7143,-18000,US
1,1766718960,Jaipur,mist,11.62,10.69,1018,71,2000,0.51,1766713420,1766751036,75.8167,26.9167,19800,IN
2,1766718486,New Delhi,fog,10.09,9.43,1017,87,800,1.54,1766713306,1766750470,77.2311,28.6128,19800,IN
3,1766718803,Tokyo,few clouds,10.87,8.95,1003,36,10000,13.38,1766699353,1766734432,139.6917,35.6895,32400,JP


#### This function fetches current weather data for multiple locations (latitude/longitude pairs)

In [13]:
def get_multiple_location(coordinates):
    """
    Collect filtered current-weather records for several coordinate pairs.

    Args:
        coordinates (list of tuples): (lat, lon) pairs to look up.
    Returns:
        list of dict: One filtered record per pair that returned data;
        pairs that returned nothing or raised are left out.
    """
    records = []

    for position, (lat, lon) in enumerate(coordinates):
        try:
            raw = get_current_data(lat=lat, lon=lon)
            if raw:
                record = filter_current_data(raw)
                records.append(record)
        except Exception as err:
            print(f"Error fetching ({lat}, {lon}): {err}")
        # Progress marker: index of the pair just handled.
        print(position, end=", ")

    return records

In [14]:
loc = get_multiple_location([(29.1667,75.7167	), (28.6128, 77.2311), (1234,63)])

0, 1, Error: Invalid city name or coordinates
2, 

In [15]:
df = pd.DataFrame(loc)
df

Unnamed: 0,date,city,weather_description,temperature,feels_like,pressure,humidity,visibility,wind_speed,sunrise,sunset,long,lat,timezone,country
0,1766719290,Hisar,overcast clouds,14.97,13.33,1018,31,10000,1.17,1766713745,1766750758,75.7167,29.1667,19800,IN
1,1766719290,New Delhi,fog,10.09,9.43,1017,87,800,1.54,1766713306,1766750470,77.2311,28.6128,19800,IN


### 2) Fetching real time data for 100 cities and storing it into csv file 

In [16]:
# 100 city names queried by name; "Bogotá" is spelled with its accent
# (it was previously stored as a mis-encoded byte sequence).
cities = [
    "Tokyo", "New York", "London", "Paris", "Shanghai", "New Delhi", "Beijing", "Seoul", "Los Angeles", "Singapore",
    "Madrid", "Rome", "Dubai", "Berlin", "Barcelona", "Sydney", "Hong Kong", "Toronto", "Chicago", "Melbourne",
    "Amsterdam", "Istanbul", "Vienna", "Prague", "Athens", "Kyoto", "Mexico City", "Saint Petersburg", "Cairo", "Mumbai",
    "Bangkok", "Osaka", "San Francisco", "Boston", "Washington DC", "Milan", "Taipei", "Frankfurt", "Zurich", "Stockholm",
    "Copenhagen", "Munich", "Lisbon", "Vancouver", "Brussels", "Helsinki", "Dublin", "Oslo", "Warsaw", "Budapest",
    "Rio de Janeiro", "Buenos Aires", "Cape Town", "Jerusalem", "Tel Aviv", "Auckland", "Edinburgh", "Geneva", "Hamburg", "Lyon",
    "Portland", "Austin", "Miami", "Atlanta", "Dallas", "Houston", "Seattle", "Chandigarh", "Philadelphia", "Montreal",
    "Jakarta", "Manila", "Guangzhou", "Chennai", "Bengaluru", "Moscow", "Kolkata", "Lagos", "Kinshasa", "Lima",
    "Bogotá", "Johannesburg", "Nairobi", "Addis Ababa", "Riyadh", "Mecca", "Kuala Lumpur", "Jaipur", "Hanoi", "Valencia",
    "Shimla", "Varanasi", "Cusco", "Siem Reap", "Petra", "Antalya", "Venice", "Florence", "Santorini", "Dubrovnik"]

In [17]:
city_list = get_multiple_city(cities)

0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 

In [18]:
df = pd.DataFrame(city_list)
df.head()

Unnamed: 0,date,city,weather_description,temperature,feels_like,pressure,humidity,visibility,wind_speed,sunrise,sunset,long,lat,timezone,country
0,1766718803,Tokyo,few clouds,10.87,8.95,1003,36,10000,13.38,1766699353,1766734432,139.6917,35.6895,32400,JP
1,1766718822,New York,clear sky,-0.43,-6.33,1020,45,10000,6.69,1766665105,1766698432,-74.006,40.7143,-18000,US
2,1766719025,London,broken clouds,3.75,0.58,1027,80,10000,3.6,1766736344,1766764576,-0.1257,51.5085,0,GB
3,1766718965,Paris,clear sky,-1.35,-6.03,1023,86,10000,4.12,1766734999,1766764733,2.3488,48.8534,3600,FR
4,1766719295,Shanghai,clear sky,5.59,3.21,1030,41,10000,3.0,1766703053,1766739488,121.4581,31.2222,28800,CN


In [19]:
# Parse the UTC timestamp columns (seconds) into pandas datetimes

for ts_col in ("date", "sunrise", "sunset"):
    df[ts_col] = pd.to_datetime(df[ts_col], unit="s")

In [20]:
# Derive local-time columns by shifting each timestamp by the "timezone" column (seconds)

local_shift = pd.to_timedelta(df['timezone'], unit='s')
for ts_col in ("date", "sunrise", "sunset"):
    df[ts_col + "_local"] = df[ts_col] + local_shift

In [21]:
df.head(10)

Unnamed: 0,date,city,weather_description,temperature,feels_like,pressure,humidity,visibility,wind_speed,sunrise,sunset,long,lat,timezone,country,date_local,sunrise_local,sunset_local
0,2025-12-26 03:13:23,Tokyo,few clouds,10.87,8.95,1003,36,10000,13.38,2025-12-25 21:49:13,2025-12-26 07:33:52,139.6917,35.6895,32400,JP,2025-12-26 12:13:23,2025-12-26 06:49:13,2025-12-26 16:33:52
1,2025-12-26 03:13:42,New York,clear sky,-0.43,-6.33,1020,45,10000,6.69,2025-12-25 12:18:25,2025-12-25 21:33:52,-74.006,40.7143,-18000,US,2025-12-25 22:13:42,2025-12-25 07:18:25,2025-12-25 16:33:52
2,2025-12-26 03:17:05,London,broken clouds,3.75,0.58,1027,80,10000,3.6,2025-12-26 08:05:44,2025-12-26 15:56:16,-0.1257,51.5085,0,GB,2025-12-26 03:17:05,2025-12-26 08:05:44,2025-12-26 15:56:16
3,2025-12-26 03:16:05,Paris,clear sky,-1.35,-6.03,1023,86,10000,4.12,2025-12-26 07:43:19,2025-12-26 15:58:53,2.3488,48.8534,3600,FR,2025-12-26 04:16:05,2025-12-26 08:43:19,2025-12-26 16:58:53
4,2025-12-26 03:21:35,Shanghai,clear sky,5.59,3.21,1030,41,10000,3.0,2025-12-25 22:50:53,2025-12-26 08:58:08,121.4581,31.2222,28800,CN,2025-12-26 11:21:35,2025-12-26 06:50:53,2025-12-26 16:58:08
5,2025-12-26 03:08:06,New Delhi,fog,10.09,9.43,1017,87,800,1.54,2025-12-26 01:41:46,2025-12-26 12:01:10,77.2311,28.6128,19800,IN,2025-12-26 08:38:06,2025-12-26 07:11:46,2025-12-26 17:31:10
6,2025-12-26 03:15:28,Beijing,scattered clouds,-2.06,-4.19,1029,28,10000,1.55,2025-12-25 23:34:28,2025-12-26 08:55:02,116.3972,39.9075,28800,CN,2025-12-26 11:15:28,2025-12-26 07:34:28,2025-12-26 16:55:02
7,2025-12-26 03:13:31,Seoul,clear sky,-7.24,-14.07,1028,32,10000,5.14,2025-12-25 22:45:16,2025-12-26 08:19:35,126.9778,37.5683,32400,KR,2025-12-26 12:13:31,2025-12-26 07:45:16,2025-12-26 17:19:35
8,2025-12-26 03:17:35,Los Angeles,overcast clouds,16.17,16.14,1019,88,10000,5.14,2025-12-25 14:56:34,2025-12-26 00:49:45,-118.2437,34.0522,-28800,US,2025-12-25 19:17:35,2025-12-25 06:56:34,2025-12-25 16:49:45
9,2025-12-26 03:19:57,Singapore,broken clouds,29.51,34.53,1010,73,10000,4.63,2025-12-25 23:03:34,2025-12-26 11:06:21,103.8501,1.2897,28800,SG,2025-12-26 11:19:57,2025-12-26 07:03:34,2025-12-26 19:06:21


In [22]:
# Exporting data to csv
df.to_csv("weather.csv", index=False)

### 3) Fetching 5-day forecast data for any location

- 5 day forecast is available at any location on the globe. It includes weather forecast data with 3-hour step. Forecast is available in JSON or XML format.

In [23]:
# This function fetches the 5-day forecast (3-hour steps) for any location.

def get_forecast_data(city=None, lat=None, lon=None):
    """Fetch the forecast for a city or for a pair of coordinates.

    Args:
        city (str, optional): City name.
        lat (float, optional): Latitude of the location.
        lon (float, optional): Longitude of the location.

    Provide either ``city`` or both ``lat`` and ``lon``; ``city`` takes
    precedence when both are given.

    Returns:
        dict or None: The decoded JSON response (requested with
        ``units=metric``), or None when the arguments are incomplete or the
        request fails. Failures are reported with print(), not raised.
    """
    # `elif` is required here: with two independent `if` statements a
    # city-only call fell through to the `else` branch and returned None.
    if city:
        params = {"q": city}
    elif lat is not None and lon is not None:
        params = {"lat": lat, "lon": lon}
    else:
        print("Either provide a city name or latitude & longitude values.")
        return None

    # Let requests build the query string so spaces, accents and reserved
    # characters in the city name are escaped correctly.
    params["appid"] = api_key
    params["units"] = "metric"

    try:
        response = requests.get(f"{base_url}/forecast", params=params, timeout=10)
        response.raise_for_status()
        return response.json()

    except requests.exceptions.Timeout:
        print("Error: Request timed out")

    except requests.exceptions.ConnectionError:
        print("Error: Network problem")

    except requests.exceptions.HTTPError as e:
        # Compare against None explicitly: a Response object is falsy when
        # it carries an error status.
        status = e.response.status_code if e.response is not None else None
        if status in (400, 404):
            print("Error: Invalid city name or coordinates")
        elif status == 401:
            print("Error: Invalid or missing API key")
        else:
            print(f"Error: HTTP {status} returned by the weather service")

    except Exception as e:
        print("Unexpected error:", e)

    return None

In [24]:
data = get_forecast_data(lat=29.1667, lon=75.7167)

#### This function filters the forecast data and extracts only the useful information

In [25]:
def filter_forecast_data(data):
    """Split a forecast response into location metadata and forecast rows.

    Args:
        data (dict): Decoded forecast JSON.
    Returns:
        list: A two-element Python list:
                1) a metadata dictionary with the location's name,
                   latitude, longitude and country
                2) a list of dictionaries, one per entry of ``data["list"]``
    """
    location = {
        "name": data["city"]["name"],
        "latitude": data["city"]["coord"]["lat"],
        "longitude": data["city"]["coord"]["lon"],
        "country": data["city"]["country"],
    }

    # NOTE(review): the sample output shows visibility values of 10000, which
    # looks like metres rather than kilometres; the "visibility(KMs)" key is
    # kept as-is so existing consumers keep working. Confirm against the API docs.
    forecasts = [
        {
            "date(UTC)": step["dt"],
            "temperature(celsius)": step["main"]["temp"],
            "feels_like(celsius)": step["main"]["feels_like"],
            "visibility(KMs)": step["visibility"],
            "pressure(hPa)": step["main"]["pressure"],
            "humidity(%)": step["main"]["humidity"],
            "description": step["weather"][0]["description"],
            "wind_speed(m/s)": step["wind"]["speed"],
            "date_str": step["dt_txt"],
        }
        for step in data["list"]
    ]

    return [location, forecasts]

In [26]:
filter_forecast_data(data)[0]

{'name': 'Hisar', 'latitude': 29.1667, 'longitude': 75.7167, 'country': 'IN'}

In [27]:
pd.DataFrame(filter_forecast_data(data)[1]).head()

Unnamed: 0,date(UTC),temperature(celsius),feels_like(celsius),visibility(KMs),pressure(hPa),humidity(%),description,wind_speed(m/s),date_str
0,1766728800,16.78,15.22,10000,1018,27,overcast clouds,1.6,2025-12-26 06:00:00
1,1766739600,20.5,19.13,10000,1016,20,broken clouds,1.84,2025-12-26 09:00:00
2,1766750400,18.78,17.26,10000,1015,21,broken clouds,1.59,2025-12-26 12:00:00
3,1766761200,15.78,14.04,10000,1016,24,broken clouds,1.88,2025-12-26 15:00:00
4,1766772000,13.81,12.0,10000,1017,29,scattered clouds,1.76,2025-12-26 18:00:00


### 4) Fetching air quality data for any location

In [28]:
# Coordinates of the location to query (same values as the New York row above).
lon= -74.006
lat= 40.7143

# Build the URL from base_url so the request goes over HTTPS; with plain
# http:// the API key in the query string would travel unencrypted.
url = f"{base_url}/air_pollution?lat={lat}&lon={lon}&appid={api_key}"

In [29]:
# A timeout is passed so the call cannot hang indefinitely.
response = requests.get(url, timeout=10)
response.json()

{'coord': {'lon': -74.006, 'lat': 40.7128},
 'list': [{'main': {'aqi': 1},
   'components': {'co': 205.18,
    'no': 0,
    'no2': 11.89,
    'o3': 59.32,
    'so2': 2.01,
    'pm2_5': 2.94,
    'pm10': 5.12,
    'nh3': 0.6},
   'dt': 1766719281}]}