# Introduction to APIs for Data Acquisition: Johannesburg Weather Data

## 0. Requirements

In [2]:
pip install openmeteo-requests --quiet

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
pip install requests-cache retry-requests numpy pandas --quiet

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [12]:
import openmeteo_requests

import pandas as pd
from datetime import datetime, timedelta
import seaborn as sns
import matplotlib.pyplot as plt
import requests_cache
from retry_requests import retry

## 1. Data Extraction

In [5]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = -1)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)


In [6]:

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
	"latitude": 26.21,
	"longitude": 28.03,
	"start_date": "2025-04-28",
	"end_date": "2025-05-12",
	"daily": ["temperature_2m_mean", "temperature_2m_max", "temperature_2m_min", "daylight_duration", "precipitation_sum", "rain_sum", "wind_speed_10m_max", "weather_code"],
	"hourly": ["temperature_2m", "relative_humidity_2m", "surface_pressure", "rain", "cloud_cover", "wind_speed_100m", "wind_direction_100m", "soil_temperature_28_to_100cm", "soil_moisture_28_to_100cm"],
	"models": "best_match"
}

In [7]:
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

Coordinates 26.186290740966797°N 28.026317596435547°E
Elevation 414.0 m asl
Timezone NoneNone
Timezone difference to GMT+0 0 s


## 3. Trend Analysis

### Hourly Data Analysis

In [13]:
# Process hourly data. The order of variables needs to be the same as requested.
hourly = response.Hourly()
hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
hourly_surface_pressure = hourly.Variables(2).ValuesAsNumpy()
hourly_rain = hourly.Variables(3).ValuesAsNumpy()
hourly_cloud_cover = hourly.Variables(4).ValuesAsNumpy()
hourly_wind_speed_100m = hourly.Variables(5).ValuesAsNumpy()
hourly_wind_direction_100m = hourly.Variables(6).ValuesAsNumpy()
hourly_soil_temperature_28_to_100cm = hourly.Variables(7).ValuesAsNumpy()
hourly_soil_moisture_28_to_100cm = hourly.Variables(8).ValuesAsNumpy()

hourly_data = {"date": pd.date_range(
	start = pd.to_datetime(hourly.Time(), unit = "s", utc = True),
	end = pd.to_datetime(hourly.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = hourly.Interval()),
	inclusive = "left"
)}

hourly_data["temperature_2m"] = hourly_temperature_2m
hourly_data["relative_humidity_2m"] = hourly_relative_humidity_2m
hourly_data["surface_pressure"] = hourly_surface_pressure
hourly_data["rain"] = hourly_rain
hourly_data["cloud_cover"] = hourly_cloud_cover
hourly_data["wind_speed_100m"] = hourly_wind_speed_100m
hourly_data["wind_direction_100m"] = hourly_wind_direction_100m
hourly_data["soil_temperature_28_to_100cm"] = hourly_soil_temperature_28_to_100cm
hourly_data["soil_moisture_28_to_100cm"] = hourly_soil_moisture_28_to_100cm

hourly_dataframe = pd.DataFrame(data = hourly_data)

In [9]:
hourly_dataframe

Unnamed: 0,date,temperature_2m,relative_humidity_2m,surface_pressure,rain,cloud_cover,wind_speed_100m,wind_direction_100m,soil_temperature_28_to_100cm,soil_moisture_28_to_100cm
0,2025-04-28 00:00:00+00:00,19.874001,26.721901,966.051636,0.0,0.0,25.369785,21.650501,22.174002,0.01
1,2025-04-28 01:00:00+00:00,19.024000,28.582645,965.345459,0.0,0.0,20.304924,24.065628,22.224001,0.01
2,2025-04-28 02:00:00+00:00,18.524000,30.244455,965.170898,0.0,0.0,19.738541,24.227736,22.224001,0.01
3,2025-04-28 03:00:00+00:00,18.074001,32.018841,964.908630,0.0,0.0,19.521496,32.957428,22.224001,0.01
4,2025-04-28 04:00:00+00:00,18.124001,32.613075,965.107117,0.0,0.0,18.694021,38.745987,22.224001,0.01
...,...,...,...,...,...,...,...,...,...,...
355,2025-05-12 19:00:00+00:00,28.224001,18.616549,966.282837,0.0,33.0,12.860870,46.701324,23.374001,0.01
356,2025-05-12 20:00:00+00:00,27.274000,19.608904,966.236206,0.0,1.0,11.886816,57.994659,23.374001,0.01
357,2025-05-12 21:00:00+00:00,26.474001,20.189835,965.925110,0.0,1.0,10.196647,69.325500,23.374001,0.01
358,2025-05-12 22:00:00+00:00,25.674002,21.017382,965.422668,0.0,0.0,6.698806,83.829926,23.374001,0.01


### Daily Data Analysis

In [14]:
# Process daily data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_temperature_2m_mean = daily.Variables(0).ValuesAsNumpy()
daily_temperature_2m_max = daily.Variables(1).ValuesAsNumpy()
daily_temperature_2m_min = daily.Variables(2).ValuesAsNumpy()
daily_daylight_duration = daily.Variables(3).ValuesAsNumpy()
daily_precipitation_sum = daily.Variables(4).ValuesAsNumpy()
daily_rain_sum = daily.Variables(5).ValuesAsNumpy()
daily_wind_speed_10m_max = daily.Variables(6).ValuesAsNumpy()
daily_weather_code = daily.Variables(7).ValuesAsNumpy()

daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}

daily_data["temperature_2m_mean"] = daily_temperature_2m_mean
daily_data["temperature_2m_max"] = daily_temperature_2m_max
daily_data["temperature_2m_min"] = daily_temperature_2m_min
daily_data["daylight_duration"] = daily_daylight_duration
daily_data["precipitation_sum"] = daily_precipitation_sum
daily_data["rain_sum"] = daily_rain_sum
daily_data["wind_speed_10m_max"] = daily_wind_speed_10m_max
daily_data["weather_code"] = daily_weather_code

daily_dataframe = pd.DataFrame(data = daily_data)

In [11]:
daily_dataframe

Unnamed: 0,date,temperature_2m_mean,temperature_2m_max,temperature_2m_min,daylight_duration,precipitation_sum,rain_sum,wind_speed_10m_max,weather_code
0,2025-04-28 00:00:00+00:00,25.811499,32.023998,18.074001,47126.386719,0.0,0.0,18.222721,0.0
1,2025-04-29 00:00:00+00:00,28.582331,35.073997,21.274,47206.753906,0.0,0.0,31.1686,3.0
2,2025-04-30 00:00:00+00:00,22.465668,26.324001,17.974001,47286.261719,0.0,0.0,33.225971,2.0
3,2025-05-01 00:00:00+00:00,20.388582,24.874001,15.224,47364.828125,0.0,0.0,20.775774,1.0
4,2025-05-02 00:00:00+00:00,19.961498,25.174002,13.774,47442.375,0.0,0.0,27.753292,0.0
5,2025-05-03 00:00:00+00:00,19.186499,23.524,14.374,47518.839844,0.0,0.0,31.119703,0.0
6,2025-05-04 00:00:00+00:00,19.351084,24.374001,13.824,47594.132812,0.0,0.0,25.52829,0.0
7,2025-05-05 00:00:00+00:00,22.034416,27.674002,14.774,47668.175781,0.0,0.0,16.056362,3.0
8,2025-05-06 00:00:00+00:00,24.074003,29.424002,16.674002,47740.894531,0.0,0.0,15.882896,1.0
9,2025-05-07 00:00:00+00:00,26.498999,32.273998,19.774,47812.304688,0.0,0.0,20.215368,0.0


In [15]:
cols_to_convert = [
    'temperature_2m_mean',
    'temperature_2m_max',
    'temperature_2m_min',
    'wind_speed_10m_max',
    'precipitation_sum',
    'rain_sum',
    'daylight_duration'
]
 
daily_dataframe[cols_to_convert] = daily_dataframe[cols_to_convert].apply(pd.to_numeric, errors='coerce')
 

In [16]:
daily_dataframe = daily_dataframe.round(2)
 
pd.set_option('display.float_format', '{:.2f}'.format)

In [18]:
daily_dataframe['only_date'] = pd.to_datetime(daily_dataframe['time']).dt.date

KeyError: 'time'

In [17]:
plt.figure(figsize=(10, 5))
plt.plot(daily_dataframe['only_date'], daily_dataframe['temperature_2m_mean'], marker='o')
plt.title('Average Temperature Over Time')
plt.xlabel('Date')
plt.ylabel('Mean Temperature (°C)')
plt.xticks(rotation=45)
plt.grid(True)
plt.tight_layout()
plt.show()

KeyError: 'only_date'

<Figure size 1000x500 with 0 Axes>