Step 1: Install Required Packages

In [1]:
!pip install requests pandas



Step 2: Import Libraries

In [2]:
import os
from datetime import datetime

import pandas as pd
import requests

Step 3: API Configuration

In [3]:
# The OpenWeatherMap API key is read from the OPENWEATHER_API_KEY environment
# variable so the credential is never stored in (or shared with) the notebook.
# NOTE: any key that was previously committed in this cell should be treated
# as compromised and rotated.
API_KEY = os.environ.get('OPENWEATHER_API_KEY', '')
if not API_KEY:
    print("Warning: OPENWEATHER_API_KEY is not set; the API request is expected to fail.")

# City name sent as the `q` query parameter of the forecast request.
CITY = 'London'

# HTTPS keeps the API key (sent as a query parameter) encrypted in transit.
BASE_URL = 'https://api.openweathermap.org/data/2.5/forecast'

Step 4: API Request

In [4]:
# Query parameters for the forecast endpoint: city name, API key, and
# 'metric' units.
params = {
    'q': CITY,
    'appid': API_KEY,
    'units': 'metric'
}

# A timeout prevents a stalled connection from hanging the kernel forever.
response = requests.get(BASE_URL, params=params, timeout=10)

# Check the status before trusting the body: an error response is not
# guaranteed to be JSON, so decoding it is attempted defensively.
if response.status_code != 200:
    try:
        message = response.json().get('message', 'Unknown error')
    except (ValueError, AttributeError):
        message = response.text or 'Unknown error'
    # Raise instead of calling exit(): exit() would shut down the notebook
    # kernel, whereas an exception simply stops execution at this cell.
    raise RuntimeError(f"Error: {message}")

data = response.json()

Step 5: Extract Relevant Data

In [10]:
# Each element of data['list'] is one forecast entry; pull the fields of
# interest out into parallel lists (one list per future DataFrame column).
forecasts = data['list']

timestamps = [entry['dt'] for entry in forecasts]
temperatures = [entry['main']['temp'] for entry in forecasts]
feels_like = [entry['main']['feels_like'] for entry in forecasts]
humidity = [entry['main']['humidity'] for entry in forecasts]
wind_speed = [entry['wind']['speed'] for entry in forecasts]

# Only the first item of each entry's 'weather' list is used.
weather_desc = [entry['weather'][0]['description'] for entry in forecasts]

# The 'rain' key (and its '3h' value) may be absent; record None in that case.
rain_3h = [entry.get('rain', {}).get('3h', None) for entry in forecasts]

Step 6: Create DataFrame and Clean Data

In [11]:
# Assemble the per-forecast lists into one frame, turn the epoch-second
# timestamps into a datetime index, and treat a missing rain reading as 0.
weather_df = (
    pd.DataFrame({
        'timestamp': timestamps,
        'temperature': temperatures,
        'feels_like': feels_like,
        'humidity': humidity,
        'wind_speed': wind_speed,
        'weather_description': weather_desc,
        'rain_3h': rain_3h,
    })
    .assign(datetime=lambda frame: pd.to_datetime(frame['timestamp'], unit='s'))
    .set_index('datetime')
    .drop('timestamp', axis=1)
    .assign(rain_3h=lambda frame: frame['rain_3h'].fillna(0))
)

# Report fully duplicated rows (index values are not part of the comparison).
duplicate_count = weather_df.duplicated().sum()
print(f"Duplicate rows: {duplicate_count}")

print("\nData types before cleaning:")
print(weather_df.dtypes)

# Force the measurement columns to numeric; unparseable values become NaN.
numeric_cols = ['temperature', 'feels_like', 'humidity', 'wind_speed', 'rain_3h']
numeric_part = weather_df[numeric_cols].apply(pd.to_numeric, errors='coerce')
weather_df[numeric_cols] = numeric_part

# Normalise the free-text description: lower-case, no surrounding whitespace.
descriptions = weather_df['weather_description']
weather_df['weather_description'] = descriptions.str.lower().str.strip()

print("\nSummary statistics:")
print(weather_df[numeric_cols].describe())

# Temperatures outside [-50, 50] are treated as implausible and replaced by
# the column median.
temps = weather_df['temperature']
temp_mask = (temps < -50) | (temps > 50)
weather_df.loc[temp_mask, 'temperature'] = temps.median()

Duplicate rows: 0

Data types before cleaning:
temperature            float64
feels_like             float64
humidity                 int64
wind_speed             float64
weather_description     object
rain_3h                float64
dtype: object

Summary statistics:
       temperature  feels_like   humidity  wind_speed    rain_3h
count    40.000000    40.00000  40.000000    40.00000  40.000000
mean     10.725000     8.96250  61.850000     4.05225   0.018250
std       3.994083     4.53399  15.284566     1.01929   0.049917
min       4.670000     1.45000  35.000000     2.13000   0.000000
25%       7.592500     5.20250  50.250000     3.47750   0.000000
50%      10.640000     9.25000  59.500000     4.07500   0.000000
75%      13.237500    12.12000  76.250000     4.56000   0.000000
max      21.010000    20.08000  85.000000     6.70000   0.180000


Step 7: Save Cleaned Data

In [12]:
# Derive the output file stem from CITY so the file names follow the
# configured city instead of always saying "london". Characters that are not
# letters or digits become underscores to keep the name filesystem-friendly.
# For CITY = 'London' this yields the same names as before.
city_slug = ''.join(ch if ch.isalnum() else '_' for ch in CITY.lower())
output_stem = f'{city_slug}_weather_forecast_cleaned'

weather_df.to_csv(f'{output_stem}.csv')

# Writing .xlsx requires an Excel engine such as openpyxl to be installed.
weather_df.to_excel(f'{output_stem}.xlsx')

print("\nSample of cleaned data:")
print(weather_df.head())


Sample of cleaned data:
                     temperature  feels_like  humidity  wind_speed  \
datetime                                                             
2025-05-03 15:00:00        21.01       20.08        35        6.70   
2025-05-03 18:00:00        18.72       17.72        41        5.18   
2025-05-03 21:00:00        13.56       12.36        53        4.06   
2025-05-04 00:00:00         7.30        4.94        79        3.53   
2025-05-04 03:00:00         5.87        2.81        84        4.22   

                    weather_description  rain_3h  
datetime                                          
2025-05-03 15:00:00       broken clouds      0.0  
2025-05-03 18:00:00       broken clouds      0.0  
2025-05-03 21:00:00    scattered clouds      0.0  
2025-05-04 00:00:00          few clouds      0.0  
2025-05-04 03:00:00    scattered clouds      0.0  
