In [1]:
import pandas as pd


In [2]:
# Read the raw CSV (path is relative to the notebook's working directory)
# into df_raw; later cells derive their frames from it.
df_raw = pd.read_csv(filepath_or_buffer="raw_weather_air.csv")

# Bare last expression so the notebook renders the frame as a table.
df_raw

Unnamed: 0,city,lat,lon,time,temp_c,humidity_pct,wind_speed_ms,weather_code,aqi_eu,pm25,no2,o3,weather_desc
0,Lisbon,38.7167,-9.1333,2026-01-10T14:30,13.4,66,1.8,3,21,10.4,25.6,42.0,"Mainly clear, partly cloudy, and overcast"
1,Thessaloniki,40.6436,22.9309,2026-01-10T14:30,10.0,84,4.1,3,26,5.6,8.9,60.0,"Mainly clear, partly cloudy, and overcast"
2,Brussels,50.8505,4.3488,2026-01-10T14:30,-0.4,71,10.8,3,18,9.5,19.6,45.0,"Mainly clear, partly cloudy, and overcast"
3,Barcelona,41.3888,2.159,2026-01-10T14:30,12.3,47,15.3,2,25,2.4,6.8,63.0,"Mainly clear, partly cloudy, and overcast"
4,Berlin,52.5244,13.4105,2026-01-10T14:30,-3.0,71,6.3,0,25,6.1,11.8,62.0,Clear Sky


In [3]:
# Lookup table from numeric weather code to a human-readable description.
# Codes that share a description are grouped so each text is written once;
# the comprehension expands every group into one dict entry per code.
weather_map = {
    code: description
    for codes, description in (
        ((0,), "Clear Sky"),
        ((1, 2, 3), "Mainly clear, partly cloudy, and overcast"),
        ((45, 48), "Fog and depositing rime fog"),
        ((51, 53, 55), "Drizzle: Light, moderate, and dense intensity"),
        ((56, 57), "Freezing Drizzle: Light and dense intensity"),
        ((61, 63, 65), "Rain: Slight, moderate and heavy intensity"),
        ((66, 67), "Freezing Rain: Light and heavy intensity"),
        ((71, 73, 75), "Snow fall: Slight, moderate, and heavy intensity"),
        ((77,), "Snow grains"),
        ((80, 81, 82), "Rain showers: Slight, moderate, and violent"),
        ((85, 86), "Snow showers slight and heavy"),
        ((95,), "Thunderstorm: Slight or moderate"),
        ((96, 99), "Thunderstorm with slight and heavy hail"),
    )
    for code in codes
}

In [4]:
# Derive the transformed frame from df_raw in a single expression.
# assign() returns a new frame, so df_raw itself is not modified.
df = df_raw.assign(
    # parse the timestamp strings; unparseable values become NaT
    time=pd.to_datetime(df_raw["time"], errors="coerce"),
    # translate each weather_code via weather_map (unmapped codes give NaN)
    weather_desc=df_raw["weather_code"].map(weather_map),
    # round the measurements to one decimal place for readability
    temp_c=df_raw["temp_c"].round(1),
    wind_speed_ms=df_raw["wind_speed_ms"].round(1),
    pm25=df_raw["pm25"].round(1),
    no2=df_raw["no2"].round(1),
    o3=df_raw["o3"].round(1),
)[[
    # location and timestamp
    "city", "lat", "lon", "time",
    # weather readings
    "temp_c", "humidity_pct", "wind_speed_ms",
    "weather_code", "weather_desc",
    # air-quality readings
    "aqi_eu", "pm25", "no2", "o3",
]]

df


Unnamed: 0,city,lat,lon,time,temp_c,humidity_pct,wind_speed_ms,weather_code,weather_desc,aqi_eu,pm25,no2,o3
0,Lisbon,38.7167,-9.1333,2026-01-10 14:30:00,13.4,66,1.8,3,"Mainly clear, partly cloudy, and overcast",21,10.4,25.6,42.0
1,Thessaloniki,40.6436,22.9309,2026-01-10 14:30:00,10.0,84,4.1,3,"Mainly clear, partly cloudy, and overcast",26,5.6,8.9,60.0
2,Brussels,50.8505,4.3488,2026-01-10 14:30:00,-0.4,71,10.8,3,"Mainly clear, partly cloudy, and overcast",18,9.5,19.6,45.0
3,Barcelona,41.3888,2.159,2026-01-10 14:30:00,12.3,47,15.3,2,"Mainly clear, partly cloudy, and overcast",25,2.4,6.8,63.0
4,Berlin,52.5244,13.4105,2026-01-10 14:30:00,-3.0,71,6.3,0,Clear Sky,25,6.1,11.8,62.0


In [5]:
# Handle missing values per column instead of a blanket fillna(0):
#   - weather_desc is text, so a missing description gets the label "Unknown"
#     rather than the integer 0;
#   - time and the numeric readings are left missing, because a 0 written there
#     could not be told apart from a real value (e.g. 0.0 degrees) and would put
#     an integer into the datetime column. Missing cells are saved as empty
#     fields by to_csv.
# Then drop exact duplicate rows. Both calls return new frames, so no
# inplace=True is needed and re-running the cell gives the same result.
df = df.fillna({"weather_desc": "Unknown"}).drop_duplicates()

In [6]:
# Write the transformed frame to a new CSV file in the working directory;
# index=False keeps the row index out of the file.
df.to_csv(path_or_buf="weather_air_transformed.csv", index=False)