# Open-Meteo Weather Analysis - Warsaw

In [None]:
!pip install openmeteo-requests requests-cache retry-requests numpy pandas

In [None]:
import openmeteo_requests

import pandas as pd
import requests_cache
from retry_requests import retry

# Open-Meteo client on top of an on-disk HTTP cache ('.cache') wrapped in a
# retrying session (5 retries, backoff factor 0.2).
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Request definition. The order of the "daily" entries matters: the response
# variables are read back by position further down.
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": 52.2298,
    "longitude": 21.0118,
    "start_date": "2000-01-01",
    "end_date": "2025-11-30",
    "daily": [
        "weather_code",
        "temperature_2m_mean",
        "rain_sum",
        "snowfall_sum",
        "precipitation_hours",
        "daylight_duration",
        "wind_speed_10m_max",
        "wind_direction_10m_dominant",
    ],
}
responses = openmeteo.weather_api(url, params=params)

# A single coordinate pair was requested, so only the first response is used.
response = responses[0]
print(f"Coordinates: {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation: {response.Elevation()} m asl")
print(f"Timezone difference to GMT+0: {response.UtcOffsetSeconds()}s")

# Pull every requested variable out of the response, in request order.
daily = response.Daily()
(
    daily_weather_code,
    daily_temperature_2m_mean,
    daily_rain_sum,
    daily_snowfall_sum,
    daily_precipitation_hours,
    daily_daylight_duration,
    daily_wind_speed_10m_max,
    daily_wind_direction_10m_dominant,
) = (
    daily.Variables(position).ValuesAsNumpy()
    for position in range(len(params["daily"]))
)

# Column name -> values. The date column holds one UTC timestamp per interval,
# from the reported start time (inclusive) to the reported end time (exclusive).
daily_data = {
    "date": pd.date_range(
        start=pd.to_datetime(daily.Time(), unit="s", utc=True),
        end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=daily.Interval()),
        inclusive="left",
    ),
    "weather_code": daily_weather_code,
    "temperature": daily_temperature_2m_mean,
    "rain": daily_rain_sum,
    "snowfall": daily_snowfall_sum,
    "precipitation_hours": daily_precipitation_hours,
    "daylight_duration": daily_daylight_duration,
    "wind_speed_max": daily_wind_speed_10m_max,
    "wind_direction": daily_wind_direction_10m_dominant,
}

df = pd.DataFrame(data=daily_data)
print("\nDaily data\n", df)

# Keep a local copy of the assembled table.
df.to_csv('warsaw_weather_daily_2000_2025.csv', index=False)
print("Saved file: warsaw_weather_daily_2000_2025.csv")

In [None]:
# Preview the first rows to sanity-check the assembled columns.
df.head()

In [None]:
# Print the column dtypes and non-null counts of the DataFrame.
df.info()

In [None]:
# Summary statistics for the columns pandas selects by default.
df.describe()

In [None]:
# Replace the UTC timestamps with plain datetime.date objects, dropping the
# time of day and the timezone. The column is no longer a datetime dtype after
# this, so the `.dt` accessor cannot be used on it until it is converted back.
df['date'] = df['date'].dt.date
df.head()

In [None]:
# Convert the date column to pandas datetime values so the `.dt` accessor
# (year, month) used by the cells below is available; info() shows the dtype.
df['date'] = pd.to_datetime(df['date'])
df.info()

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt

# savefig() does not create missing directories, so make sure the output
# folder exists before any figure is written into it.
Path('Plots').mkdir(parents=True, exist_ok=True)

# Daily mean temperature over the whole period; the thin line keeps the
# dense daily series readable.
plt.plot(df['date'], df['temperature'], label='Average Daily Temperature', linewidth=0.3)
plt.title('Weather in Warsaw (2000-2025)')
plt.xlabel('Year')
plt.ylabel('Temperature (°C)')
# Anchor the legend outside the axes (to the right) so it does not cover data.
plt.legend(loc='upper left', bbox_to_anchor=(1, 1.05))
plt.savefig('Plots/WeatherInWarsaw.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Find the row with the lowest mean daily temperature and report its date
# and value.
min_index = df['temperature'].idxmin()
lowest_temp = df.loc[min_index]

coldest_date, coldest_value = lowest_temp['date'], lowest_temp['temperature']
print(f"Data: {coldest_date}")
print(f"Temperature: {coldest_value} °C")

In [None]:
# Calendar year of every observation; also used as the grouping key by the
# yearly rain, snow and category cells below.
df['year'] = df['date'].dt.year

# NOTE: the requested range ends on 2025-11-30, so the 2025 mean lacks December.
yearly_avg = df.groupby('year')['temperature'].mean()
# Mean over all daily values, drawn as a reference line like in the
# yearly rain/snow plots.
avg = df['temperature'].mean()

plt.plot(yearly_avg.index, yearly_avg.values, marker='o', color='purple')
plt.axhline(y=avg, linestyle='--', label=f'Average of all ({avg:.1f} °C)')
plt.title('Yearly Average Temperature in Warsaw (2000-2025)')
plt.xlabel('Year')
plt.ylabel('Average Temperature (°C)')
plt.legend()
plt.savefig('Plots/YearlyAvgTempInWarsaw.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Total rainfall per calendar year, with the mean annual total drawn as a
# dashed reference line. Relies on the 'year' column created earlier.
yearly_sum_rain = df['rain'].groupby(df['year']).sum()
avg_annual_rain = yearly_sum_rain.mean()

plt.plot(yearly_sum_rain.index, yearly_sum_rain.to_numpy(), marker='o')
plt.axhline(
    y=avg_annual_rain,
    linestyle='--',
    label=f'Average of all ({avg_annual_rain:.0f} mm)',
)
plt.xlabel('Year')
plt.ylabel('Rainfall (mm)')
plt.title('Yearly Sum of Rainfall in Warsaw (2000-2025)')
plt.legend()
plt.savefig('Plots/YearlySumOfRainfallInWarsaw.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Total snowfall per calendar year, with the mean annual total drawn as a
# dashed reference line. Relies on the 'year' column created earlier.
# Open-Meteo reports snowfall_sum in centimetres unless a unit override is
# requested (none is), so the axis and legend are labelled in cm.
yearly_sum_snow = df.groupby('year')['snowfall'].sum()
plt.plot(yearly_sum_snow.index, yearly_sum_snow.values, marker='o')
avg_annual_snow = yearly_sum_snow.mean()
plt.axhline(y=avg_annual_snow, linestyle='--', label=f'Average of all ({avg_annual_snow:.0f} cm)')
plt.title('Yearly Sum of Snowfall in Warsaw (2000-2025)')
plt.xlabel('Year')
plt.ylabel('Snowfall (cm)')
# Without legend() the label given to axhline() is never displayed.
plt.legend()
plt.savefig('Plots/YearlySumOfSnowfallInWarsaw.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
import seaborn as sns

# Calendar month of every observation, used to group the box plot.
df['month'] = df['date'].dt.month

# One box per month showing the spread of daily mean temperatures.
# NOTE(review): recent seaborn releases warn when `palette` is passed without
# `hue` - confirm against the installed version before changing the call.
plt.figure(figsize=(8, 8))
sns.boxplot(data=df, x='month', y='temperature', palette="coolwarm")

plt.xlabel('Month')
plt.ylabel('Temperature (°C)')
plt.title('Temperature in Exact Months (2000-2025)')
plt.savefig('Plots/TempInExactMonthsInWarsaw.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Columns included in the pairwise correlation matrix.
used_cols = ['temperature', 'rain', 'snowfall', 'wind_speed_max', 'daylight_duration']

# Annotated heatmap of the correlations; the colour scale is pinned to
# [-1, 1] so that zero correlation sits at the middle of the palette.
correlation = df.loc[:, used_cols].corr()
sns.heatmap(
    correlation,
    annot=True,
    fmt='.2f',
    cmap='coolwarm',
    vmin=-1,
    vmax=1,
)
plt.title('Weather Correlation Matrix in Warsaw')
plt.savefig('Plots/WeatherCorrelationMatrixInWarsaw.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
def qualify_weather(temp):
    """Map a temperature value to a coarse category label.

    The notebook applies this to the mean daily temperature column.

    Args:
        temp: Numeric temperature, or None/NaN for a missing reading.

    Returns:
        "Freezing" below 0, "Cold" below 5, "Moderate" below 15,
        "Warm" below 25, "Hot" otherwise; None when the value is missing.
    """
    import math

    # NaN fails every `<` comparison, so without this guard a missing
    # reading would fall through to the final branch and be counted as "Hot".
    if temp is None or math.isnan(temp):
        return None

    if temp < 0:
        return "Freezing"
    elif temp < 5:
        return "Cold"
    elif temp < 15:
        return "Moderate"
    elif temp < 25:
        return "Warm"
    else:
        return "Hot"

In [None]:
# Label every day with its temperature category and display the new column.
df['category'] = df['temperature'].map(qualify_weather)
df['category']

In [None]:
# Number of days per (year, category) pair.
weather_tab = pd.crosstab(df['year'], df['category'])

# reindex() instead of plain column selection: a category that never occurs
# is absent from the crosstab and would raise KeyError on selection; here it
# is kept as a column of zeros so the plot still renders.
chosen_cats = weather_tab.reindex(columns=['Freezing', 'Hot'], fill_value=0)
sns.lineplot(data=chosen_cats)
plt.title('Freezing Vs Hot Days (2000-2025)')
plt.ylabel('Number of Days in the Year')
plt.xlabel('Year')
plt.grid(True)
plt.savefig('Plots/FreezingVsHotDays.png', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Number of days that fell into each temperature category.
df['category'].value_counts()