In [1]:
import openmeteo_requests

import pandas as pd
import requests_cache
from retry_requests import retry

cache_session = requests_cache.CachedSession('.cache')
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": 53.4186,
    "longitude": 59.0472,
    "start_date": "2025-04-20",
    "end_date": "2025-05-20",
    "daily": ["weather_code", "precipitation_sum", "precipitation_hours", "rain_sum", "snowfall_sum", "cloud_cover_mean", "cloud_cover_max", "cloud_cover_min", "temperature_2m_mean", "temperature_2m_max", "temperature_2m_min", "apparent_temperature_mean", "apparent_temperature_min", "apparent_temperature_max", "wind_direction_10m_dominant", "wind_gusts_10m_max", "wind_speed_10m_max", "relative_humidity_2m_mean", "relative_humidity_2m_max", "relative_humidity_2m_min", "shortwave_radiation_sum", "dew_point_2m_mean", "dew_point_2m_max", "dew_point_2m_min", "sunshine_duration", "surface_pressure_mean"],
    "timezone": "auto",
    "wind_speed_unit": "ms",
    "timeformat": "unixtime"
}
responses = openmeteo.weather_api(url, params=params)

response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()}{response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process daily data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_weather_code = daily.Variables(0).ValuesAsNumpy()
daily_precipitation_sum = daily.Variables(1).ValuesAsNumpy()
daily_precipitation_hours = daily.Variables(2).ValuesAsNumpy()
daily_rain_sum = daily.Variables(3).ValuesAsNumpy()
daily_snowfall_sum = daily.Variables(4).ValuesAsNumpy()
daily_cloud_cover_mean = daily.Variables(5).ValuesAsNumpy()
daily_cloud_cover_max = daily.Variables(6).ValuesAsNumpy()
daily_cloud_cover_min = daily.Variables(7).ValuesAsNumpy()
daily_temperature_2m_mean = daily.Variables(8).ValuesAsNumpy()
daily_temperature_2m_max = daily.Variables(9).ValuesAsNumpy()
daily_temperature_2m_min = daily.Variables(10).ValuesAsNumpy()
daily_apparent_temperature_mean = daily.Variables(11).ValuesAsNumpy()
daily_apparent_temperature_min = daily.Variables(12).ValuesAsNumpy()
daily_apparent_temperature_max = daily.Variables(13).ValuesAsNumpy()
daily_wind_direction_10m_dominant = daily.Variables(14).ValuesAsNumpy()
daily_wind_gusts_10m_max = daily.Variables(15).ValuesAsNumpy()
daily_wind_speed_10m_max = daily.Variables(16).ValuesAsNumpy()
daily_relative_humidity_2m_mean = daily.Variables(17).ValuesAsNumpy()
daily_relative_humidity_2m_max = daily.Variables(18).ValuesAsNumpy()
daily_relative_humidity_2m_min = daily.Variables(19).ValuesAsNumpy()
daily_shortwave_radiation_sum = daily.Variables(20).ValuesAsNumpy()
daily_dew_point_2m_mean = daily.Variables(21).ValuesAsNumpy()
daily_dew_point_2m_max = daily.Variables(22).ValuesAsNumpy()
daily_dew_point_2m_min = daily.Variables(23).ValuesAsNumpy()
daily_sunshine_duration = daily.Variables(24).ValuesAsNumpy()
daily_surface_pressure_mean = daily.Variables(25).ValuesAsNumpy()

daily_data = {"date": pd.date_range(
    start=pd.to_datetime(daily.Time(), unit="s", utc=True),
    end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
    freq=pd.Timedelta(seconds=daily.Interval()),
    inclusive="left"
),
    "weather_code": daily_weather_code,
    "precipitation_sum": daily_precipitation_sum,
    "precipitation_hours": daily_precipitation_hours,
    "rain_sum": daily_rain_sum,
    "snowfall_sum": daily_snowfall_sum,
    "cloud_cover_mean": daily_cloud_cover_mean,
    "cloud_cover_max": daily_cloud_cover_max,
    "cloud_cover_min": daily_cloud_cover_min,
    "temperature_2m_mean": daily_temperature_2m_mean,
    "temperature_2m_max": daily_temperature_2m_max,
    "temperature_2m_min": daily_temperature_2m_min,
    "apparent_temperature_mean": daily_apparent_temperature_mean,
    "apparent_temperature_min": daily_apparent_temperature_min,
    "apparent_temperature_max": daily_apparent_temperature_max,
    "wind_direction_10m_dominant": daily_wind_direction_10m_dominant,
    "wind_gusts_10m_max": daily_wind_gusts_10m_max,
    "wind_speed_10m_max": daily_wind_speed_10m_max,
    "relative_humidity_2m_mean": daily_relative_humidity_2m_mean,
    "relative_humidity_2m_max": daily_relative_humidity_2m_max,
    "relative_humidity_2m_min": daily_relative_humidity_2m_min,
    "shortwave_radiation_sum": daily_shortwave_radiation_sum,
    "dew_point_2m_mean": daily_dew_point_2m_mean,
    "dew_point_2m_max": daily_dew_point_2m_max,
    "dew_point_2m_min": daily_dew_point_2m_min,
    "sunshine_duration": daily_sunshine_duration,
    "surface_pressure_mean": daily_surface_pressure_mean}

daily_dataframe = pd.DataFrame(data = daily_data)
print(daily_dataframe)
daily_dataframe.to_csv("daily_data2.csv", index=False)

Coordinates 53.39191436767578°N 58.971431732177734°E
Elevation 358.0 m asl
Timezone b'Asia/Yekaterinburg'b'GMT+5'
Timezone difference to GMT+0 18000 s
                        date  weather_code  precipitation_sum  \
0  2025-04-19 19:00:00+00:00           1.0                0.0   
1  2025-04-20 19:00:00+00:00           3.0                0.0   
2  2025-04-21 19:00:00+00:00           2.0                0.0   
3  2025-04-22 19:00:00+00:00          51.0                0.4   
4  2025-04-23 19:00:00+00:00           3.0                0.0   
5  2025-04-24 19:00:00+00:00          51.0                0.2   
6  2025-04-25 19:00:00+00:00          51.0                0.5   
7  2025-04-26 19:00:00+00:00          63.0                5.1   
8  2025-04-27 19:00:00+00:00          71.0                5.5   
9  2025-04-28 19:00:00+00:00          73.0                1.7   
10 2025-04-29 19:00:00+00:00           2.0                0.0   
11 2025-04-30 19:00:00+00:00          63.0                4.1   
12 2

### Форматирование данных

In [2]:
formatted_dataframe = daily_dataframe.drop(columns=["date"])
formatted_dataframe["weather_code"] = formatted_dataframe["weather_code"].astype(int)

### Форматируем коды погоды

In [3]:
formatted_dataframe.loc[(formatted_dataframe['weather_code'] >= 60) & (formatted_dataframe['weather_code'] <= 69), 'weather_code'] = 5 # Превращаем в просто "дождь"
formatted_dataframe.loc[(formatted_dataframe['weather_code'] >= 70) & (formatted_dataframe['weather_code'] <= 79), 'weather_code'] = 6 # Превращаем в просто "снегопад"
formatted_dataframe.loc[(formatted_dataframe['weather_code'] >= 50) & (formatted_dataframe['weather_code'] <= 59), 'weather_code'] = 4 # Превращаем в просто "морось"
formatted_dataframe.value_counts("weather_code")

weather_code
3    3823
6    1627
4    1624
5     459
2     397
1     245
0     207
Name: count, dtype: int64

In [4]:
formatted_dataframe.to_csv("formatted_data_for_learning.csv", index=False)