<a href="https://colab.research.google.com/github/ArturRSO15/-/blob/main/Untitled2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:

import requests
import pandas as pd
from datetime import date, timedelta
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import numpy as np


latitude = 56.9496
longitude = 24.1052
end_date = date.today() - timedelta(days=3)
start_date = end_date - timedelta(days=365*2)
timezone = "Europe/Riga"


base_url = "https://archive-api.open-meteo.com/v1/archive"
params = {
    "latitude": latitude,
    "longitude": longitude,
    "start_date": start_date.isoformat(),
    "end_date": end_date.isoformat(),
    # Запрашиваем дневные переменные: max temp, min temp, суммарные осадки
    "daily": "temperature_2m_max,temperature_2m_min,precipitation_sum",
    "timezone": timezone
}

# Выполняем запрос и проверяем ответ
resp = requests.get(base_url, params=params)
resp.raise_for_status()
data = resp.json()


daily = data.get("daily", {})
df = pd.DataFrame({
    "date": pd.to_datetime(daily.get("time")),
    "tmax": daily.get("temperature_2m_max"),
    "tmin": daily.get("temperature_2m_min"),
    "precip": daily.get("precipitation_sum")
})


df = df.sort_values("date").reset_index(drop=True)


df["tmax_next_day"] = df["tmax"].shift(-1)

df = df.dropna(subset=["tmax_next_day"]).reset_index(drop=True)


df["tmax_lag1"] = df["tmax"].shift(1)
df["tmax_lag1"].fillna(df["tmax"].mean(), inplace=True)
df["t_range"] = df["tmax"] - df["tmin"]
df["day_of_year"] = df["date"].dt.dayofyear


feature_cols = ["tmax", "tmin", "precip", "tmax_lag1", "t_range", "day_of_year"]
X = df[feature_cols].astype(float)
y = df["tmax_next_day"].astype(float)


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)


model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"Данные с {start_date.isoformat()} по {end_date.isoformat()} для локации ({latitude},{longitude})")
print(f"Размер датасета: {len(df)} строк")
print(f"RMSE на тесте: {rmse:.3f} °C")


result = X_test.copy()
result["actual_tmax_next_day"] = y_test.values
result["pred_tmax_next_day"] = y_pred
print("\nПримеры (последние 5 строк теста):")
print(result.tail(5))


Данные с 2023-12-09 по 2025-12-08 для локации (56.9496,24.1052)
Размер датасета: 730 строк
RMSE на тесте: 2.220 °C

Примеры (последние 5 строк теста):
     tmax  tmin  precip  tmax_lag1  t_range  day_of_year  \
725   2.2   0.7     0.0        1.7      1.5        337.0   
726   3.9   0.7     0.1        2.2      3.2        338.0   
727   6.0   3.9     0.3        3.9      2.1        339.0   
728   6.6   3.4     6.0        6.0      3.2        340.0   
729   3.3   1.8     1.9        6.6      1.5        341.0   

     actual_tmax_next_day  pred_tmax_next_day  
725                   3.9            3.206577  
726                   6.0            4.550518  
727                   6.6            6.860849  
728                   3.3            6.877194  
729                   4.7            4.192197  


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["tmax_lag1"].fillna(df["tmax"].mean(), inplace=True)
