In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Create df

In [None]:
import numpy as np
import pandas as pd

In [None]:
# Folder on the mounted Google Drive that holds every input array.
DATA_DIR = '/content/drive/MyDrive/YADRO_Hack'

# One array per weather variable. Later cells index these as [hour][row][col];
# elevation is indexed as [row][col] only.
temps = np.load(f'{DATA_DIR}/temperature.npy')
clouds = np.load(f'{DATA_DIR}/cloud_cover.npy')
# NOTE(review): humidity is read from a "forecast5_2" file rather than a raw
# humidity file - presumably it already contains appended forecast hours; confirm.
humidity = np.load(f'{DATA_DIR}/humidity_forecast5_2.npy')
elevation = np.load(f'{DATA_DIR}/elevation.npy')
pressure = np.load(f'{DATA_DIR}/pressure.npy')
wind_dir = np.load(f'{DATA_DIR}/wind_dir.npy')
wind_speed = np.load(f'{DATA_DIR}/wind_speed.npy')

In [None]:
# The dataset is laid out hour by hour: every hour contributes 900 rows
# (one per coordinate of the 30 x 30 grid), each carrying that coordinate's features.
feature_columns = ['hour', 'row', 'col', 'elevation', 'cloud', 'humidity', 'pressure', 'wind_dir', 'wind_speed', 'tempreture']
grid_records = [
    [
        hour, row, col,
        elevation[row][col],
        clouds[hour][row][col],
        humidity[hour][row][col],
        pressure[hour][row][col],
        wind_dir[hour][row][col],
        wind_speed[hour][row][col],
        temps[hour][row][col],
    ]
    for hour in range(43)
    for row in range(30)
    for col in range(30)
]
df = pd.DataFrame(data=grid_records, columns=feature_columns)
df

In [None]:
df.elevation.max(), df.elevation.min()

# Check temps series for one value

In [None]:
# Same feature layout as the full frame, restricted to the single grid cell (0, 0):
# one row per hour.
cell_columns = ['hour', 'row', 'col', 'elevation', 'cloud', 'humidity', 'pressure', 'wind_dir', 'wind_speed', 'tempreture']
cell_records = [
    [
        hour, 0, 0,
        elevation[0][0],
        clouds[hour][0][0],
        humidity[hour][0][0],
        pressure[hour][0][0],
        wind_dir[hour][0][0],
        wind_speed[hour][0][0],
        temps[hour][0][0],
    ]
    for hour in range(43)
]
df_1_1 = pd.DataFrame(data=cell_records, columns=cell_columns)
df_1_1.head()

In [None]:
series = df_1_1['tempreture'].copy()
series.plot()

In [None]:
from datetime import datetime, timedelta


# Replace the integer index of the series with hourly timestamps that start at
# 2024-04-24 00:00, one per observation (43 in total).
now = datetime(2024, 4, 24)
series.index = [pd.to_datetime(now + timedelta(hours=i)) for i in range(43)]
series.head()

In [None]:
# In principle the series is stationary: when the KPSS p-value is above 0.05 we do
# not reject the null hypothesis that the series is stationary.
# With regression='ct' the null hypothesis is stationarity around a deterministic trend.
import statsmodels.api as sm


sm.tsa.stattools.kpss(series, regression='ct')

In [None]:
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
import matplotlib.pyplot as plt


# Two stacked panels: autocorrelation on top, partial autocorrelation (OLS
# estimate) below, both up to lag 20.
f, ax = plt.subplots(nrows=2, ncols=1, figsize=(5, 6))
plot_acf(series.values, lags=20, ax=ax[0])
plot_pacf(series.values, lags=20, ax=ax[1], method='ols')
plt.tight_layout()
plt.show()

In [None]:
import statsmodels.api as sm
import matplotlib.pyplot as plt

# Additive seasonal decomposition of the single-cell temperature series.
res = sm.tsa.seasonal_decompose(series, model='additive')
# Widen the default figure size before the decomposition panels are drawn.
plt.rc("figure", figsize=(25,8))
resplot = res.plot()

# Baseline backtesting_forecaster for independent region

In [None]:
!pip install skforecast

In [None]:
from skforecast.ForecasterBaseline import ForecasterEquivalentDate
from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.model_selection import bayesian_search_forecaster
from skforecast.model_selection import backtesting_forecaster

In [None]:
# Index the single-cell frame with hourly timestamps starting at 2024-04-25 00:00
# and declare an hourly frequency on that index.
now = datetime(2024, 4, 25)
df_1_1.index = [pd.to_datetime(now + timedelta(hours=i)) for i in range(43)]
df_1_1 = df_1_1.asfreq('H')
df_1_1.head()

In [None]:
#len_val = 5
n = 38
data_train = df_1_1.iloc[:n, :]
data_val = df_1_1.iloc[n:, :]

In [None]:
# Baseline forecaster configured with a one-day offset and a single offset
# (presumably: predict the value observed one day earlier - see skforecast docs).
forecaster = ForecasterEquivalentDate(
                 offset = pd.DateOffset(days=1),
                 n_offsets = 1,
             )

# Fit on the first n rows of the last column of df_1_1 ('tempreture').
forecaster.fit(y=df_1_1.iloc[:n, -1])
forecaster

In [None]:
# Backtest the baseline forecaster on the observations after the first n rows,
# five steps at a time and without refitting, scored by MAPE.
metric_baseline, predictions = backtesting_forecaster(
                                   forecaster         = forecaster,
                                   y                  = df_1_1['tempreture'],
                                   steps              = 5,
                                   metric             = 'mean_absolute_percentage_error',
                                   initial_train_size = len(df_1_1.values[:n]),
                                   refit              = False,
                                   n_jobs             = 'auto',
                                   verbose            = True,
                                   show_progress      = True
                               )

# The metric requested above is MAPE, so the label says MAPE (it previously said MAE).
print(f"Backtest error (MAPE): {metric_baseline}")

In [None]:
forecaster.predict(steps=5)

# MLForecast

In [None]:
!pip install mlforecast
!pip install catboost
#!pip install numba

In [None]:
from mlforecast import MLForecast
from mlforecast.target_transforms import Differences
from mlforecast.utils import PredictionIntervals
from window_ops.expanding import expanding_mean
from mlforecast.lag_transforms import ExpandingMean, RollingMean, ExponentiallyWeightedMean
from mlforecast.target_transforms import LocalStandardScaler

from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
from catboost import CatBoostRegressor
from sklearn.linear_model import LinearRegression

from tqdm import tqdm
from datetime import datetime, timedelta
from sklearn.metrics import mean_absolute_percentage_error

In [None]:
def create_df_wind_speed(n: list[tuple]) -> pd.DataFrame:
    """Build the wind-speed frame for grid cell (0, 0) and eight neighbouring cells.

    Every one of the 43 rows holds the id '0/0', ds = hour + 1, the wind speed at
    cell (0, 0) as ``y`` and the wind speed at ``n[0]`` .. ``n[7]`` for the same
    hour.  The neighbour columns are then shifted down one row, so each row pairs
    ``y`` with the neighbours' previous-hour values (the first row becomes NaN).

    n: neighbour (row, col) pairs; only the first eight entries are read.
    """
    neighbour_cols = [f'neibr{k}' for k in range(1, 9)]
    records = []
    for hour in range(43):
        hour_grid = wind_speed[hour]
        record = ['0/0', hour + 1, hour_grid[0][0]]
        for k in range(8):
            record.append(hour_grid[n[k][0]][n[k][1]])
        records.append(record)
    frame = pd.DataFrame(data=records, columns=['unique_id', 'ds', 'y'] + neighbour_cols)
    # Lag all neighbour features by one step in a single assignment.
    frame[neighbour_cols] = frame[neighbour_cols].shift(1)
    return frame


def create_df_wind_dir(n: list[tuple]) -> pd.DataFrame:
    """Build the wind-direction frame for grid cell (0, 0) and eight neighbouring cells.

    Rows follow the layout unique_id ('0/0'), ds (hour + 1), y (wind direction at
    cell (0, 0)) and neibr1..neibr8 (wind direction at ``n[0]`` .. ``n[7]``), one
    row per hour for 43 hours.  The neighbour columns are lagged by one row before
    the frame is returned.

    n: neighbour (row, col) pairs; only the first eight entries are read.
    """
    neighbour_cols = [f'neibr{k}' for k in range(1, 9)]

    def one_hour(hour):
        # Target value first, then the eight neighbours in the order given by n.
        grid = wind_dir[hour]
        return ['0/0', hour + 1, grid[0][0]] + [grid[n[k][0]][n[k][1]] for k in range(8)]

    frame = pd.DataFrame(
        data=[one_hour(hour) for hour in range(43)],
        columns=['unique_id', 'ds', 'y'] + neighbour_cols,
    )
    frame[neighbour_cols] = frame[neighbour_cols].shift(1)
    return frame


def create_df_clouds(n: list[tuple], hour: int, coord: tuple) -> tuple:
    """Build the cloud-cover training frame and the one-row future frame for one cell.

    Parameters
    ----------
    n : neighbour (row, col) pairs whose cloud values become exogenous features.
    hour : forecast step; the first ``43 + hour - 1`` entries of the global
        ``clouds`` array are used as history.
    coord : (row, col) of the target cell.

    Returns
    -------
    (df, future_df)
        ``df`` has the columns unique_id ('0/0'), ds (1-based step), y (target
        cloud cover after double exponential smoothing) and five groups of
        neighbour columns lagged by 1, 2, 3, 4 and 5 steps.  ``future_df`` is a
        single row for ds = 43 + hour with the same columns.
    """
    lags = (1, 2, 3, 4, 5)
    n_neibrs = len(n)
    feature_cols = [f'neibr{i + 1}_cloud' for i in range(n_neibrs)]
    for lag in lags[1:]:
        feature_cols += [f'neibr{i + 1}_cloud_shift{lag}' for i in range(n_neibrs)]

    rows = []
    for h in range(43 + hour - 1):
        neibr_values = [clouds[h][i[0]][i[1]] for i in n]
        # The same neighbour values are repeated once per lag group; the groups
        # only start to differ once they are shifted below.
        rows.append(['0/0', h + 1, clouds[h][coord[0]][coord[1]]] + neibr_values * len(lags))
    df = pd.DataFrame(data=rows, columns=['unique_id', 'ds', 'y'] + feature_cols)
    df['y'] = double_exponential_smoothing(df.y, 0.3, 0.2)

    future_df = df.iloc[[-1], :].copy()
    future_df['ds'] = 43 + hour
    for group, lag in enumerate(lags):
        cols = feature_cols[group * n_neibrs:(group + 1) * n_neibrs]
        if not cols:
            continue
        # A column lagged by `lag` holds, at the forecast row, the value observed
        # `lag` steps earlier, i.e. the `lag`-th row from the end of the history.
        # Taking the last row for every group would feed lag-1 values into the
        # lag-2..5 columns and disagree with how the model was trained.
        future_df[cols] = df[cols].iloc[[-lag]].to_numpy()
        df[cols] = df[cols].shift(lag)
    return df, future_df


def create_df_pressure(n: list[tuple], hour: int, coord: tuple) -> tuple:
    """Build the pressure training frame and the one-row future frame for one cell.

    The history covers the first ``43 + hour - 1`` entries of the global
    ``pressure`` array.  Each row is unique_id ('0/0'), ds (1-based step), y
    (pressure at ``coord``) and neibr1..neibr8 (pressure at ``n[0]`` .. ``n[7]``).

    Returns ``(df, future_df)``: ``future_df`` is a copy of the last row taken
    before lagging, with ds set to ``43 + hour``; in ``df`` the neighbour columns
    are lagged by one step.
    """
    neighbour_cols = [f'neibr{k}' for k in range(1, 9)]
    records = []
    for h in range(43 + hour - 1):
        snapshot = pressure[h]
        neighbour_values = [snapshot[n[k][0]][n[k][1]] for k in range(8)]
        records.append(['0/0', h + 1, snapshot[coord[0]][coord[1]]] + neighbour_values)
    df = pd.DataFrame(data=records, columns=['unique_id', 'ds', 'y'] + neighbour_cols)

    # The future row must be captured before the shift so that it carries the
    # most recent neighbour values.
    future_df = df.iloc[[-1], :].copy()
    future_df['ds'] = 43 + hour

    df[neighbour_cols] = df[neighbour_cols].shift(1)
    return df, future_df


#@jit(nopython=True, cache=True)
def create_df_humidity(n: list[tuple], hour: int, coord: tuple) -> tuple:
    """Build the humidity training frame and the one-row future frame for one cell.

    Uses the first ``43 + hour - 1`` entries of the global ``humidity`` array.
    Columns: unique_id ('0/0'), ds (1-based step), y (humidity at ``coord``) and
    neibr1..neibr8 (humidity at ``n[0]`` .. ``n[7]``).

    Returns ``(df, future_df)`` where ``future_df`` is the last unshifted row
    relabelled with ds = ``43 + hour`` and ``df`` has its neighbour columns lagged
    by one step.
    """
    neighbour_cols = [f'neibr{k}' for k in range(1, 9)]

    def row_for(h):
        # y first, then the eight neighbours in the order given by n.
        layer = humidity[h]
        return ['0/0', h + 1, layer[coord[0]][coord[1]]] + [layer[n[k][0]][n[k][1]] for k in range(8)]

    df = pd.DataFrame(
        data=[row_for(h) for h in range(43 + hour - 1)],
        columns=['unique_id', 'ds', 'y'] + neighbour_cols,
    )
    future_df = df.iloc[[-1], :].copy()
    future_df['ds'] = 43 + hour
    df[neighbour_cols] = df[neighbour_cols].shift(1)
    return df, future_df


#@jit(nopython=True, cache=True)
def create_df_temps(n: list[tuple], hour: int, coord: tuple) -> tuple:
    """Build the temperature training frame and the one-row future frame for one cell.

    Parameters
    ----------
    n : neighbour (row, col) pairs; only the first eight are used, so both an
        8-element list and a longer, distance-sorted list are accepted.
    hour : forecast step; the first ``43 + hour - 1`` entries of the global
        ``temps`` array are used as history.
    coord : (row, col) of the target cell.

    Returns
    -------
    (df, future_df)
        ``df`` has unique_id ('0/0'), ds (1-based step), y (target temperature
        after double exponential smoothing), neibr1..neibr8 lagged by one step
        and neibr1_shift2..neibr8_shift2 lagged by two steps.  ``future_df`` is
        a single row for ds = 43 + hour with the same columns.
    """
    nearest = list(n)[:8]
    lag1_cols = [f'neibr{k}' for k in range(1, 9)]
    lag2_cols = [f'{name}_shift2' for name in lag1_cols]

    rows = []
    for h in range(43 + hour - 1):
        neibr_values = [temps[h][i[0]][i[1]] for i in nearest]
        rows.append(['0/0', h + 1, temps[h][coord[0]][coord[1]]] + neibr_values + neibr_values)
    df = pd.DataFrame(data=rows, columns=['unique_id', 'ds', 'y'] + lag1_cols + lag2_cols)
    df['y'] = double_exponential_smoothing(df.y, 0.4, 0.2)

    future_df = df.iloc[[-1], :].copy()
    future_df['ds'] = 43 + hour
    # At the forecast row a two-step lag refers to the second-to-last observation;
    # the last row is only correct for the one-step columns.
    future_df[lag2_cols] = df[lag2_cols].iloc[[-2]].to_numpy()

    df[lag1_cols] = df[lag1_cols].shift(1)
    df[lag2_cols] = df[lag2_cols].shift(2)
    return df, future_df

In [None]:
# Generic model: CatBoost on lags 1-4 plus expanding means of lags 1 and 2.
# freq=1 because the `ds` column of the frames built above is an integer step.
mlf = MLForecast(
    models=[CatBoostRegressor(iterations=800, max_depth=5, verbose=False)],
    freq=1,
    lags=[1, 2, 3, 4],
    lag_transforms = {
        1:  [expanding_mean],
        2: [expanding_mean],
    },
)

# Cloud model, variant 1: lags 1-5, an exponentially weighted mean on lag 3 and a
# per-series standard scaling of the target.
mlf_forecast_clouds = MLForecast(
    models=[CatBoostRegressor(iterations=800, max_depth=5, verbose=False)],
    freq=1,
    lags=[1, 2, 3, 4, 5],
    lag_transforms = {
        1:  [expanding_mean],
        2: [expanding_mean],
        3: [ExponentiallyWeightedMean(alpha=0.1)],
    },
    target_transforms=[LocalStandardScaler()]#, Differences([1])]
)

# Cloud model, variant 2: fewer iterations, lags 1-4, fixed random_state.
mlf_forecast_clouds2 = MLForecast(
    models=[CatBoostRegressor(iterations=700, max_depth=5, verbose=False, random_state=3)],
    freq=1,
    lags=list(range(1, 5)),
    lag_transforms = {
        1:  [expanding_mean],
        2: [expanding_mean],
        3: [ExponentiallyWeightedMean(alpha=0.2)],
    },
    target_transforms=[LocalStandardScaler()]#, Differences([1])]
)

# Temperature model: lags 1-4 with expanding, exponentially weighted and rolling
# means; no target transform.
mlf_temps = MLForecast(
    models=[CatBoostRegressor(iterations=800, max_depth=5, verbose=False, random_state=3)],
    freq=1,
    lags=[1, 2, 3, 4],
    lag_transforms = {
        1:  [expanding_mean, ExponentiallyWeightedMean(0.3)],
        2: [expanding_mean, RollingMean(window_size=5)],
        #3: [RollingMean(window_size=5)]
    },
)

# Cloud model, variant 3: shallower trees, more iterations, explicit L2 regularisation.
mlf_forecast_clouds3 = MLForecast(
    models=[CatBoostRegressor(iterations=1200, max_depth=4, verbose=False, random_state=3, l2_leaf_reg=1)],
    freq=1,
    lags=list(range(1, 5)),
    lag_transforms = {
        1:  [expanding_mean],
        2: [expanding_mean, RollingMean(window_size=5)],
        3: [ExponentiallyWeightedMean(alpha=0.2)],
    },
    target_transforms=[LocalStandardScaler()]#, Differences([1])]
)


def double_exponential_smoothing(series, alpha, beta):
    """Smooth a sequence with double (level + trend) exponential smoothing.

    Parameters
    ----------
    series : any iterable of numbers (list, tuple, numpy array, pandas Series).
        Values are read in iteration order, so a Series with a non-default index
        is handled positionally.
    alpha : smoothing factor applied to the level.
    beta : smoothing factor applied to the trend.

    Returns
    -------
    list
        As many values as the input.  The first value is the first observation;
        every later value is ``level + trend`` after the update with the
        observation at that position.  Inputs with fewer than two elements are
        returned unchanged (as a list) because no trend can be initialised.
    """
    values = list(series)
    if len(values) < 2:
        return values

    # Initial level is the first observation, initial trend the first difference.
    level, trend = values[0], values[1] - values[0]
    smoothed = [values[0]]
    for value in values[1:]:
        last_level, level = level, alpha * value + (1 - alpha) * (level + trend)
        trend = beta * (level - last_level) + (1 - beta) * trend
        smoothed.append(level + trend)
    return smoothed

In [None]:
def make_all_forecast_clouds():
    """Forecast five further hours of cloud cover, appending each to the global ``clouds``.

    For steps 1..5 the flat list of 900 per-cell predictions is folded into a
    30 x 30 grid (row-major) and appended to ``clouds``, so every later step sees
    the earlier forecasts as history.  Returns the last five entries of ``clouds``.
    """
    global clouds
    for step in tqdm(range(1, 6)):
        flat_preds = make_current_hour_preds(step, 'clouds')
        grid = []
        for row in range(30):
            grid.append([flat_preds[30 * row + col] for col in range(30)])
        history = clouds.tolist()
        history.append(grid)
        clouds = np.array(history)
    return clouds[-5:]


def make_all_forecast_humidity():
    """Forecast five further hours of humidity, appending each to the global ``humidity``.

    Each step reshapes the 900 per-cell predictions into a 30 x 30 grid
    (row-major) and appends it to ``humidity`` before the next step runs.
    Returns the last five entries of ``humidity``.
    """
    global humidity
    for step in tqdm(range(1, 6)):
        flat_preds = make_current_hour_preds(step, 'humidity')
        grid = []
        for row in range(30):
            grid.append([flat_preds[30 * row + col] for col in range(30)])
        history = humidity.tolist()
        history.append(grid)
        humidity = np.array(history)
    return humidity[-5:]


#@jit(nopython=True, cache=True)
def make_all_forecast_pressure():
    """Forecast five further hours of pressure, appending each to the global ``pressure``.

    Each step reshapes the 900 per-cell predictions into a 30 x 30 grid
    (row-major) and appends it to ``pressure`` before the next step runs.
    Returns the last five entries of ``pressure``.
    """
    global pressure
    for step in tqdm(range(1, 6)):
        flat_preds = make_current_hour_preds(step, 'press')
        grid = []
        for row in range(30):
            grid.append([flat_preds[30 * row + col] for col in range(30)])
        history = pressure.tolist()
        history.append(grid)
        pressure = np.array(history)
    return pressure[-5:]


#@jit(nopython=True, cache=True)
def make_all_forecast_temps():
    """Forecast five further hours of temperature, appending each to the global ``temps``.

    Each step reshapes the 900 per-cell predictions into a 30 x 30 grid
    (row-major) and appends it to ``temps`` before the next step runs.
    Returns the last five entries of ``temps``.
    """
    global temps
    for step in tqdm(range(1, 6)):
        flat_preds = make_current_hour_preds(step, 'temp')
        grid = []
        for row in range(30):
            grid.append([flat_preds[30 * row + col] for col in range(30)])
        history = temps.tolist()
        history.append(grid)
        temps = np.array(history)
    return temps[-5:]

In [None]:
import pickle


#@jit(nopython=True)
def make_current_hour_preds(hour: int, pred_type: str) -> list[float]:
    """Predict one forecast step for every cell of the 30 x 30 grid.

    Parameters
    ----------
    hour : forecast step passed through to the frame builders and predictors.
    pred_type : one of 'temp', 'press', 'humidity' or 'clouds'.

    Returns
    -------
    list[float]
        900 predictions in row-major order (index ``30 * row + col``).

    Raises
    ------
    ValueError
        If ``pred_type`` is not one of the supported names.  An unrecognised
        name would otherwise yield a list of 900 zeros that looks like a valid
        forecast.
    """
    known_types = ('temp', 'press', 'humidity', 'clouds')
    if pred_type not in known_types:
        raise ValueError(f"unknown pred_type {pred_type!r}; expected one of {known_types}")

    # (frame builder, predictor) per prediction type; looked up once, not per cell.
    handlers = {
        'temp': (create_df_temps, prediction_temp),
        'press': (create_df_pressure, prediction),
        'humidity': (create_df_humidity, prediction),
        'clouds': (create_df_clouds, prediction_clouds),
    }
    build_frame, predict_one = handlers[pred_type]

    preds = []
    for row in tqdm(range(30)):
        for col in range(30):
            n = find_nine_closest_neighbors(coordinates, (row, col))
            df, future_df = build_frame(n, hour, (row, col))
            preds.append(predict_one(df, future_df, hour, (row, col)))
    return preds


def prediction_temp(df: pd.DataFrame, future_df: pd.DataFrame, hour: int, coord: tuple) -> float:
    """Fit the temperature model on ``df`` and return its one-step forecast.

    ``df`` is cut to its first ``43 + hour`` rows, ``mlf_temps`` is refitted on
    it, and a single step is predicted from ``future_df`` (without its ``y``
    column).  ``coord`` is accepted for signature parity and not used.
    """
    train_end = 43 + hour
    history = df.iloc[:train_end, :]
    mlf_temps.fit(history, dropna=True, static_features=[])
    future_features = future_df.drop(columns=['y'])
    forecast = mlf_temps.predict(h=1, X_df=future_features)
    return forecast.CatBoostRegressor.values[0]


def prediction_clouds(df: pd.DataFrame, future_df: pd.DataFrame, hour: int, coord: tuple) -> float:
    """Return the one-step cloud forecast for one cell, refitting only on a sparse grid.

    ``mlf_forecast_clouds4`` is refitted on the first ``43 + hour`` rows of ``df``
    only when both coordinates are multiples of 10; for every other cell the most
    recently fitted model is reused.  The forecast is read from the ``model1``
    column of the prediction frame.
    """
    history = df.iloc[:43 + hour, :]
    refit_here = coord[0] % 10 == 0 and coord[1] % 10 == 0
    if refit_here:
        mlf_forecast_clouds4.fit(history, dropna=True, static_features=[])
    future_features = future_df.drop(columns=['y'])
    forecast = mlf_forecast_clouds4.predict(h=1, X_df=future_features)
    return forecast.model1.values[0]


def prediction(df: pd.DataFrame, future_df: pd.DataFrame, hour: int, coord: tuple) -> float:
    """Fit the generic model ``mlf`` on ``df`` and return its one-step forecast.

    The model is refitted on every call using the first ``43 + hour`` rows of
    ``df``; the forecast is made from ``future_df`` without its ``y`` column.
    ``coord`` is not used.
    """
    history = df.iloc[:43 + hour, :]
    mlf.fit(history, dropna=True, static_features=[])
    future_features = future_df.drop(columns=['y'])
    next_step = mlf.predict(h=1, X_df=future_features)
    return next_step.CatBoostRegressor.values[0]


def get_neibrs(row: int, col: int) -> list[tuple]:
    """Return eight neighbouring (row, col) cells for a cell of the 30 x 30 grid.

    Interior cells get the surrounding 3 x 3 ring.  Cells on an edge or in a
    corner (index 0 or 29) get eight cells taken from the two nearest rows and
    columns that lie inside the grid.  The order of the returned list is fixed
    per position type.
    """
    def band(index):
        # Classify an index as lying on the first line, the last line or in between.
        if index == 0:
            return 'first'
        if index == 29:
            return 'last'
        return 'inner'

    # (row band, col band) -> offsets relative to (row, col), in output order.
    offsets_by_position = {
        ('first', 'first'): [(0, 1), (1, 0), (1, 1), (2, 1), (2, 2), (1, 2), (2, 0), (0, 2)],
        ('last', 'first'): [(0, 1), (-1, 0), (-1, 1), (-2, 0), (-2, 1), (-2, 2), (-1, 2), (0, 2)],
        ('first', 'last'): [(0, -1), (1, -1), (1, 0), (0, -2), (1, -2), (2, -2), (2, -1), (2, 0)],
        ('last', 'last'): [(0, -1), (-1, -1), (-1, 0), (0, -2), (-1, -2), (-2, -2), (-2, -1), (-2, 0)],
        ('first', 'inner'): [(0, -1), (0, 1), (1, -1), (1, 0), (1, 1), (2, -1), (2, 0), (2, 1)],
        ('last', 'inner'): [(0, -1), (0, 1), (-1, -1), (-1, 0), (-1, 1), (-2, -1), (-2, 0), (-2, 1)],
        ('inner', 'first'): [(1, 0), (-1, 0), (1, 1), (0, 1), (-1, 1), (1, 2), (0, 2), (-1, 2)],
        ('inner', 'last'): [(1, 0), (-1, 0), (1, -1), (0, -1), (-1, -1), (1, -2), (0, -2), (-1, -2)],
        ('inner', 'inner'): [(-1, -1), (-1, 0), (-1, 1), (0, -1), (0, 1), (1, -1), (1, 0), (1, 1)],
    }
    offsets = offsets_by_position[(band(row), band(col))]
    return [(row + d_row, col + d_col) for d_row, d_col in offsets]


In [None]:
clouds.shape

In [None]:
make_all_forecast_clouds()

# Submit

In [None]:
clouds.shape

In [None]:
# Median over the grid for each of the five forecast hours (indices 43-47) of `t`.
# NOTE(review): `t` is only assigned by the np.load cell further down; on a fresh
# kernel this cell fails unless that cell has been run first.
tuple(np.median(t[h].reshape(-1)) for h in range(43, 48))

In [None]:
# Median over the grid for each of the five forecast hours (indices 43-47) of `clouds`.
tuple(np.median(clouds[h].reshape(-1)) for h in range(43, 48))

In [None]:
np.save('/content/clouds_forecast5_12.npy', clouds)

In [None]:
t = np.load('/content/clouds_forecast5_12.npy')
t.shape

In [None]:
solution = pd.read_csv('/content/drive/MyDrive/YADRO_Hack/solution_best2.csv')
solution.head()

In [None]:
# Keep only the five forecast hours (indices 43-47) of the extended cloud array.
clouds_forecast5 = np.array([clouds[h] for h in range(43, 48)])
clouds_forecast5.shape

In [None]:
clouds_forecast5 = clouds_forecast5.reshape(-1)
clouds_forecast5.shape, clouds_forecast5[:-5]

In [None]:
solution['cloud_cover'] = clouds_forecast5
solution.head()

In [None]:
solution.head()

In [None]:
solution.to_csv('/content/solution_best2_update_clouds9.csv', index=False)

# Experiments clouds

In [None]:
# 169 stationary series

In [None]:
import math


coordinates = [(i, j) for i in range(30) for j in range(30)]


def distance(point1, point2):
    """Return the Euclidean distance between two (row, col) points, scaled by 5."""
    d_first = point1[0] - point2[0]
    d_second = point1[1] - point2[1]
    squared = d_first ** 2 + d_second ** 2
    return 5 * math.sqrt(squared)

def find_nine_closest_neighbors(coordinates, target, k=24):
    """Return the ``k`` points of ``coordinates`` closest to ``target`` (24 by default).

    Parameters
    ----------
    coordinates : iterable of (row, col) points to search.
    target : (row, col) point whose neighbours are wanted.
    k : number of neighbours to return.

    Returns
    -------
    list
        At most ``k`` points sorted by increasing Euclidean distance to
        ``target``.  Points at equal distance keep the order they have in
        ``coordinates``.  ``target`` itself is never returned: a cell is not its
        own neighbour, and keeping it would push a real neighbour out of the
        result.  Fewer than ``k`` points are returned when ``coordinates`` does
        not contain enough other points.
    """
    target_point = tuple(target)

    def squared_distance(point):
        # Ordering by the squared distance equals ordering by the distance itself.
        return (point[0] - target_point[0]) ** 2 + (point[1] - target_point[1]) ** 2

    candidates = [point for point in coordinates if tuple(point) != target_point]
    # sorted() is stable, so ties are resolved by the order of `coordinates`.
    return sorted(candidates, key=squared_distance)[:k]


In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Add the neighbours as lagged features and measure the validation MAPE per grid cell.
# (The original note mentioned temperature, but this cell reads the `clouds` array.)
from tqdm import tqdm
# `hour` is assigned here but not read anywhere in this cell.
hour = 43
# arr3 collects the two-step forecasts of every cell; mean_mape accumulates the per-cell MAPE.
arr3 = []
mean_mape = 0
for row in tqdm(range(30)):
    for col in range(30):
        n = find_nine_closest_neighbors(coordinates, (row, col))
        # Row layout: id, ds, y, then the neighbour values repeated five times (one copy per lag group).
        df = pd.DataFrame(data=[['0/0', h + 1] + [clouds[h][row][col]] + [clouds[h][i[0]][i[1]] for i in n] +
         [clouds[h][i[0]][i[1]] for i in n] + [clouds[h][i[0]][i[1]] for i in n] + [clouds[h][i[0]][i[1]] for i in n] + [clouds[h][i[0]][i[1]] for i in n] for h in range(43)],
                    columns=['unique_id', 'ds', 'y'] +  [f'neibr{i + 1}_cloud' for i in range(24)] + [f'neibr{i + 1}_cloud_shift2' for i in range(24)] +
                          [f'neibr{i + 1}_cloud_shift3' for i in range(24)] + [f'neibr{i + 1}_cloud_shift4' for i in range(24)] + [f'neibr{i + 1}_cloud_shift5' for i in range(24)])
        # Lag the five groups of 24 neighbour columns by 1, 2, 3, 4 and 5 steps respectively.
        for i in df.columns[3:3+24]:
            df[i] = df[i].shift(1)
        for i in df.columns[3+24:3+24*2]:
            df[i] = df[i].shift(2)
        for i in df.columns[3+24*2:3+24*3]:
            df[i] = df[i].shift(3)
        for i in df.columns[3+24*3:3+24*4]:
            df[i] = df[i].shift(4)
        for i in df.columns[3+24*4:]:
            df[i] = df[i].shift(5)
        df['y'] = double_exponential_smoothing(df.y, 0.3, 0.2)
        # First 41 rows train the model; the last two rows are held out for validation.
        data_train = df.iloc[:41, :]
        data_val = df.iloc[41:, :]
        # The model is refitted only where both indices are multiples of 10; every
        # other cell reuses the most recent fit, so the iteration order matters.
        if row % 10 == 0 and col % 10 == 0:
            mlf_forecast_clouds4.fit(data_train, dropna=True, static_features=[])
        #if row == 0 and col == 0:
        #    mlf_forecast_clouds2.fit(data_train, dropna=True, static_features=[])
        ans_df = mlf_forecast_clouds4.predict(h=2, X_df=data_val.drop(columns=['y']))
        arr3.append(ans_df.model1.values.reshape(-1))#, ans_df.model2.values.reshape(-1)])
        mean_mape += mean_absolute_percentage_error(data_val['y'], ans_df.model1)#(ans_df.model2 + ans_df.model1) / 2)
# 900 = 30 x 30 grid cells visited by the two loops above.
mean_mape / 900

In [None]:
from scipy import stats
np.array(arr3).mean(), np.median(np.array(arr3))

In [None]:
#0.1914269615058467 (31.401661870978295, 31.062088936896615

In [None]:
# Cloud model with a quantile loss (alpha=0.2). The model is registered under the
# key 'model1', which is the name of its column in prediction frames.
# This assignment replaces any earlier object bound to mlf_forecast_clouds3.
mlf_forecast_clouds3 = MLForecast(
    models={'model1': CatBoostRegressor(iterations=300, max_depth=3, verbose=False, random_state=3, loss_function='Quantile:alpha=0.2')},
            #'model2': XGBRegressor(max_depth=5, random_state=3, objective='reg:quantileerror', quantile_alpha=0.85)},
    freq=1,
    lags=list(range(1, 5)),
    lag_transforms = {
        1:  [expanding_mean],
        2: [expanding_mean, RollingMean(window_size=5)],
        3: [RollingMean(window_size=5)],
    },
    target_transforms=[LocalStandardScaler()]#, Differences([1])]
)

In [None]:
# Cloud model with a quantile loss (alpha=0.1), registered under the key 'model1'
# (the column name read from its prediction frames). Lags 1-4 with expanding and
# 3-step rolling means; the target is standard-scaled per series.
mlf_forecast_clouds4 = MLForecast(
    models={'model1': CatBoostRegressor(iterations=400, max_depth=4, verbose=False, random_state=3, l2_leaf_reg=0.5, loss_function='Quantile:alpha=0.1')},
            #'model2': XGBRegressor(max_depth=5, random_state=3, objective='reg:quantileerror', quantile_alpha=0.85)},
    freq=1,
    lags=list(range(1, 5)),
    lag_transforms = {
        1:  [expanding_mean],
        2: [expanding_mean, RollingMean(window_size=3)],
        3: [expanding_mean, RollingMean(window_size=3)],
    },
    target_transforms=[LocalStandardScaler()]#, Differences([1])]
)

In [None]:
# Single-cell version of the cloud experiment: target cell (a, b) plus its
# neighbours lagged by one and by two steps.
a, b = 0, 0
n = find_nine_closest_neighbors(coordinates, (a, b))
# The column names must match the number of neighbours actually returned; a
# hard-coded count that differs from len(n) makes the DataFrame constructor fail.
n_neibrs = len(n)
df = pd.DataFrame(data=[['0/0', h + 1] + [clouds[h][a][b]] + [clouds[h][i[0]][i[1]] for i in n] +
         [clouds[h][i[0]][i[1]] for i in n] for h in range(43)],
                    columns=['unique_id', 'ds', 'y'] +  [f'neibr{i + 1}_cloud' for i in range(n_neibrs)] + [f'neibr{i + 1}_cloud_shift2' for i in range(n_neibrs)])
for col in df.columns[3:3+n_neibrs]:
    df[col] = df[col].shift(1)
for col in df.columns[3+n_neibrs:]:
    df[col] = df[col].shift(2)
df['y'] = double_exponential_smoothing(df.y, 0.3, 0.2)

In [None]:
df.head()

In [None]:
data_train = df.iloc[:41, :]
data_val = df.iloc[41:, :]

In [None]:
mlf_forecast_clouds4.fit(data_train, dropna=True, static_features=[])

In [None]:
# Predict with the model that the previous cell fitted (mlf_forecast_clouds4) and
# only for as many steps as data_val provides future feature rows.
ans_df = mlf_forecast_clouds4.predict(h=len(data_val), X_df=data_val.drop(columns=['y']))
# That model writes its forecast to the 'model1' column; expose it under the name
# the following cell reads.
ans_df = ans_df.rename(columns={'model1': 'CatBoostRegressor'})
ans_df.CatBoostRegressor

In [None]:
mean_absolute_percentage_error(data_val['y'], ans_df.CatBoostRegressor)

In [None]:
df.y.plot()

In [None]:
df['y_smooth'] = double_exponential_smoothing(df.y, 0.4, 0.2)
df.y_smooth.plot()

# Experiments temps

In [None]:
# Add the eight neighbours as lagged features and evaluate the temperature model only.
hour = 43  # not read anywhere in this cell
mean_mape = 0
n_series = 0  # number of grid cells evaluated, used as the denominator of the mean
for row in tqdm(range(0, 30, 6)):
    for col in range(30):
        n = get_neibrs(row, col)
        df = pd.DataFrame(data=[['0/0', h + 1, temps[h][row][col]] + [temps[h][i[0]][i[1]] for i in n] + [temps[h][i[0]][i[1]] for i in n]  for h in range(43)],
        columns=['unique_id', 'ds', 'y', 'neibr1', 'neibr2', 'neibr3', 'neibr4', 'neibr5', 'neibr6', 'neibr7', 'neibr8',
         'neibr1_shift2', 'neibr2_shift2', 'neibr3_shift2', 'neibr4_shift2', 'neibr5_shift2', 'neibr6_shift2', 'neibr7_shift2', 'neibr8_shift2'])
        # The feature loops use their own name so that the grid index `col` is
        # not overwritten inside the loop that iterates over it.
        for feature in df.columns[3:11]:
            df[feature] = df[feature].shift(1)
        for feature in df.columns[11:19]:
            df[feature] = df[feature].shift(2)
        df.y = double_exponential_smoothing(df.y, 0.4, 0.2)
        # First 38 rows train the model; the last five are the validation horizon.
        data_train = df.iloc[:38, :]
        data_val = df.iloc[38:, :]
        mlf_temps.fit(data_train, dropna=True, static_features=[])
        ans_df = mlf_temps.predict(h=5, X_df=data_val.drop(columns=['y']))
        mean_mape += mean_absolute_percentage_error(data_val['y'], ans_df.CatBoostRegressor)
        n_series += 1
mean_mape / n_series

In [None]:
#0.04695709071939297

In [None]:
# Temperature model: CatBoost on lags 1-4 with expanding, exponentially weighted
# and 5-step rolling means of the lagged target. Rebinding the name discards any
# previously fitted state.
mlf_temps = MLForecast(
    models=[CatBoostRegressor(iterations=800, max_depth=5, verbose=False, random_state=3)],
    freq=1,
    lags=[1, 2, 3, 4],
    lag_transforms = {
        1:  [expanding_mean, ExponentiallyWeightedMean(0.3)],
        2: [expanding_mean, RollingMean(window_size=5)],
        #3: [RollingMean(window_size=5)]
    },
)

In [None]:
# Temperature frame for cell (0, 0): the eight neighbours appear three times,
# lagged by one, two and three steps.
n = get_neibrs(0, 0)
neibr_names = [f'neibr{k}' for k in range(1, 9)]
lagged_names = (neibr_names
                + [f'{name}_shift2' for name in neibr_names]
                + [f'{name}_shift3' for name in neibr_names])
temps_rows = []
for h in range(43):
    neibr_values = [temps[h][i[0]][i[1]] for i in n]
    temps_rows.append(['0/0', h + 1, temps[h][0][0]] + neibr_values * 3)
df = pd.DataFrame(data=temps_rows, columns=['unique_id', 'ds', 'y'] + lagged_names)
#df.y = double_exponential_smoothing(df.y, 0.3, 0.2)
for lag in (1, 2, 3):
    lag_group = lagged_names[8 * (lag - 1):8 * lag]
    df[lag_group] = df[lag_group].shift(lag)

In [None]:
df.y.plot()

In [None]:
df['y2'] = double_exponential_smoothing(df.y, 0.5, 0.5)
df.y2.plot()

In [None]:
data_train = df.iloc[:38, :]
data_val = df.iloc[38:, :]

In [None]:
mlf_temps.fit(data_train, dropna=True, static_features=[])

In [None]:
ans_df = mlf_temps.predict(h=5, X_df=data_val.drop(columns=['y']))
ans_df.CatBoostRegressor, mean_absolute_percentage_error(data_val['y'], ans_df.CatBoostRegressor)

# SARIMAX

In [None]:
!pip install pmdarima -q

In [None]:
import pmdarima as pm

In [None]:
temps1['neibr1'].shift(1)

In [None]:
# Split `temps1` into the first 38 rows (training) and the remainder (validation).
# NOTE(review): `temps1` is not defined anywhere in the visible notebook; presumably
# it is a frame with 'y' and 'neibr1'..'neibr8' columns built in a cell that is no
# longer present - confirm before running.
n = 38
data_train = temps1.iloc[:n, :]
data_val = temps1.iloc[n:, :]

In [None]:
exog = ['neibr1', 'neibr2', 'neibr3', 'neibr4', 'neibr5', 'neibr6', 'neibr7', 'neibr8']

In [None]:
# Seasonal ARIMA search with the neighbour columns as exogenous regressors.
# The regressors are passed as `X`, the keyword current pmdarima releases use for
# the exogenous matrix (`exogenous` is the legacy spelling).
SARIMAX_model = pm.auto_arima(data_train[['y']], X=data_train[exog],
                           start_p=1, start_q=1,
                           test='adf',
                           max_p=3, max_q=3, m=12,
                           start_P=0, seasonal=True,
                           d=None, D=1,
                           trace=False,
                           error_action='ignore',
                           suppress_warnings=True,
                           stepwise=True)

In [None]:
# Five-step forecast with confidence intervals; the future regressors are passed
# as `X`, the keyword current pmdarima releases use for the exogenous matrix.
fitted, confint = SARIMAX_model.predict(n_periods=5,
                                            return_conf_int=True,
                                            X=data_val[exog])


In [None]:
from sklearn.metrics import mean_absolute_percentage_error


mean_absolute_percentage_error(data_val['y'], fitted)

# Best cloud 0.66


```python
mlf_forecast_clouds3 = MLForecast(
    models={'model1': CatBoostRegressor(iterations=1000, max_depth=5, verbose=False, random_state=3, l2_leaf_reg=2, loss_function='Quantile:alpha=0.1')},
            #'model2': XGBRegressor(max_depth=5, random_state=3, objective='reg:quantileerror', quantile_alpha=0.85)},
    freq=1,
    lags=list(range(1, 5)),
    lag_transforms = {
        1:  [expanding_mean],
        2: [expanding_mean, RollingMean(window_size=3)],
        3: [expanding_mean, RollingMean(window_size=3)],
    },
    target_transforms=[LocalStandardScaler()]#, Differences([1])]
)
```



```python
n = find_nine_closest_neighbors(coordinates, (a, b))
df = pd.DataFrame(data=[['0/0', h + 1] + [clouds[h][a][b]] #+ [clouds[h][i[0]][i[1]] for i in n] + [clouds[h][i[0]][i[1]] for i in n]
        + [clouds[h][i[0]][i[1]] for i in n] + [clouds[h][i[0]][i[1]] for i in n]
                        for h in range(43)],
                    columns=['unique_id', 'ds', 'y', 'neibr1_cloud', 'neibr2_cloud', 'neibr3_cloud', 'neibr4_cloud', 'neibr5_cloud', 'neibr6_cloud',
                            'neibr7_cloud', 'neibr8_cloud', 'neibr9_cloud', 'neibr10_cloud', 'neibr11_cloud', 'neibr12_cloud', 'neibr13_cloud', 'neibr14_cloud',
                            'neibr15_cloud', 'neibr16_cloud', 'neibr17_cloud', 'neibr18_cloud', 'neibr19_cloud', 'neibr20_cloud', 'neibr21_cloud', 'neibr22_cloud',
                            'neibr23_cloud', 'neibr24_cloud',
                             'neibr1_cloud_shift2', 'neibr2_cloud_shift2', 'neibr3_cloud_shift2', 'neibr4_cloud_shift2',
                            'neibr5_cloud_shift2', 'neibr6_cloud_shift2', 'neibr7_cloud_shift2', 'neibr8_cloud_shift2',
                             'neibr9_cloud_shift2', 'neibr10_cloud_shift2', 'neibr11_cloud_shift2', 'neibr12_cloud_shift2',
                            'neibr13_cloud_shift2', 'neibr14_cloud_shift2', 'neibr15_cloud_shift2', 'neibr16_cloud_shift2',
                             'neibr17_cloud_shift2', 'neibr18_cloud_shift2', 'neibr19_cloud_shift2', 'neibr20_cloud_shift2',
                            'neibr21_cloud_shift2', 'neibr22_cloud_shift2', 'neibr23_cloud_shift2', 'neibr24_cloud_shift2'])
for col in df.columns[3:27]:
    df[col] = df[col].shift(1)
for col in df.columns[27:27+24]:
    df[col] = df[col].shift(2)
```

# Best Temp 0.054


```python
mlf_temps = MLForecast(
    models=[CatBoostRegressor(iterations=800, max_depth=5, verbose=False, random_state=3)],
    freq=1,
    lags=[1, 2, 3, 4],
    lag_transforms = {
        1:  [expanding_mean, ExponentiallyWeightedMean(0.3)],
        2: [expanding_mean, RollingMean(window_size=5)],
        #3: [RollingMean(window_size=5)]
    },
)
```


```python
def create_df_temps(n: list[tuple], hour: int, coord: tuple) -> tuple:
    df = pd.DataFrame(data=[['0/0', h + 1, temps[h][coord[0]][coord[1]]] + [temps[h][i[0]][i[1]] for i in n] + [temps[h][i[0]][i[1]] for i in n]  for h in range(43 + hour - 1)],
        columns=['unique_id', 'ds', 'y', 'neibr1', 'neibr2', 'neibr3', 'neibr4', 'neibr5', 'neibr6', 'neibr7', 'neibr8',
         'neibr1_shift2', 'neibr2_shift2', 'neibr3_shift2', 'neibr4_shift2', 'neibr5_shift2', 'neibr6_shift2', 'neibr7_shift2', 'neibr8_shift2'])
    df.y = double_exponential_smoothing(df.y, 0.4, 0.2)
    future_df = df.iloc[[-1], :].copy()
    future_df['ds'] = 43 + hour
    for col in df.columns[3:11]:
        df[col] = df[col].shift(1)
    for col in df.columns[11:19]:
        df[col] = df[col].shift(2)
    return df, future_df
```