In [None]:
# Mount Google Drive at /content/drive (Colab-only); the .npy inputs loaded
# further down are read from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

In [None]:
# Folder on the mounted Drive that holds the .npy inputs.
# Kept in one place so the notebook can be pointed at another location easily.
DATA_DIR = '/content/drive/MyDrive/Colab Notebooks/хак yadro'

# The hourly fields are indexed below as [hour][row][col];
# elevation is indexed as [row][col].
temps = np.load(f'{DATA_DIR}/temperature.npy')
clouds = np.load(f'{DATA_DIR}/cloud_cover.npy')
humidity = np.load(f'{DATA_DIR}/humidity.npy')
elevation = np.load(f'{DATA_DIR}/elevation.npy')
pressure = np.load(f'{DATA_DIR}/pressure.npy')
wind_dir = np.load(f'{DATA_DIR}/wind_dir.npy')
wind_speed = np.load(f'{DATA_DIR}/wind_speed.npy')

In [None]:
# Long-format table ordered hour by hour: every hour contributes 900 consecutive
# rows, one per grid coordinate (30 x 30), each carrying that cell's features.
def _weather_rows():
  """Yield one [hour, row, col, features..., temperature] record per grid cell and hour."""
  for hour in range(43):
    for row in range(30):
      for col in range(30):
        yield [
            hour,
            row,
            col,
            elevation[row, col],
            clouds[hour, row, col],
            humidity[hour, row, col],
            pressure[hour, row, col],
            wind_dir[hour, row, col],
            wind_speed[hour, row, col],
            temps[hour, row, col],
        ]

df = pd.DataFrame(
    data=list(_weather_rows()),
    columns=['hour', 'row', 'col', 'elevation', 'cloud', 'humidity', 'pressure', 'wind_dir', 'wind_speed', 'temperature'],
)

In [None]:
# Every (i, j) position of the 30 x 30 grid, first component varying slowest.
coordinates = [(flat // 30, flat % 30) for flat in range(30 * 30)]
# First and second components of the first 900 positions (i.e. all of them).
x_values = [point[0] for point in coordinates[:900]]
y_values = [point[1] for point in coordinates[:900]]

In [None]:
import math

# Distance between two points on the map.
def distance(point1, point2):
    """Return the Euclidean distance between two 2-component points, scaled by 5.

    The factor 5 presumably converts grid steps to physical units
    (cell size) -- TODO confirm.
    """
    delta_first = point1[0] - point2[0]
    delta_second = point1[1] - point2[1]
    squared_length = delta_first**2 + delta_second**2
    return math.sqrt(squared_length) * 5

# Find the n_neighbours + 1 points of `coordinates` that are closest to `target`.
def find_n_closest_neighbors(coordinates, target, n_neighbours):
    """Return up to ``n_neighbours + 1`` points of ``coordinates`` nearest to ``target``.

    If ``target`` itself is in ``coordinates`` it is one of the returned points
    (distance 0), which is why one extra slot is kept.

    The result is NOT sorted by distance: points sit in the slot they were
    written to while scanning ``coordinates``. Among equidistant candidates,
    whichever is scanned first is kept.

    Args:
        coordinates: iterable of 2-component points to search.
        target: 2-component point the distances are measured from.
        n_neighbours: number of neighbours wanted in addition to the closest point.

    Returns:
        list of points. It has ``n_neighbours + 1`` entries when enough
        candidates exist, otherwise one entry per candidate. It is empty when
        ``n_neighbours`` is negative.
    """
    n_slots = n_neighbours + 1
    if n_slots <= 0:
        return []

    unfilled = float('inf')
    min_neighbors = [(0, 0)] * n_slots
    min_distances = [unfilled] * n_slots

    for coord in coordinates:
        dist = distance(coord, target)
        # Replace the currently worst kept point when this one is strictly closer.
        worst = max(min_distances)
        if dist < worst:
            idx = min_distances.index(worst)
            min_distances[idx] = dist
            min_neighbors[idx] = coord

    # Slots never written still hold the (0, 0) placeholder; drop them so a short
    # candidate list cannot produce fabricated neighbours.
    return [
        neighbor
        for neighbor, dist in zip(min_neighbors, min_distances)
        if dist != unfilled
    ]

# Example: pick a target point on the map (the grid `coordinates` list is reused).

target = (0, 0)

# Look up the target's 15 nearest neighbours; the call returns n_neighbours + 1
# slots, so 16 points are printed here.
result = find_n_closest_neighbors(coordinates, target, 15)
print(result)


In [None]:
def get_target_point_df(target_point, target_name, predictors, n_neighbours, df, shift = False, normalize = False):
  """Build a feature table for one grid point from its nearest neighbours.

  Each returned row corresponds to one row of ``df`` per point, in ``df`` order
  (one per hour in this notebook). For the i-th neighbour found by
  ``find_n_closest_neighbors`` (searched in the global ``coordinates`` list),
  the columns ``f'{i}_{predictor}'`` hold that neighbour's predictor values.
  The last column, ``'target'``, holds ``target_name`` at ``target_point``.

  Points are matched as (col, row): ``point[0]`` is compared with ``df['col']``
  and ``point[1]`` with ``df['row']``.

  Args:
    target_point: 2-component point whose value is to be predicted.
    target_name: name of the ``df`` column used as the target.
    predictors: list of ``df`` column names taken from every neighbour.
    n_neighbours: passed through to ``find_n_closest_neighbors``.
    df: long-format frame with at least 'row', 'col' and the named columns.
    shift: when True, every neighbour column of ``target_name`` is lagged by
      one row, so only the previous row's value is used as a feature. The
      first row, which has no lag, is then dropped.
    normalize: when True, min-max scale all feature columns. The scaler is
      fitted on every row of the table, including rows a caller may later
      use as a hold-out.

  Returns:
    pandas.DataFrame with the feature columns followed by 'target' and a
    fresh 0..n-1 index.
  """
  neighbors = find_n_closest_neighbors(coordinates, target_point, n_neighbours=n_neighbours)

  def rows_at(point):
    # All rows of df that belong to one grid point.
    return df[(df['col'] == point[0]) & (df['row'] == point[1])]

  # Collect one block of columns per neighbour and concatenate once, instead of
  # growing the frame with a concat per iteration.
  blocks = []
  for i, neig in enumerate(neighbors):
    names = [f'{i}_{predictor}' for predictor in predictors]
    blocks.append(pd.DataFrame(data = rows_at(neig)[predictors].to_numpy(), columns = names))
  blocks.append(pd.DataFrame(rows_at(target_point)[target_name].to_numpy(), columns = ['target']))
  df_neighbors = pd.concat(blocks, axis = 1)

  if shift:
    # Lag exactly the '<i>_<target_name>' columns. An exact name match avoids
    # catching other predictors whose name merely contains target_name.
    for i in range(len(neighbors)):
      lagged = f'{i}_{target_name}'
      if lagged in df_neighbors.columns:
        df_neighbors[lagged] = df_neighbors[lagged].shift(1)
    # drop=True: without it the old row labels come back as an extra 'index'
    # column and silently become a model feature.
    df_neighbors = df_neighbors.iloc[1:].reset_index(drop = True)

  if normalize:
    features = df_neighbors.drop(['target'], axis = 1)
    scaled = pd.DataFrame(MinMaxScaler().fit_transform(features), columns = features.columns, index = features.index)
    df_neighbors = pd.concat([scaled, df_neighbors['target']], axis = 1)

  return df_neighbors

In [None]:
# Preview the lagged, min-max-scaled feature table for grid point (11, 11).
# The predictor list is passed explicitly so the cell runs on a fresh kernel.
get_target_point_df((11, 11), 'wind_speed', ['wind_speed', 'temperature', 'humidity'], 3, df, shift = True, normalize = True).head()

In [None]:
pip install catboost

In [None]:
from sklearn.metrics import mean_absolute_percentage_error
from catboost import CatBoostRegressor
from xgboost import XGBRegressor

In [None]:
def double_exponential_smoothing(series, alpha, beta):
    """Double exponential smoothing (level + trend) with one extra step appended.

    Args:
        series: indexable sequence of at least two numbers.
        alpha: smoothing factor applied to the level.
        beta: smoothing factor applied to the trend.

    Returns:
        list of ``len(series) + 1`` values: ``series[0]`` followed by
        ``level + trend`` after each update. The final update feeds the
        previous output back in as the observation (the forecast step).
    """
    smoothed = [series[0]]
    # Start from the first observation and the first difference.
    level = series[0]
    trend = series[1] - series[0]
    n_points = len(series)

    for step in range(1, n_points + 1):
        # Past the end of the data, continue from the last produced value.
        observed = series[step] if step < n_points else smoothed[-1]
        previous_level = level
        level = alpha*observed + (1-alpha)*(previous_level+trend)
        trend = beta*(level-previous_level) + (1-beta)*trend
        smoothed.append(level+trend)

    return smoothed

def exponential_smoothing(series, alpha):
    """Simple exponential smoothing.

    Args:
        series: indexable sequence of numbers with at least one element.
        alpha: weight given to the current observation; ``1 - alpha`` goes to
            the previous smoothed value.

    Returns:
        numpy array of the same length as ``series``; the first element
        equals ``series[0]``.
    """
    smoothed = [series[0]]  # seed with the first observation
    for position in range(1, len(series)):
        previous = smoothed[-1]
        smoothed.append(alpha * series[position] + (1 - alpha) * previous)
    return np.array(smoothed)

In [None]:
def predict_target(points, alpha, model, beta, target_name, predictors, hour_split, n_neighbours, normalize = False):
  """Fit ``model`` per grid point and predict the row at position ``hour_split``.

  For each point, a lagged neighbour feature table is built with
  ``get_target_point_df`` (reading the global ``df``). The model is trained
  on rows ``[:hour_split]`` and asked to predict row ``hour_split``. Training
  targets are the double-exponentially smoothed target series; the returned
  ground truth is the raw, unsmoothed value.

  Args:
    points: iterable of 2-component grid points to evaluate.
    alpha: level smoothing factor for the training targets.
    model: regressor with ``fit``/``predict``; the same instance is refitted
      for every point.
    beta: trend smoothing factor for the training targets.
    target_name: ``df`` column to predict.
    predictors: ``df`` columns taken from every neighbour as features.
    hour_split: row position separating training rows from the predicted row.
    n_neighbours: number of neighbours used for the features.
    normalize: forwarded to ``get_target_point_df``.

  Returns:
    ``(prediction_lst, y_test_lst)``: one ``model.predict`` output (array with
    a single value) and one true target value per point.
  """
  prediction_lst = []
  y_test_lst = []

  for point in points:
    train_df = get_target_point_df(target_name = target_name, df = df, predictors = predictors, target_point = point, shift = True, n_neighbours=n_neighbours, normalize=normalize)
    X = train_df.drop(['target'], axis = 1).to_numpy()
    y = double_exponential_smoothing(series = train_df['target'].values, alpha = alpha, beta = beta)

    X_train, y_train = X[:hour_split], y[:hour_split]
    # Index with a list so the held-out row stays a 2-D (1, n_features) array.
    X_test = X[[hour_split]]

    model.fit(X_train, y_train)
    prediction_lst.append(model.predict(X_test))
    # Ground truth must come from the same row that was predicted. This was
    # hard-coded to 41, which is only right for hour_split == 41.
    y_test_lst.append(train_df['target'].iloc[hour_split])

  return prediction_lst, y_test_lst

In [None]:
# Compare candidate regressors (CatBoost first, then XGBoost) with fixed
# smoothing parameters; prints one MAPE per model.
for model in (CatBoostRegressor(silent = True), XGBRegressor()):
  predictions, y_true = predict_target(
      points = coordinates[:150],
      alpha = 0.32,
      beta = 0.15,
      target_name = 'wind_speed',
      predictors = ['wind_speed', 'temperature', 'humidity'],
      hour_split = 41,
      n_neighbours = 15,
      model = model,
  )
  print(mean_absolute_percentage_error(y_true, predictions))

In [None]:
# Coarse search over the trend smoothing factor beta (alpha fixed at 0.32).
for beta in [0.1, 0.15, 0.2]:
  predictions, y_true = predict_target(
      points = coordinates[:150],
      alpha = 0.32,
      beta = beta,
      target_name = 'wind_speed',
      predictors = ['wind_speed', 'temperature', 'humidity'],
      hour_split = 41,
      n_neighbours = 15,
      model = XGBRegressor(),
  )
  print(mean_absolute_percentage_error(y_true, predictions))

In [None]:
# Finer search over beta around the previous candidates (alpha fixed at 0.32).
for beta in [0.17, 0.19, 0.21, 0.23]:
  predictions, y_true = predict_target(
      points = coordinates[:150],
      alpha = 0.32,
      beta = beta,
      target_name = 'wind_speed',
      predictors = ['wind_speed', 'temperature', 'humidity'],
      hour_split = 41,
      n_neighbours = 15,
      model = XGBRegressor(),
  )
  print(mean_absolute_percentage_error(y_true, predictions))

In [None]:
# Joint grid search over beta (outer loop) and alpha (inner loop);
# prints MAPE followed by the alpha and beta that produced it.
for beta in [0.15, 0.2, 0.25]:
  for alpha in [0.25, 0.3, 0.35]:
    predictions, y_true = predict_target(
        points = coordinates[:100],
        alpha = alpha,
        beta = beta,
        target_name = 'wind_speed',
        predictors = ['wind_speed', 'temperature', 'humidity'],
        hour_split = 41,
        n_neighbours = 15,
        model = XGBRegressor(),
    )
    print(mean_absolute_percentage_error(y_true, predictions), alpha, beta)

In [None]:
# Finer joint grid over beta and alpha;
# prints MAPE followed by the alpha and beta that produced it.
for beta in [0.22, 0.23, 0.24, 0.26]:
  for alpha in [0.23, 0.24, 0.26, 0.27]:
    predictions, y_true = predict_target(
        points = coordinates[:100],
        alpha = alpha,
        beta = beta,
        target_name = 'wind_speed',
        predictors = ['wind_speed', 'temperature', 'humidity'],
        hour_split = 41,
        n_neighbours = 15,
        model = XGBRegressor(),
    )
    print(mean_absolute_percentage_error(y_true, predictions), alpha, beta)

In [None]:
# Search over the number of neighbours with the smoothing factors fixed below.

alpha = 0.24
beta = 0.26

for n_neighbours in [9, 15, 24]:
  predictions, y_true = predict_target(
      points = coordinates[:100],
      alpha = alpha,
      beta = beta,
      target_name = 'wind_speed',
      predictors = ['wind_speed', 'temperature', 'humidity'],
      hour_split = 41,
      n_neighbours = n_neighbours,
      model = XGBRegressor(),
  )
  print(mean_absolute_percentage_error(y_true, predictions), n_neighbours)


In [None]:
# Search over larger neighbour counts.
# Author's conclusion: the more neighbours, the better the result.

alpha = 0.24
beta = 0.26

for n_neighbours in [25, 30, 35]:
  predictions, y_true = predict_target(
      points = coordinates[:100],
      alpha = alpha,
      beta = beta,
      target_name = 'wind_speed',
      predictors = ['wind_speed', 'temperature', 'humidity'],
      hour_split = 41,
      n_neighbours = n_neighbours,
      model = XGBRegressor(),
  )
  print(mean_absolute_percentage_error(y_true, predictions), n_neighbours)

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
# Search over XGBoost tree depth (3 neighbours, alpha/beta from the cells above).
for max_depth in [4, 5, 10, 15]:
  predictions, y_true = predict_target(
      points = coordinates[:100],
      alpha = alpha,
      beta = beta,
      target_name = 'wind_speed',
      predictors = ['wind_speed', 'temperature', 'humidity'],
      hour_split = 41,
      n_neighbours = 3,
      model = XGBRegressor(max_depth=max_depth),
  )
  print(mean_absolute_percentage_error(y_true, predictions), max_depth)

In [None]:
# Full grid over depth, learning rate, number of trees and seed;
# prints MAPE followed by the four settings that produced it.
for max_depth in [1, 5, 10]:
  for learning_rate in [0.1, 0.5, 1]:
    for n_estimators in [50, 500, 1000]:
      for random_state in [3, 42, 1000]:
        candidate = XGBRegressor(
            max_depth=max_depth,
            learning_rate=learning_rate,
            n_estimators=n_estimators,
            random_state=random_state,
        )
        predictions, y_true = predict_target(
            points = coordinates[:100],
            alpha = alpha,
            beta = beta,
            target_name = 'wind_speed',
            predictors = ['wind_speed', 'temperature', 'humidity'],
            hour_split = 41,
            n_neighbours = 3,
            model = candidate,
        )
        print(mean_absolute_percentage_error(y_true, predictions), max_depth, learning_rate, n_estimators, random_state)

In [None]:
# Narrower grid over tree depth and number of trees;
# prints MAPE followed by max_depth and n_estimators.
for max_depth in [1, 3, 5, 7]:
    for n_estimators in [500, 600, 700, 1000]:
      predictions, y_true = predict_target(
          points = coordinates[:100],
          alpha = alpha,
          beta = beta,
          target_name = 'wind_speed',
          predictors = ['wind_speed', 'temperature', 'humidity'],
          hour_split = 41,
          n_neighbours = 3,
          model = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators),
      )
      print(mean_absolute_percentage_error(y_true, predictions), max_depth, n_estimators)

In [None]:
# Hyper-parameters used for this and the following cells.
# The second name was misspelled `n_esimators`, so `n_estimators` silently kept
# the last value of the previous cell's loop instead of 500.
max_depth = 1
n_estimators = 500
random_state = 42

# Compare predictor sets; prints MAPE followed by the predictor list.
for predictors in [['wind_speed', 'temperature', 'humidity'], ['wind_speed'], ['wind_speed', 'humidity', 'temperature', 'pressure'], ['wind_speed', 'pressure']]:
  predictions, y_true = predict_target(
      points = coordinates[:100],
      alpha = alpha,
      beta = beta,
      target_name = 'wind_speed',
      predictors = predictors,
      hour_split = 41,
      n_neighbours = 3,
      model = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, random_state=random_state),
  )
  print(mean_absolute_percentage_error(y_true, predictions), predictors)

In [None]:
# Run the chosen configuration on every grid point (35 neighbours).
predictors = ['wind_speed', 'humidity', 'temperature', 'pressure']
predictions, y_true = predict_target(
    points = coordinates,
    alpha = alpha,
    beta = beta,
    target_name = 'wind_speed',
    predictors = predictors,
    hour_split = 41,
    n_neighbours = 35,
    model = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, random_state = 42),
)

In [None]:
# MAPE of the most recent predict_target run held in `predictions` / `y_true`
# (the full-grid run when cells are executed in order).
mean_absolute_percentage_error(y_true, predictions)

In [None]:
# Compare min-max-normalised features (first line printed) against raw features.
for norm in (True, False):
  predictions, y_true = predict_target(
      points = coordinates[:100],
      alpha = alpha,
      beta = beta,
      target_name = 'wind_speed',
      predictors = predictors,
      hour_split = 41,
      n_neighbours = 35,
      model = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, random_state = 42),
      normalize = norm,
  )
  print(mean_absolute_percentage_error(y_true, predictions))

In [None]:
# Chosen configuration on the first 100 grid points, without normalisation.
predictions, y_true = predict_target(
    points = coordinates[:100],
    alpha = alpha,
    beta = beta,
    target_name = 'wind_speed',
    predictors = predictors,
    hour_split = 41,
    n_neighbours = 35,
    model = XGBRegressor(max_depth=max_depth, n_estimators=n_estimators, random_state = 42),
)
print(mean_absolute_percentage_error(y_true, predictions))