## 1. Read raw data

In [None]:
import pandas as pd
import numpy as np
import torch
from xgboost import XGBClassifier

# Import the class that builds the dataset
from xgboost_data_creater.create_dataset_for_one_model_with_neighbours import AllPointsDatasetCreater

In [None]:
# Load the raw data object from disk; the next cell calls .numpy() on it, so a
# tensor is expected here.
# NOTE(review): the path ends in ".csv", but torch.load reads files written by
# torch.save, not CSV text — confirm the file format (and consider renaming).
torch_data = torch.load("data/pdsi_CentralKZ.csv")

In [None]:
# Convert the loaded object to a NumPy array; this array is what gets passed
# to AllPointsDatasetCreater further down.
numpy_data = torch_data.numpy()

In [None]:
# Show the array dimensions. Later cells use the axis-0 length for time_border
# and the axis-1 / axis-2 lengths for x_max / y_max.
numpy_data.shape

## 2. Preparing data

In [None]:
# Dataset-creation settings. The names defined here are reused by later cells
# (num_of_future_indexes for the feature/target split, the x/y bounds for the
# number of evaluated points).
hist_len = 2
num_of_future_indexes = 12
pdsi_threshold = -2
filter_size = (1, 1)

# Spatial window: lower bounds start at 1, upper bounds are the sizes of
# axes 1 and 2 of the data array.
x_min, y_min = 1, 1
x_max, y_max = numpy_data.shape[1], numpy_data.shape[2]

# 70% of the axis-0 length; presumably the train/test split position along
# time — confirm against AllPointsDatasetCreater.
time_border = int(0.7 * numpy_data.shape[0])

data_creater = AllPointsDatasetCreater(
    numpy_data,
    history_len=hist_len,
    num_of_future_indexes=num_of_future_indexes,
    time_border=time_border,
    x_min=x_min,
    x_max=x_max,
    y_min=y_min,
    y_max=y_max,
    filter_size=filter_size,
    pdsi_threshold=pdsi_threshold,
)

In [None]:
%%time
# Build the train and test datasets inside data_creater; the following cells
# read them back through get_train_array() / get_test_array_by_points().
data_creater.create_train_and_test_datasets()

In [None]:
# Fetch the training array (presumably populated by
# create_train_and_test_datasets() — confirm). It is sliced column-wise into
# features and targets a few cells below.
train = data_creater.get_train_array()

In [None]:
# Fetch the test data; the evaluation loop indexes it per point as
# test_by_point[ind] and slices each entry column-wise like the train array.
test_by_point = data_creater.get_test_array_by_points()

In [None]:
# Split the training array column-wise: the trailing num_of_future_indexes
# columns are the targets, everything before them is the feature matrix.
target_start = -num_of_future_indexes
trainX = train[:, :target_start]
trainy = train[:, target_start:]

In [None]:
# Show the dimensions of the feature matrix.
trainX.shape

In [None]:
# Show the dimensions of the target matrix (num_of_future_indexes columns).
trainy.shape

## 3. Fitting XGBoost

In [None]:
%%time
### Train XGBoost
# A single classifier with default hyperparameters is fitted on all targets at
# once: trainy holds one column per forecast horizon.
# NOTE(review): fitting a 2-D target relies on XGBoost's multi-output support —
# confirm the installed xgboost version provides it.
model = XGBClassifier()
model.fit(trainX, trainy)

## 4. Testing model

In [None]:
from sklearn.metrics import roc_auc_score, accuracy_score

In [None]:
# Collects one median ROC AUC per forecast horizon; filled by the evaluation
# loop below. Re-run this cell before re-running the loop, otherwise the list
# keeps growing.
roc_auc_list = []

In [None]:
# Horizon indices 0 .. num_of_future_indexes - 1; each one is used as a column
# index into the target and prediction arrays in the evaluation loop.
forecast_hors = list(range(num_of_future_indexes))

In [None]:
# Evaluate the model per spatial point and per forecast horizon, then append
# the median ROC AUC over points for each horizon to roc_auc_list.
#
# model.predict() does not depend on the horizon, so each point is predicted
# once and the result is reused for every horizon (previously the prediction
# was recomputed for every (horizon, point) pair). The appended values are
# unchanged.
#
# NOTE(review): roc_auc_score is fed the hard labels from predict(); ROC AUC
# is normally computed from scores such as predict_proba() — confirm this is
# intended before changing it, since it would alter the reported numbers.
num_points = (x_max - x_min) * (y_max - y_min)
roc_auc_by_point = np.empty((num_points, len(forecast_hors)))

for ind in range(num_points):
    point_data = test_by_point[ind]
    # The trailing num_of_future_indexes columns are the targets; the rest
    # are the features.
    testX = point_data[:, :-num_of_future_indexes]
    testy = point_data[:, -num_of_future_indexes:]
    pred = model.predict(testX)

    for col, horizon in enumerate(forecast_hors):
        roc_auc_by_point[ind, col] = roc_auc_score(testy[:, horizon], pred[:, horizon])

# One median per horizon, in the same order as forecast_hors.
roc_auc_list.extend(np.median(roc_auc_by_point, axis=0))

In [None]:
# Display the per-horizon median ROC AUC values rounded to two decimals.
np.round(roc_auc_list, 2)