In [198]:
import numpy as np
from scipy.spatial import distance
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.utils import resample
import os

In [190]:
def preprocessing(station, offset=10, interval_size=40, upsample=True):
    """Cut a station's ``uz`` series into labelled, overlapping windows.

    The station CSV is read from ``../Artificial_data/DOGEx_v1/csv/<station>.csv``
    and the offset catalogue from ``synthetic-meta.txt`` (both relative to the
    working directory). A window is labelled 1 when any of its dates appears in
    the catalogue for this station with direction ``'Z'``, otherwise 0.

    Parameters
    ----------
    station : str
        Station identifier; used both as the CSV file stem and to filter the
        catalogue.
    offset : int, default 10
        Number of rows between the starts of consecutive windows.
    interval_size : int, default 40
        Number of rows in each window.
    upsample : bool, default True
        If True, windows labelled 1 are resampled with replacement (fixed seed
        42) until they match the number of windows labelled 0, and the result
        is returned as all label-0 windows followed by the resampled label-1
        windows. If False, every window is returned once, in file order.

        Upsampling creates exact copies of windows. When the returned arrays
        are split into train/test sets afterwards, copies of one window can
        land on both sides; pass ``upsample=False``, split first and balance
        only the training part to avoid that.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, interval_size), dtype float64
        The ``uz`` values of each window.
    y : numpy.ndarray, shape (n_samples,), dtype int
        The 0/1 label of each window.

    Raises
    ------
    ValueError
        If ``offset`` or ``interval_size`` is not positive, or if
        ``upsample`` is True and no window contains a catalogued offset.
    """
    if offset <= 0 or interval_size <= 0:
        raise ValueError('offset and interval_size must be positive integers')

    series = pd.read_csv('../Artificial_data/DOGEx_v1/csv/' + station + '.csv', header=None)
    series.columns = ['date', 'un', 'ue', 'uz', 'sn', 'se', 'sz']

    meta = pd.read_csv('synthetic-meta.txt', sep=',', header=None)
    meta.columns = ['station', 'direction', 'date']
    offset_dates = meta.loc[(meta.direction == 'Z') & (meta.station == station), 'date']

    uz = series['uz'].to_numpy(dtype=float)
    # One membership test for the whole series instead of one per window and date.
    has_offset = series['date'].isin(offset_dates).to_numpy()

    # Same window count as before: (rows - interval_size) // offset, floored at 0
    # so that a series shorter than one window yields no windows instead of an
    # invalid negative array dimension.
    n_windows = max((len(series) - interval_size) // offset, 0)
    X_all = np.empty((n_windows, interval_size), dtype=float)
    y_all = np.zeros(n_windows, dtype=int)
    for row in range(n_windows):
        start = row * offset
        stop = start + interval_size
        X_all[row] = uz[start:stop]
        y_all[row] = int(has_offset[start:stop].any())

    if not upsample:
        return X_all, y_all

    is_offset = y_all == 1
    if not is_offset.any():
        raise ValueError(
            "no window of station '" + station + "' contains a catalogued Z offset; "
            "nothing to upsample"
        )

    # Resampling both arrays in one call keeps windows and labels aligned.
    X_extra, y_extra = resample(
        X_all[is_offset],
        y_all[is_offset],
        replace=True,
        n_samples=int((~is_offset).sum()),
        random_state=42,
    )
    X = np.concatenate([X_all[~is_offset], X_extra])
    y = np.concatenate([y_all[~is_offset], y_extra])
    return X, y

In [191]:
# Build the windowed, class-balanced dataset for station 'awmi'.
X, y = preprocessing(station='awmi')

In [192]:
# Hold out 33% of the windows for evaluation; the fixed seed keeps the split repeatable.
# NOTE(review): preprocessing() upsamples with replacement before this split, so
# copies of the same offset window can end up in both partitions and the scores
# computed from this split are likely optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

In [193]:
import sktime.classification.kernel_based

# ROCKET draws its convolution kernels at random; pin the seed so that
# re-running the notebook reproduces the same model and scores.
clf = sktime.classification.kernel_based.RocketClassifier(random_state=42)

In [194]:
# Fit the ROCKET classifier on the training windows.
clf.fit(X_train, y_train)

In [195]:
# Predict a 0/1 label for every held-out window.
y_pred = clf.predict(X_test)

In [196]:
from sklearn.metrics import accuracy_score

# Fraction of held-out windows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

1.0

In [197]:
# Per-class precision, recall and F1 on the held-out split.
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       169
           1       1.00      1.00      1.00       170

    accuracy                           1.00       339
   macro avg       1.00      1.00      1.00       339
weighted avg       1.00      1.00      1.00       339



In [None]:
# The import must be live for this cell to run on a fresh kernel.
# ConfusionMatrixDisplay replaces plot_confusion_matrix, which is no longer
# available in current scikit-learn releases.
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix

# Reuse the predictions already computed instead of predicting a second time.
cm = confusion_matrix(y_test, y_pred)
ConfusionMatrixDisplay(confusion_matrix=cm).plot()

# Last expression: also show the raw counts next to the figure.
cm