In [None]:
from collections import Counter
import numpy as np
import pandas as pd 
import random

In [None]:
# Read only the two columns used by this notebook. na_filter=False turns off
# pandas' NA detection, so empty fields are kept as-is instead of becoming NaN.
data = pd.read_csv('/Users/nezelek/PDS/goes16.csv', usecols=['xrsb_flux','status'], na_filter=False)

# Gather row labels of flares (status == 'EVENT_PEAK') and of every other row.
is_flare_peak = data['status'] == 'EVENT_PEAK'
flare_indices = data.index[is_flare_peak].tolist()
nonflare_indices = data.index[~is_flare_peak].tolist()

# Undersampling configuration: how many non-flare rows to keep per flare row,
# and a fixed seed so that Restart & Run All reproduces the same sample.
NONFLARE_RATIO = 3
SAMPLING_SEED = 42

n_flares = len(flare_indices)
print(n_flares)

# random.sample raises ValueError when asked for more items than the population
# holds, so cap the request at the number of available non-flare rows.
n_nonflare_samples = min(NONFLARE_RATIO * n_flares, len(nonflare_indices))

# A dedicated Random instance makes the draw reproducible without reseeding the
# module-level generator that other cells may use.
sampling_rng = random.Random(SAMPLING_SEED)
nonflare_sample_indices = sampling_rng.sample(nonflare_indices, n_nonflare_samples)
print(len(nonflare_sample_indices))

# Sorted list of every selected row label: all flares plus the sampled non-flares.
sample_indices = sorted(flare_indices + nonflare_sample_indices)

In [None]:
# Standardize flux values to z-scores.
# NOTE(review): the mean and std are computed over every row, i.e. before the
# train-test split done in a later cell, so test rows contribute to the scaling.
flux_raw = data['xrsb_flux']
flux_mean = flux_raw.mean()
flux_std = flux_raw.std()
data['xrsb_flux'] = (flux_raw - flux_mean) / flux_std

# Factorize status values into integer codes. pd.factorize numbers the distinct
# values by order of first appearance starting at 0; shifting by one makes the
# labels start at 1. Which status receives which label therefore depends on the
# row order of the input file.
status_codes = pd.factorize(data['status'])[0]
data['status'] = status_codes + 1

# Two-column frame used downstream: column 0 is the flux, column 1 the label.
df = data[['xrsb_flux','status']]

In [None]:
# Train-test split by position: the first half of the rows (in row order, no
# shuffling) becomes the training array and the remainder the test array.
SPLIT_VALUE = len(df) // 2
train = df.iloc[:SPLIT_VALUE].to_numpy()
test = df.iloc[SPLIT_VALUE:].to_numpy()

In [67]:
# create sequence arrays
timesteps = 60
halfpoint = int(timesteps/2)

# sample_indices is a list, so `i in sample_indices` is a linear scan; doing
# that once per row makes the loops quadratic. A set gives O(1) membership
# tests and selects exactly the same rows.
sample_index_set = set(sample_indices)


def _extract_windows(rows, selected, half_width, offset=0):
    """Collect a flux window and a label for every selected row of `rows`.

    Parameters
    ----------
    rows : 2-D array; column 0 is read as the flux value, column 1 as the label.
    selected : container of row numbers (in the numbering of the full,
        un-split data) for which a window should be produced.
    half_width : number of samples taken before the centre row; the window is
        rows[i-half_width : i+half_width], i.e. 2*half_width values that end
        one sample before i+half_width.
    offset : position of rows[0] in the full data, added to the local row
        number before looking it up in `selected`.

    Returns
    -------
    (windows, labels) : two lists in ascending row order. `windows` holds 1-D
        slices of column 0, `labels` holds the column-1 value of the centre row
        converted to int.
    """
    windows = []
    labels = []
    # Rows closer than half_width+1 to either end are skipped, so every window
    # is full length.
    for i in range(half_width+1, len(rows)-half_width-1):
        if i + offset in selected:
            windows.append(rows[i-half_width:i+half_width, 0])
            labels.append(int(rows[i, 1]))
    return windows, labels


X_train, y_train = _extract_windows(train, sample_index_set, halfpoint)
# Rows of `test` start at position len(train) in the full data.
X_test, y_test = _extract_windows(test, sample_index_set, halfpoint, offset=len(train))

# Class balance of each split.
print(Counter(y_train))
print(Counter(y_test))

Counter({1: 13150, 2: 2162})
Counter({1: 13155, 2: 6607})


In [68]:
# Save the full train/test sequences and labels as .npy files for use in the
# extractor. Each (path, object) pair is written with np.save, in this order.
for npy_path, payload in (
    ('/Users/nezelek/PDS/FE_GE_TSC-master/data/SolarFlares/SolarFlares_X_TRAIN.npy', X_train),
    ('/Users/nezelek/PDS/FE_GE_TSC-master/data/SolarFlares/SolarFlares_X_TEST.npy', X_test),
    ('/Users/nezelek/PDS/FE_GE_TSC-master/data/SolarFlares/SolarFlares_Y_TRAIN.npy', y_train),
    ('/Users/nezelek/PDS/FE_GE_TSC-master/data/SolarFlares/SolarFlares_Y_TEST.npy', y_test),
):
    np.save(npy_path, payload)

In [69]:
# Mini sets to see if the extractor works: a small random subset of the train
# and test sequences, saved under the MiniSolarFlares directory.
MINI_SIZE = 100   # number of sequences requested per split
MINI_SEED = 42    # fixed seed so the same mini sets are produced on every run

# A dedicated Random instance keeps the draw reproducible without touching the
# module-level generator.
mini_rng = random.Random(MINI_SEED)

# random.sample raises ValueError if more items are requested than exist, so
# cap the subset size at the number of available sequences in each split.
mini_train_sample_indices = mini_rng.sample(range(len(y_train)), min(MINI_SIZE, len(y_train)))
mini_test_sample_indices = mini_rng.sample(range(len(y_test)), min(MINI_SIZE, len(y_test)))

# Pick the same positions from the sequences and from their labels so that
# each window stays paired with its label.
mini_X_train = [X_train[i] for i in mini_train_sample_indices]
mini_X_test = [X_test[i] for i in mini_test_sample_indices]
mini_y_train = [y_train[i] for i in mini_train_sample_indices]
mini_y_test = [y_test[i] for i in mini_test_sample_indices]

# Class balance of each mini split.
print(Counter(mini_y_train))
print(Counter(mini_y_test))

np.save('/Users/nezelek/PDS/FE_GE_TSC-master/data/MiniSolarFlares/MiniSolarFlares_X_TRAIN.npy', mini_X_train)
np.save('/Users/nezelek/PDS/FE_GE_TSC-master/data/MiniSolarFlares/MiniSolarFlares_X_TEST.npy', mini_X_test)
np.save('/Users/nezelek/PDS/FE_GE_TSC-master/data/MiniSolarFlares/MiniSolarFlares_Y_TRAIN.npy', mini_y_train)
np.save('/Users/nezelek/PDS/FE_GE_TSC-master/data/MiniSolarFlares/MiniSolarFlares_Y_TEST.npy', mini_y_test)

Counter({1: 82, 2: 18})
Counter({1: 67, 2: 33})
