In [1]:
import numpy as np
import pandas as pd
import torch #as th
import os

import pickle
from scipy.io import arff

import utils

In [54]:
# Base directory for every data path below: the notebook's current working directory.
path = os.getcwd() #'/Users/fuad'
path

'/Users/fuad/Downloads/NMSU/Experiments/solar_flare'

In [55]:
#folder_location = path + '/Downloads/NMSU/Experiments/great_mvts/Multivariate_arff/'
# Directory with the partition files; the trailing slash matters because the
# partition prefixes are appended by plain string concatenation.
data_path = path + '/big_data_cleaned/'

In [56]:
# File-name prefixes (no extension): load_data() appends "_data.pkl" / "_labels.pkl".
train_data = data_path + 'partition3'
test_data = data_path + 'partition4'

In [57]:
def load(file_name):
    """Unpickle and return the object stored in ``file_name``.

    NOTE: unpickling can execute arbitrary code, so only use this on trusted files.
    """
    with open(file_name, 'rb') as handle:
        return pickle.load(handle)


def load_data(partition):
    """Return ``(mvts, labels)`` unpickled from ``<partition>_data.pkl`` and ``<partition>_labels.pkl``.

    ``partition`` is a path prefix; the two suffixes are appended verbatim.
    """
    data_file = partition + "_data.pkl"
    labels_file = partition + "_labels.pkl"
    # Data is loaded before labels, same as a left-to-right tuple evaluation.
    return load(data_file), load(labels_file)

In [58]:
#Binary classification --> label conversion to BINARY class
def get_binary_labels_from(labels_str):
    """Convert class-letter labels to binary labels.

    'B', 'C' and 'F' map to 0; 'M' and 'X' map to 1. A value that is not in the
    lookup table comes back as NaN, which makes the returned array float-typed.

    Returns a 1-D NumPy array with one entry per input label.
    """
    class_to_binary = {'B': 0, 'C': 0, 'F': 0, 'M': 1, 'X': 1}
    label_frame = pd.DataFrame(labels_str, columns = ['labels'])
    binary_series = label_frame['labels'].map(class_to_binary, na_action='ignore')
    return binary_series.to_numpy()

In [59]:
# Load the training partition and collapse its string labels to binary 0/1.
X, Y = load_data(train_data)    #utils.get_XY_np_array(tarin_data[0])
#Y = np.array(utils.get_int_labels_from_str(Y))
Y = get_binary_labels_from(Y)
print("type(X): ", type(X), " X.shape: ",X.shape)
print("type(Y): ", type(Y), " Y.shape: ",Y.shape)


type(X):  <class 'numpy.ndarray'>  X.shape:  (37812, 24, 60)
type(Y):  <class 'numpy.ndarray'>  Y.shape:  (37812,)


In [60]:
# Seed handed to torch.manual_seed() in the training cell.
TORCH_SEED = 0
#building standard scaler on train data X

#---------------Node Label Data Scaling-----------
# The scaler is built from the training array X only and later passed to the test preprocessing.
# NOTE(review): judging by the helper names, X is presumably transposed and flattened to 2-D
# before the scaler is fitted — confirm in utils.
trans = utils.GetTransposed2D(X)
data2d = utils.Make2D(trans)
scaler = utils.GetStandardScaler(data2d)

In [61]:
# Apply utils.transform_scale_data to the training array with the scaler built from X.
X_train = utils.transform_scale_data(X, scaler)
y_train = Y
# Distinct labels and how often each one occurs.
unique_y_train, counts_y_train = np.unique(y_train, return_counts=True)
# Number of distinct labels; the training cell uses it as NUM_CLASSES.
num_y_class = unique_y_train.shape[0]

original data shape: (37812, 24, 60)
transposed data shape: (37812, 60, 24)
2d data shape: (2268720, 24)
mvts data shape: (37812, 60, 24)
transBack data shape: (37812, 24, 60)


In [62]:
# Sanity check: shapes and class balance of the training set.
print("X_train shape: ", X_train.shape)
print("y_train shape: ", y_train.shape)
#y_train_stats = dict(zip(unique_y_train, counts_y_train))
print("unique_y_train: ", unique_y_train)
print("y_train_counts: ", counts_y_train)
print("num_y_class: ", num_y_class)

X_train shape:  (37812, 24, 60)
y_train shape:  (37812,)
unique_y_train:  [0 1]
y_train_counts:  [36535  1277]
num_y_class:  2


In [63]:
# Test data preprocessing
X_test, y_test = load_data(test_data)  
#-----------------------
# Pass the scaler built from the training data (presumably applied without re-fitting —
# confirm in utils.transform_scale_data).
X_test = utils.transform_scale_data(X_test, scaler)
#y_test = np.array(utils.get_int_labels_from_str(y_test))
# Same binary relabelling as for the training labels.
y_test = get_binary_labels_from(y_test)
print("type(X_test): ", type(X_test), " X_test.shape: ",X_test.shape)
print("type(y_test): ", type(y_test), " y_test.shape: ",y_test.shape)

original data shape: (43585, 24, 60)
transposed data shape: (43585, 60, 24)
2d data shape: (2615100, 24)
mvts data shape: (43585, 60, 24)
transBack data shape: (43585, 24, 60)
type(X_test):  <class 'numpy.ndarray'>  X_test.shape:  (43585, 24, 60)
type(y_test):  <class 'numpy.ndarray'>  y_test.shape:  (43585,)


In [64]:
#------------------------------data crawler in train dataset
# Candidate thresholds; only th[0] is passed to utils.GetGraphAdjMtrx below.
th = [0.0, 0.1,-0.5, 0.2, 0.3, 0.4, 0.5]
# X_train is 3-D: (events, parameters, series length).
num_train, num_params, len_mvts = X_train.shape
#populating adjacency matrices and node attributes of train events
train_adjs = np.zeros((num_train, num_params, num_params))
train_nats = np.zeros((num_train, num_params, len_mvts))
for i, mt in enumerate(X_train):
  # Node attributes are the event's scaled series, copied as-is.
  train_nats[i] = mt
  # Row-wise correlation between parameters, turned into an adjacency matrix by the helper.
  cc_mt = np.corrcoef(mt)
  train_adjs[i] = utils.GetGraphAdjMtrx(cc_mt, [th[0]], True)

In [65]:
#---------------------data crawler in test dataset
# Reuses num_params, len_mvts and th from the train graph-building cell.
num_test = X_test.shape[0]
test_adjs = np.zeros((num_test, num_params, num_params))
test_nats = np.zeros((num_test, num_params, len_mvts))
for i, mt in enumerate(X_test):
  # Node attributes are the event's scaled series, copied as-is.
  test_nats[i] = mt
  # Row-wise correlation between parameters, turned into an adjacency matrix by the helper.
  cc_mt = np.corrcoef(mt)
  test_adjs[i] = utils.GetGraphAdjMtrx(cc_mt, [th[0]], True)

In [66]:
#@title MODEL CLASS { form-width: "10%" }
# (GCN) Node emb -> (mean) Graph emb -> (Flatten, Linear) -> window emb -> (LSTM) -> Temporal sequence emb -> (Linear) Class emb
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data

class MVTS_GCN_RNN(torch.nn.Module):
  """Two-layer GCN classifier that scores one graph per forward call.

  Despite the name, no recurrent layer is created: node features pass through two
  GCNConv layers, the resulting node embeddings are flattened into one row, and a
  single linear layer maps that row to class log-probabilities.

  The arguments num_temporal_split, num_sparsity_th, graph_emb_dims, window_emb_dims
  and sequence_emb_dims are stored on the instance but no layer is built from them.
  """

  def __init__(self, num_nodes, input_dims, num_temporal_split, num_sparsity_th, device, gcn_hidden_dims, node_emb_dims, graph_emb_dims, window_emb_dims, sequence_emb_dims, num_classes):
    super().__init__()

    # Device that forward() moves its inputs to.
    self.device = device

    # Sizes that define the layers below.
    self.num_nodes = num_nodes
    self.input_dims = input_dims
    self.gcn_hidden_dims = gcn_hidden_dims
    self.node_emb_dims = node_emb_dims
    self.num_classes = num_classes

    # Stored for reference only; not used by any layer.
    self.num_temporal_split = num_temporal_split
    self.num_sparsity_th = num_sparsity_th
    self.graph_emb_dims = graph_emb_dims
    self.window_emb_dims = window_emb_dims
    self.sequence_emb_dims = sequence_emb_dims

    # Layers are created in the same order as before so seeded initialisation is unchanged.
    self.conv1 = GCNConv(input_dims, gcn_hidden_dims)
    self.conv2 = GCNConv(gcn_hidden_dims, node_emb_dims)
    # The classifier consumes all node embeddings concatenated into one vector.
    self.conv2_to_class_space = nn.Linear(num_nodes*node_emb_dims, num_classes)

  def forward(self, adj_mat_array, node_att_array):
    """Return class log-probabilities of shape (1, num_classes) for one graph.

    adj_mat_array: adjacency matrix handed to utils.build_edge_index_tensor.
    node_att_array: NumPy array of node features (converted with torch.from_numpy);
      presumably shaped (num_nodes, input_dims) — confirm against the caller.
    """
    edge_index = utils.build_edge_index_tensor(adj_mat_array).to(self.device)
    node_features = torch.from_numpy(node_att_array).to(self.device)

    # GCN layer 1 -> ReLU -> GCN layer 2, both over the same (unweighted) edge index.
    hidden = F.relu(self.conv1(node_features, edge_index))
    node_embeddings = self.conv2(hidden, edge_index)

    # Flatten every node embedding into a single row for the linear classifier.
    flat_embeddings = node_embeddings.view(1, -1)
    class_space = self.conv2_to_class_space(flat_embeddings)
    return F.log_softmax(class_space, dim=1)

In [85]:
#Training
#TORCH_SEED = 2
# Seed before the model is constructed so weight initialisation is repeatable.
torch.manual_seed(TORCH_SEED)

NUM_NODES = X_train.shape[1] #33 #25
INPUT_DIMS = X_train.shape[2] #60 #15
NUM_TEMPORAL_SPLIT = 4
NUM_SPARSITY_TH = 6
GCN_HIDDEN_DIMS = 4 #8-ok, 16-ok, 32-no 
NODE_EMB_DIMS = 4 # number of classes/can be tuned
GRAPH_EMB_DIMS = NODE_EMB_DIMS 
WINDOW_EMB_DIMS = 64 #number of sparsity threshold/can be increased 
SEQUENCE_EMB_DIMS = 128 #4 #number of timestamps
NUM_CLASSES = num_y_class #4
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


# .double() matches the float64 node-attribute arrays created with np.zeros() in the
# graph-building cells (torch.from_numpy keeps that dtype).
model = MVTS_GCN_RNN(NUM_NODES, INPUT_DIMS, NUM_TEMPORAL_SPLIT, NUM_SPARSITY_TH, device, GCN_HIDDEN_DIMS, NODE_EMB_DIMS, GRAPH_EMB_DIMS, WINDOW_EMB_DIMS, SEQUENCE_EMB_DIMS, NUM_CLASSES).to(device).double()
# NLLLoss pairs with the log_softmax output of the model.
loss_function = nn.NLLLoss()
#optimizer = optim.SGD(model.parameters(), lr=0.01)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-3) #weight_decay=1e-5
num_epochs = 1 #---------------------------------------------------

#Train
# One optimizer step per training sample (no mini-batching).
for epoch in range(num_epochs):
  #print('Epoch: ', epoch)
  for i in range(num_train):#num_train
    model.zero_grad()
    #print('Event: ', i)
    #print ("train_adjs n train_nats:", train_adjs.shape, train_nats.shape)
    adj_mat_array = train_adjs[i]#,:,:,:,:]#(4,6,25,25)
    node_att_array = train_nats[i]#,:,:,:] #(4,25,15)
    class_scores = model(adj_mat_array, node_att_array) 
    # NLLLoss expects a 1-element integer target for the (1, num_classes) scores.
    target = [int(y_train[i])]
    target = torch.from_numpy(np.array(target))
    target = target.to(device)
    loss = loss_function(class_scores, target)
    loss.backward()
    optimizer.step()
  # The value printed here is the loss of the last sample of the epoch, not an epoch average.
  if(epoch%5==0):
    print ("epoch n loss:", epoch, loss)

epoch n loss: 0 tensor(3.5657e-05, dtype=torch.float64, grad_fn=<NllLossBackward0>)


In [86]:
def RunEpochs(num_epochs = 1, print_loss_interval = 5):
  """Train the global ``model`` for ``num_epochs`` passes over the training set.

  Uses the globals ``model``, ``optimizer``, ``loss_function``, ``device``,
  ``train_adjs``, ``train_nats``, ``y_train`` and ``num_train``. One optimizer step
  is taken per sample. Every ``print_loss_interval`` epochs the loss of the LAST
  sample of that epoch is printed (not an epoch average).
  """
  for epoch_idx in range(num_epochs):
    for sample_idx in range(num_train):
      model.zero_grad()

      log_probs = model(train_adjs[sample_idx], train_nats[sample_idx])
      # One-element label array matching the (1, num_classes) model output.
      label = torch.from_numpy(np.array([y_train[sample_idx]])).to(device)
      sample_loss = loss_function(log_probs, label)
      sample_loss.backward()
      optimizer.step()

    if epoch_idx % print_loss_interval == 0:
      print ("epoch n loss:", epoch_idx, sample_loss)

#------------------------------train acc
def get_train_accuracy():
  """Return the fraction of training samples the global ``model`` classifies correctly.

  Runs under ``torch.no_grad()``, one sample at a time, comparing the argmax of
  the model output with ``y_train``.
  """
  total = X_train.shape[0]
  hits = 0
  with torch.no_grad():
    for idx in range(total):
      scores = model(train_adjs[idx], train_nats[idx])
      predicted = torch.argmax(scores, dim=-1)
      if predicted == y_train[idx]:
        hits += 1
  return hits/total


#---------test acc
def get_test_accuracy():
  """Return the fraction of test samples the global ``model`` classifies correctly.

  Runs under ``torch.no_grad()``, one sample at a time, comparing the argmax of
  the model output with ``y_test``.
  """
  total = X_test.shape[0]
  hits = 0
  with torch.no_grad():
    for idx in range(total):
      scores = model(test_adjs[idx], test_nats[idx])
      predicted = torch.argmax(scores, dim=-1)
      if predicted == y_test[idx]:
        hits += 1
  return hits/total

In [18]:
def get_accuracy():
  """Print train accuracy, then test accuracy, of the global ``model``."""
  train_acc = get_train_accuracy()
  print ("train_accuracy:", train_acc)
  test_acc = get_test_accuracy()
  print ("test_accuracy: ", test_acc)

In [88]:
#Asdf

TOTAL_EPOCHS = 4 #20
EPOCH_INTERVAL = 3 #TOTAL_EPOCHS // 10
print("current epoch: 0")
get_accuracy()
# NOTE(review): range(EPOCH_INTERVAL, TOTAL_EPOCHS, EPOCH_INTERVAL) excludes TOTAL_EPOCHS, and the
# "current epoch" label is printed before that chunk of epochs runs. With 4 and 3 this is a single
# pass of 3 epochs — confirm the intended total.
for epoch in range(EPOCH_INTERVAL, TOTAL_EPOCHS, EPOCH_INTERVAL):
    print("current epoch: ", epoch)
    RunEpochs(num_epochs = EPOCH_INTERVAL, print_loss_interval = 300)
    #get_accuracy()

current epoch: 0
train_accuracy: 0.9662276525970591
test_accuracy:  0.9795801307789377
current epoch:  3
epoch n loss: 0 tensor(3.6669e-05, dtype=torch.float64, grad_fn=<NllLossBackward0>)


In [79]:
def get_acc_n_preds():
  """Evaluate the global ``model`` on the test set and keep every prediction.

  Returns:
    (accuracy, preds): ``accuracy`` is the fraction of test samples whose argmax
    prediction equals ``y_test[i]``; ``preds`` is a list with one CPU tensor per
    test sample holding the predicted class index.
  """
  num_test = X_test.shape[0]
  numCorrect = 0
  preds = []
  with torch.no_grad():
    for i in range(num_test):
      test_class_scores = model(test_adjs[i], test_nats[i])
      # Bring the prediction to host memory: callers pass this list to np.concatenate,
      # which cannot convert CUDA tensors. On CPU, .cpu() returns the same tensor.
      class_prediction = torch.argmax(test_class_scores, dim=-1).cpu()
      preds.append(class_prediction)
      if class_prediction == y_test[i]:
        numCorrect += 1
  return numCorrect/num_test, preds

In [89]:
#-------TSS
from sklearn import metrics

acc, y_pred = get_acc_n_preds() 
print("Accuracy: ", acc)

# y_pred is a list of one-element tensors; flatten it into a plain list for sklearn.
# For binary labels, ravel() on the 2x2 confusion matrix yields TN, FP, FN, TP in that order.
TN, FP, FN, TP = metrics.confusion_matrix(y_test, list(np.concatenate(y_pred).flat)).ravel()
# TSS = true-positive rate minus false-positive rate.
tss = (TP / (TP + FN)) - (FP / (FP + TN))
print("TSS: ", tss)

Accuracy:  0.9795801307789377
TSS:  0.0


In [77]:
# Show the full confusion matrix (rows: true class, columns: predicted class).
metrics.confusion_matrix(y_test, list(np.concatenate(y_pred).flat))

array([[42693,     2],
       [  884,     6]])

In [43]:
# NOTE(review): `asdf` is an undefined name, so running this cell raises NameError before the
# line below executes — it looks like a deliberate stop marker for "Run All"; confirm before removing.
asdf

metrics.confusion_matrix(y_test, list(np.concatenate(y_pred).flat)).ravel()

array([    0,   172,   521,     0,     0,     0,  2875,  2446,     0,
          14,     0,  1664, 35003,     0,     0,     0,   749,    68,
           0,     0,     0,    73,     0,     0,     0])

In [44]:
# NOTE(review): the recorded 5x5 output presumably comes from an earlier multi-class run, before
# the binary relabelling — confirm whether this cell is still needed.
metrics.confusion_matrix(y_test, list(np.concatenate(y_pred).flat))

array([[    0,   172,   521,     0,     0],
       [    0,  2875,  2446,     0,    14],
       [    0,  1664, 35003,     0,     0],
       [    0,   749,    68,     0,     0],
       [    0,    73,     0,     0,     0]])

Accuracy:  0.8690604565790984


NameError: name 'metrics' is not defined

In [41]:
# Compare the raw prediction list (one-element tensors) with its flattened form.
print(y_pred[0:9])

yp = list(np.concatenate(y_pred).flat)

yp[0:9]

[tensor([1]), tensor([1]), tensor([1]), tensor([1]), tensor([1]), tensor([1]), tensor([1]), tensor([1]), tensor([1])]


[1, 1, 1, 1, 1, 1, 1, 1, 1]

In [33]:
# Inspect the first entry of yp.
yp[0]

tensor([1])

In [24]:
# Check element types: ground-truth labels vs. stored predictions.
print(type(y_test[0]))
print(type(y_pred[0]))

<class 'numpy.int64'>
<class 'torch.Tensor'>


In [30]:
# Spot-check labels against predictions for samples 1000-1008.
print(y_test[1000:1009])
print(y_pred[1000:1009])
y_pred[0]

[2 2 2 2 2 2 2 2 2]
[tensor([2]), tensor([2]), tensor([2]), tensor([2]), tensor([2]), tensor([2]), tensor([2]), tensor([2]), tensor([2])]


tensor([1])

In [268]:
# Requires `Accuracy` to exist already; in this notebook it is only assigned in later cells,
# so this cell fails on a fresh top-to-bottom run.
Accuracy.append(get_test_accuracy())
print(Accuracy)
# NOTE(review): undefined name — raises NameError; apparently a deliberate stop marker.
asdf

[0.56, 0.56, 0.58, 0.52, 0.5]


NameError: name 'asdf' is not defined

In [269]:
# Mean and standard deviation (np.mean / np.std) over the collected test accuracies.
print(Accuracy)
print('p.mean(Accuracy) :',np.mean(Accuracy))
print('p.std(Accuracy) :',np.std(Accuracy))
print('p.mean np.std(Accuracy) :     ',np.round(np.mean(Accuracy),2),"+-",np.round(np.std(Accuracy),4) )


[0.56, 0.56, 0.58, 0.52, 0.5]
p.mean(Accuracy) : 0.544
p.std(Accuracy) : 0.029393876913398138
p.mean np.std(Accuracy) :      0.54 +- 0.0294


In [None]:
# Start the accuracy list with one hard-coded value.
# NOTE(review): 0.56 is presumably copied from an earlier run — confirm or replace with a computed value.
Accuracy = [0.56]

In [None]:
def get_exp_accuracy(num_exp):
    """Train ``num_exp`` freshly initialised models and return their test accuracies.

    Experiment ``i`` seeds torch with ``i`` and rebinds the globals ``model``,
    ``loss_function`` and ``optimizer`` (RunEpochs and the accuracy helpers read
    those globals), trains in chunks of ``EPOCH_INTERVAL`` epochs, and records the
    final test accuracy.

    Args:
        num_exp: number of independent runs; seeds are 0 .. num_exp-1.

    Returns:
        List of ``num_exp`` test accuracies, in seed order.
    """
    global model
    global loss_function
    global optimizer

    acc = []
    for i in range(num_exp):
        torch.manual_seed(i) # TORCH_SEED = i
        # Build the model BEFORE the optimizer: the optimizer must hold the parameters of
        # the model trained in this run. Creating it first would bind it to the previous
        # model and leave the new one untrained.
        model = MVTS_GCN_RNN(NUM_NODES, INPUT_DIMS, NUM_TEMPORAL_SPLIT, NUM_SPARSITY_TH, device, GCN_HIDDEN_DIMS, NODE_EMB_DIMS, GRAPH_EMB_DIMS, WINDOW_EMB_DIMS, SEQUENCE_EMB_DIMS, NUM_CLASSES).to(device).double()
        loss_function = nn.NLLLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5) #weight_decay=1e-5

        for epoch in range(EPOCH_INTERVAL, TOTAL_EPOCHS, EPOCH_INTERVAL):
            print("Exp no. {}, epoch: {}".format(i, epoch))
            RunEpochs(num_epochs = EPOCH_INTERVAL, print_loss_interval = 300)
            get_accuracy()
        acc.append(get_test_accuracy())
    return acc

In [None]:
# Run 5 experiments (seeds 0-4) and keep their final test accuracies.
Accuracy = get_exp_accuracy(5)

In [None]:
#RunEpochs(num_epochs = 20, print_loss_interval = 2)
#get_accuracy()

In [None]:
# Show the installed PyTorch version.
torch.__version__


#As

**ASD**

asdas