In [1]:
import tensorflow as tf
import numpy as np
import os
from tensorflow.keras.models import load_model
from sklearn.model_selection import train_test_split
import tensorflow as tf
import pandas as pd
from imblearn.over_sampling import SMOTE


In [None]:
def normalization(data):
    """Min-max scale every column of ``data`` into the range [0, 1].

    The minimum and maximum are taken along axis 0 (per column), and each
    value is mapped to ``(value - column_min) / (column_max - column_min)``.

    Parameters
    ----------
    data : array-like (NumPy array or pandas DataFrame)
        Numeric data; statistics are computed along axis 0.

    Returns
    -------
    Same container type as ``data - min`` produces, holding the scaled values.
    Columns whose values are all identical are mapped to 0 instead of NaN.
    """
    min_vals = np.min(data, axis=0)
    max_vals = np.max(data, axis=0)
    _range = max_vals - min_vals
    # A constant column has a zero range, which would turn the division below
    # into 0/0 (NaN). Adding the boolean mask bumps exactly those zero entries
    # to 1, so such columns come out as 0 while every other column is
    # untouched. This form keeps the container type (ndarray or Series).
    _range = _range + (_range == 0)
    return (data - min_vals) / _range

# --- Load the KC1 dataset -------------------------------------------------
original_data = pd.read_csv('./datasets/NASA/kc1.csv')

# Null check. The value is discarded here because this is not the last
# expression of the cell; the original author noted it evaluates to False.
original_data.isnull().values.any()
# Replace any missing entries with False.
original_data = original_data.fillna(value=False)

# Keep the raw (un-normalised) label column as a one-column DataFrame.
original_Y = original_data[['defects']]

# NOTE(review): the scaling statistics are computed over all rows, including
# the ones held out as the test split below - confirm this is intended.
original_data = normalization(original_data)

# Feature matrix: everything except the label column.
original_X = original_data.drop(columns=['defects'])

# --- Hold out 10% of the rows for testing ---------------------------------
x_train, x_test, y_train, y_test = train_test_split(
    original_X,
    original_Y,
    test_size=.1,
    random_state=12,
)
print(x_train.shape, y_train.shape,x_test.shape, y_test.shape)

# --- Oversample the training split only -----------------------------------
sm = SMOTE(sampling_strategy=1.0, random_state=12)
x, y = sm.fit_resample(x_train, y_train)
x_train = pd.DataFrame(x, columns=original_X.columns)
y_train = pd.DataFrame(y, columns=['defects'])


In [None]:
# Carve a 10% validation split out of the (already resampled) training data.
# NOTE(review): x_train / y_train contain the SMOTE-resampled rows from the
# previous cell, so validation rows are drawn from resampled data - confirm
# this is intended.
# This cell rebinds its own inputs, so run it once after the previous cell.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=.1,
    random_state=12,
)

# Features: DataFrame -> ndarray with a trailing singleton axis,
# i.e. (n_samples, n_features) becomes (n_samples, n_features, 1).
x_train, x_val, x_test = (
    np.expand_dims(frame.values, axis=-1)
    for frame in (x_train, x_val, x_test)
)

# Labels: DataFrame -> ndarray.
y_train, y_val, y_test = (
    frame.values for frame in (y_train, y_val, y_test)
)

# print(x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape)


In [None]:
n_classes = 2

# One-hot encode the label arrays of all three splits in a single pass.
# This cell rebinds its own inputs, so run it once per kernel session.
y_train, y_val, y_test = (
    tf.keras.utils.to_categorical(labels, n_classes)
    for labels in (y_train, y_val, y_test)
)

print(y_train.shape, y_val.shape, y_test.shape)

(2899, 2) (323, 2) (211, 2)


In [None]:
# Cast the feature arrays of every split to float32.
x_train, x_val, x_test = (
    features.astype(np.float32) for features in (x_train, x_val, x_test)
)

In [6]:
# Per-sample input shape handed to ConvNet below: (number of features, 1),
# matching the trailing singleton axis that was added to x_train earlier.
input_shape  = (x_train.shape[1], 1)

print(f'input_shape: {input_shape}')

input_shape: (21, 1)


In [8]:
from models.convnet import ConvNet
from models.tree import SoftDecisionTree

In [9]:
# Build the network that is later passed to train() together with the soft
# decision tree. Its architecture lives in models/convnet.py (not shown here).
nn = ConvNet(input_shape, n_classes=n_classes)  

In [None]:
# Run the network on the training features.
# NOTE(review): no cell visible above fits `nn` before this call, and
# `y_train_soft` is not referenced by any later cell shown here - confirm
# whether this cell is still needed.
y_train_soft = nn.predict(x_train)

In [None]:
# Flatten every sample to a 1-D feature vector (n_samples, -1); these flat
# arrays are the ones passed alongside the 3-D arrays to train() / analyze().
x_train_flat, x_val_flat, x_test_flat = (
    batch.reshape(batch.shape[0], -1) for batch in (x_train, x_val, x_test)
)

# Display the resulting shapes as the cell output.
x_train_flat.shape, x_val_flat.shape, x_test_flat.shape

((2899, 21), (323, 21), (211, 21))

In [None]:
# --- Soft decision tree configuration -------------------------------------
# Data-derived / structural settings.
n_features = x_train.shape[1]
n_classes = 2
max_depth = 4

# Regularisation and smoothing settings passed straight to SoftDecisionTree.
penalty_strength = 10.0
penalty_decay = 0.25
inv_temp = 0.01
ema_win_size = 100

# Training length (also set again in the training cell).
epochs = 40

g_model = SoftDecisionTree(
    max_depth=max_depth,
    n_features=n_features,
    n_classes=n_classes,
    penalty_strength=penalty_strength,
    penalty_decay=penalty_decay,
    inv_temp=inv_temp,
    ema_win_size=ema_win_size,
)


In [13]:
from joint import analyze, train, evaluate

In [None]:
# --- Training settings ----------------------------------------------------
epochs = 40
batch_size = 32

# Weight-file prefixes for the jointly trained network (f) and tree (g).
f_model_path = 'assets/kc1/f_model_joint'
g_model_path = 'assets/kc1/g_model_joint'

# The presence of "<prefix>.index" is used as the marker that weights were
# saved by a previous run.
f_model_exists = os.path.exists(f_model_path + ".index")
g_model_exists = os.path.exists(g_model_path + ".index")

# Bundles of (3-D features, flat features, one-hot labels).
# data_test is not used by the cells visible here.
data_test = (x_test, x_test_flat, y_test)
data_val = (x_val, x_val_flat, y_val)

if f_model_exists and g_model_exists:
    # Both weight files are present: rebuild fresh models with the same
    # configuration and restore their saved weights instead of retraining.
    g_model_joint = SoftDecisionTree(
        max_depth=max_depth,
        n_features=n_features,
        n_classes=n_classes,
        penalty_strength=penalty_strength,
        penalty_decay=penalty_decay,
        inv_temp=inv_temp,
        ema_win_size=ema_win_size,
    )
    f_model_joint = ConvNet(input_shape, n_classes=n_classes)

    f_model_joint.load_weights(f_model_path)
    g_model_joint.load_weights(g_model_path)
else:
    # At least one weight file is missing: train both models jointly and
    # persist them so later runs can take the loading branch above.
    f_model_joint, g_model_joint = train(
        nn,
        g_model,
        x_train,
        x_train_flat,
        y_train,
        data_val,
        epochs,
        batch_size=batch_size,
    )

    f_model_joint.save_weights(f_model_path)
    g_model_joint.save_weights(g_model_path)
    

In [None]:
from joint import analyze

# Evaluate both jointly trained models on the held-out test split.
f_joint_acc, fidelity, g_joint_acc = analyze(
    f_model_joint,
    g_model_joint,
    x_test,
    x_test_flat,
    y_test,
)

# Report the three values returned by analyze() as percentages, one per line.
print(
    "Accuracy of f (in %): {:.2f}".format(f_joint_acc * 100),
    "Accuracy of g (in %): {:.2f}".format(g_joint_acc * 100),
    "Fidelity (in %): {:.2f}".format(fidelity * 100),
    sep="\n",
)


### 计算其他指标（MCC、AUC、F1-score）