In [35]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.neural_network import BernoulliRBM
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.utils import check_array, check_X_y
from sklearn.model_selection import GridSearchCV

In [36]:

# Load the dataset
data = pd.read_csv('/Users/sumiran/Downloads/preprocessed_dataset.csv')

In [37]:
data.head()


Unnamed: 0,R1-PA1:VH,R1-PM1:V,R1-PA2:VH,R1-PM2:V,R1-PA3:VH,R1-PM3:V,R1-PA4:IH,R1-PM4:I,R1-PA5:IH,R1-PM5:I,...,R4-PA3:VH_R4-PA6:IH_ratio,R4-PM3:V_R4-PM6:I_ratio,R4-PA7:VH_R4-PA10:IH_ratio,R4-PM7:V_R4-PM10:I_ratio,R4-PA8:VH_R4-PA11:IH_ratio,R4-PM8:V_R4-PM11:I_ratio,R4-PA9:VH_R4-PA12:IH_ratio,R4-PM9:V_R4-PM12:I_ratio,phase_angle_diff,marker
0,0.875206,-0.416331,-0.445278,-0.848774,-1.717388,-1.050588,0.84,0.991478,-0.50892,1.307889,...,0.991955,-1.072212,1.044118,-0.709692,-0.011228,-0.227103,0.013165,-0.209933,1.320483,1
1,0.8786,-0.327157,-0.442027,-0.635233,-1.713841,-0.841586,0.843329,0.983953,-0.504557,1.286343,...,0.991003,-0.87128,1.044153,-0.592302,-0.011124,-0.281077,0.012998,-0.260892,1.320628,1
2,0.883164,-0.165022,-0.437676,-0.246977,-1.709189,-0.461583,0.842996,1.000883,-0.500668,1.269106,...,0.987086,-0.52386,1.048743,-0.368436,-0.011113,-0.589095,0.012451,-0.559892,1.320841,1
3,0.883109,-0.173129,-0.437834,-0.26639,-1.709363,-0.480583,0.839722,1.021575,-0.502192,1.273415,...,0.985039,-0.545961,1.051805,-0.373285,-0.011152,-1.173761,0.012782,-1.493546,1.320942,1
4,0.882997,-0.189343,-0.437991,-0.285802,-1.709479,-0.518584,0.838668,1.027219,-0.502245,1.271261,...,0.984513,-0.540093,1.052372,-0.381995,-0.011017,-1.946509,0.012528,-1.493546,1.320988,1


In [15]:
#preprocessing
# # Replacing infinite values with NaNs
# data.replace([np.inf, -np.inf], np.nan, inplace=True)
data = data[~(data.isna().any(axis=1) | data.isin([np.inf, -np.inf]).any(axis=1))]

# handling missing values by filling with the mean of the column
data.fillna(data.mean(numeric_only=True), inplace=True)

In [16]:
# Ensure all columns used for scaling are numeric
scaler = StandardScaler()
numeric_columns = data.select_dtypes(include=[np.number]).columns
data_scaled = scaler.fit_transform(data[numeric_columns])

In [17]:
# Encode the target variable
label_encoder = LabelEncoder()
data['marker'] = label_encoder.fit_transform(data.iloc[:, -1])

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data_scaled, data['marker'], test_size=0.2, random_state=42)

# Convert y_train and y_test to numpy arrays of floats
y_train = np.array(y_train, dtype=float)
y_test = np.array(y_test, dtype=float)


In [30]:
# Define the DBN using RBMs
class DBN(BaseEstimator, ClassifierMixin):
    def __init__(self, rbm_layers=[256, 128], rbm_iter=10, rbm_learning_rate=0.1):
        self.rbm_layers = rbm_layers
        self.rbm_iter = rbm_iter
        self.rbm_learning_rate = rbm_learning_rate
        self.rbms = []
        self.n_layers = len(rbm_layers)
    
    def fit(self, X, y=None):
        input_data = X
        for i in range(self.n_layers):
            rbm = BernoulliRBM(n_components=self.rbm_layers[i], n_iter=self.rbm_iter, learning_rate=self.rbm_learning_rate, verbose=True)
            rbm.fit(input_data)
            input_data = rbm.transform(input_data)
            self.rbms.append(rbm)
        self.rbm_output = input_data
        return self
    
    def transform(self, X):
        input_data = X
        for rbm in self.rbms:
            input_data = rbm.transform(input_data)
        return input_data
    
    def fit_transform(self, X, y=None):
        return self.fit(X, y).transform(X)

In [31]:
# Integrate KELM
class KELM(BaseEstimator, ClassifierMixin):
    def __init__(self, hidden_units=1000, activation='sigmoid'):
        self.hidden_units = hidden_units
        self.activation = activation
    
    def fit(self, X, y):
        X, y = check_X_y(X, y)
        self.input_weights = np.random.normal(size=(X.shape[1], self.hidden_units))
        self.biases = np.random.normal(size=self.hidden_units)
        H = self._activation_function(np.dot(X, self.input_weights) + self.biases)
        self.output_weights = np.dot(np.linalg.pinv(H), y)
        return self
    
    def predict(self, X):
        X = check_array(X)
        H = self._activation_function(np.dot(X, self.input_weights) + self.biases)
        return np.sign(np.dot(H, self.output_weights))
    
    def _activation_function(self, x):
        if self.activation == 'sigmoid':
            return 1 / (1 + np.exp(-x))
        elif self.activation == 'tanh':
            return np.tanh(x)
        elif self.activation == 'relu':
            return np.maximum(0, x)
        else:
            raise ValueError("Unknown activation function: {}".format(self.activation))

In [40]:

# Define the overall model combining DBN and KELM
class DBN_KELM(BaseEstimator, ClassifierMixin):
    def __init__(self, dbn_params=None, kelm_params=None):
        self.dbn_params = dbn_params if dbn_params is not None else {}
        self.kelm_params = kelm_params if kelm_params is not None else {}
        self.dbn = DBN(**self.dbn_params)
        self.kelm = KELM(**self.kelm_params)
    
    def fit(self, X, y):
        X_transformed = self.dbn.fit_transform(X, y)
        self.kelm.fit(X_transformed, y)
        return self
    
    def predict(self, X):
        X_transformed = self.dbn.transform(X)
        return self.kelm.predict(X)

In [41]:


# Example hyperparameter grid for KELM
param_grid = {
    'kelm__hidden_units': [500, 1000, 1500],
    'kelm__activation': ['sigmoid', 'tanh', 'relu'],
    'dbn__rbm_layers': [[256, 128], [512, 256], [128, 64]],
    'dbn__rbm_iter': [10, 20],
    'dbn__rbm_learning_rate': [0.01, 0.1]
}

# # Define the overall model
model = DBN_KELM(dbn_params=dbn_params, kelm_params=kelm_params)

# Perform grid search
grid_search = GridSearchCV(model, param_grid, cv=3, scoring='accuracy')
grid_search.fit(X_train, y_train)

# Print the best parameters and best score
print("Best parameters:", grid_search.best_params_)
print("Best score:", grid_search.best_score_)

# Use the best model found
best_model = grid_search.best_estimator_


ValueError: Invalid parameter 'dbn' for estimator DBN_KELM(dbn_params={'rbm_iter': 10, 'rbm_layers': [256, 128],
                     'rbm_learning_rate': 0.1},
         kelm_params={'activation': 'sigmoid', 'hidden_units': 1000}). Valid parameters are: ['dbn_params', 'kelm_params'].

In [42]:
# Set the parameters for DBN and KELM
dbn_params = {'rbm_layers': [256, 128], 'rbm_iter': 10, 'rbm_learning_rate': 0.1}
kelm_params = {'hidden_units': 1000, 'activation': 'sigmoid'}

In [43]:

# Create the DBN-KELM model
model = DBN_KELM(dbn_params=dbn_params, kelm_params=kelm_params)

In [28]:

# Train the model
model.fit(X_train, y_train)

[BernoulliRBM] Iteration 1, pseudo-likelihood = -272181.19, time = 80.99s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -594074.20, time = 90.99s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -855231.43, time = 47.00s
[BernoulliRBM] Iteration 4, pseudo-likelihood = -1128933.32, time = 49.75s
[BernoulliRBM] Iteration 5, pseudo-likelihood = -1513267.55, time = 42.60s
[BernoulliRBM] Iteration 6, pseudo-likelihood = -1710628.86, time = 54.63s
[BernoulliRBM] Iteration 7, pseudo-likelihood = -2147061.42, time = 81.50s
[BernoulliRBM] Iteration 8, pseudo-likelihood = -2223772.34, time = 105.55s
[BernoulliRBM] Iteration 9, pseudo-likelihood = -2694625.38, time = 79.78s
[BernoulliRBM] Iteration 10, pseudo-likelihood = -2806410.60, time = 44.76s
[BernoulliRBM] Iteration 1, pseudo-likelihood = -13.47, time = 6.45s
[BernoulliRBM] Iteration 2, pseudo-likelihood = -8.67, time = 6.35s
[BernoulliRBM] Iteration 3, pseudo-likelihood = -7.35, time = 6.85s
[BernoulliRBM] Iteration 4, pseudo-likelihood 

In [45]:
# Evaluate the model
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Precision:", precision_score(y_test, y_pred, average='macro'))
print("Recall:", recall_score(y_test, y_pred, average='macro'))
print("F1 Score:", f1_score(y_test, y_pred, average='macro'))
