In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

In [4]:
df = pd.read_csv("../Dataset/Main/main-dataset-confidence.csv")

In [5]:
X = df[["bilstm_label","bilstm_confidence","bert_label","bert_confidence"]]
y = df["category"]

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
meta_model = RandomForestClassifier(n_estimators=100, random_state=42)
meta_model.fit(X_train, y_train)

In [8]:
y_pred = meta_model.predict(X_test)

In [9]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Meta-Model Accuracy: {accuracy:.4f}")
print("Classification Report:\n", classification_report(y_test, y_pred))

Meta-Model Accuracy: 0.9774
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98      1533
           1       0.97      0.97      0.97      1118

    accuracy                           0.98      2651
   macro avg       0.98      0.98      0.98      2651
weighted avg       0.98      0.98      0.98      2651



In [10]:
import joblib
joblib.dump(meta_model, "meta_model.pkl")

['meta_model.pkl']

In [11]:
meta_model = joblib.load("meta_model.pkl")

In [12]:
def predict_category(bilstm_label, bilstm_confidence, bert_label, bert_confidence):
    input_data = np.array([[bilstm_label, bilstm_confidence, bert_label, bert_confidence]])
    prediction = meta_model.predict(input_data)
    return prediction[0]

In [31]:
bilstm_label = 0
bilstm_confidence = 1
bert_label = 1
bert_confidence = 0.90

In [32]:
predicted_category = predict_category(bilstm_label, bilstm_confidence, bert_label, bert_confidence)
print(f"Predicted Category: {predicted_category}")

Predicted Category: 0




In [33]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [34]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [35]:
joblib.dump(scaler, "scaler.pkl")

['scaler.pkl']

In [36]:
meta_model = Sequential([
    Dense(16, activation='relu', input_shape=(4,)),  # Input layer
    Dense(8, activation='relu'),  # Hidden layer
    Dense(1, activation='sigmoid')  # Output layer for binary classification
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [37]:
meta_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

In [40]:
meta_model.fit(X_train, y_train, epochs=30, batch_size=8, validation_data=(X_test, y_test))

Epoch 1/30
[1m1326/1326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9861 - loss: 0.0355 - val_accuracy: 0.9819 - val_loss: 0.0674
Epoch 2/30
[1m1326/1326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9874 - loss: 0.0311 - val_accuracy: 0.9815 - val_loss: 0.0650
Epoch 3/30
[1m1326/1326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - accuracy: 0.9879 - loss: 0.0327 - val_accuracy: 0.9785 - val_loss: 0.0721
Epoch 4/30
[1m1326/1326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9858 - loss: 0.0354 - val_accuracy: 0.9815 - val_loss: 0.0645
Epoch 5/30
[1m1326/1326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9881 - loss: 0.0321 - val_accuracy: 0.9815 - val_loss: 0.0661
Epoch 6/30
[1m1326/1326[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 1ms/step - accuracy: 0.9880 - loss: 0.0313 - val_accuracy: 0.9774 - val_loss: 0.0712
Epoch 7/30
[1m1

<keras.src.callbacks.history.History at 0x23f57b17140>

In [44]:
y_pred_probs = meta_model.predict(X_test)  # Get probability scores
y_pred = (y_pred_probs > 0.5).astype(int)

[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [45]:
from sklearn.metrics import classification_report, confusion_matrix

# Generate classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Generate confusion matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

Classification Report:
              precision    recall  f1-score   support

           0       0.99      0.98      0.98      1533
           1       0.97      0.98      0.98      1118

    accuracy                           0.98      2651
   macro avg       0.98      0.98      0.98      2651
weighted avg       0.98      0.98      0.98      2651

Confusion Matrix:
[[1503   30]
 [  21 1097]]


In [46]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Meta-Model Accuracy: {accuracy:.4f}")

Meta-Model Accuracy: 0.9808


In [47]:
meta_model.save("meta_model_neural.h5")



In [48]:
from tensorflow.keras.models import load_model

# Load trained model and scaler
meta_model = load_model("meta_model_neural.h5")
scaler = joblib.load("scaler.pkl")

# Function to predict category based on input features
def predict_category(bilstm_label, bilstm_confidence, bert_label, bert_confidence):
    input_data = np.array([[bilstm_label, bilstm_confidence, bert_label, bert_confidence]])
    input_data = scaler.transform(input_data)  # Normalize the input
    prediction = meta_model.predict(input_data)
    predicted_category = 1 if prediction[0] > 0.5 else 0  # Convert probability to class label
    return predicted_category

# Example input
bilstm_label = 1
bilstm_confidence = 0.85
bert_label = 1
bert_confidence = 0.92

# Get the prediction
predicted_category = predict_category(bilstm_label, bilstm_confidence, bert_label, bert_confidence)
print(f"Predicted Category: {predicted_category}")




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 58ms/step
Predicted Category: 1


