In [None]:
%%capture
%pip install datasets

In [None]:
from datasets import Dataset
from datasets import load_dataset

In [None]:
from google.colab import userdata
from huggingface_hub import login

# Fetch the Hugging Face token stored under the 'HF_TOKEN' key in Colab's
# userdata, then log this session in to the Hub with it.
HF_TOKEN = userdata.get("HF_TOKEN")
login(token=HF_TOKEN)

In [None]:
# Hub repository the feature table is loaded from.
DATASET_NAME = "AdityaMayukhSom/MixSub-Hallucinated-Highlight-Features"
ds = load_dataset(DATASET_NAME)

# Convert both splits of the loaded dataset to pandas DataFrames.
df_train, df_test = (ds[split_name].to_pandas() for split_name in ("train", "test"))

In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.preprocessing import StandardScaler

# X_train = df_train.iloc[:, 0:-1]
# Y_train = df_train.iloc[:, -1]

# X_test = df_test.iloc[:, 0:-1]
# Y_test = df_test.iloc[:, -1]

# Pool the published train/test splits; a fresh 70/30 split is drawn below.
# ignore_index gives every row a unique label, so features and targets stay
# index-aligned after the split.
df = pd.concat([df_train, df_test], axis=0, ignore_index=True)

# The last column is the target; the model only sees the four columns below.
training_cols = ['MTP', 'AVGTP', 'MDVTP', 'MMDVP']
X_raw = df.iloc[:, 0:-1][training_cols]
Y = df.iloc[:, -1].astype(int)

# Split BEFORE scaling. Fitting the scaler on the full dataset would let the
# mean/variance of the held-out rows leak into the training features.
X_train_raw, X_test_raw, Y_train, Y_test = train_test_split(
    X_raw, Y, train_size=0.7, random_state=69
)

# Fit the scaler on the training rows only, then apply the same transform to
# the held-out rows.
ob = StandardScaler()
X_train = pd.DataFrame(
    ob.fit_transform(X_train_raw), columns=training_cols, index=X_train_raw.index
)
X_test = pd.DataFrame(
    ob.transform(X_test_raw), columns=training_cols, index=X_test_raw.index
)

# Full scaled feature frame in the original row order, kept because later
# cells (e.g. the pair plot) read `X`.
X = pd.concat([X_train, X_test]).sort_index()

In [None]:
# Label distribution of the training split, then of the held-out split.
for split_labels in (Y_train, Y_test):
    print(split_labels.value_counts())

In [None]:
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Pairwise relationships between the columns of the feature frame X.
pair_grid = sns.pairplot(X)
plt.show()

## Training Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Fit on the TRAINING split only. The held-out split must never be used for
# fitting, otherwise the "test" accuracy below is measured on data the model
# has already seen.
lr_clf = LogisticRegression(verbose=1)
lr_clf.fit(X_train, Y_train)

# Predictions on both splits: the train score shows the fit, the test score
# shows generalisation.
Y_pred_train = lr_clf.predict(X_train)
Y_pred_test = lr_clf.predict(X_test)

accuracy_train = accuracy_score(Y_train, Y_pred_train)
accuracy_test = accuracy_score(Y_test, Y_pred_test)

print(f"Train Accuracy: {accuracy_train * 100:.2f}%")
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")

print("\n\n")

# Per-class precision / recall / F1: training split first, then held-out split.
print(classification_report(Y_train, Y_pred_train))
print(classification_report(Y_test, Y_pred_test))

## Training Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
from sklearn.naive_bayes import MultinomialNB
from sklearn.naive_bayes import BernoulliNB
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Fit the Gaussian and Bernoulli Naive Bayes variants on the training split.
gnb_clf = GaussianNB()
bnb_clf = BernoulliNB()
for nb_model in (gnb_clf, bnb_clf):
    nb_model.fit(X_train, Y_train)

# Multinomial Naive Bayes is left disabled.
# mnb_clf = MultinomialNB()
# mnb_clf.fit(X_train, Y_train)

# Point `nb_under_test` at whichever fitted variant should be evaluated.
nb_under_test = bnb_clf
Y_pred_train = nb_under_test.predict(X_train)
Y_pred_test = nb_under_test.predict(X_test)

accuracy_train = accuracy_score(Y_train, Y_pred_train)
accuracy_test = accuracy_score(Y_test, Y_pred_test)

print(f"Train Accuracy: {accuracy_train * 100:.2f}%")
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")

print("\n\n")

# Classification report for the training split, then for the held-out split.
for truth, guess in ((Y_train, Y_pred_train), (Y_test, Y_pred_test)):
    print(classification_report(truth, guess))

## Training KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# k-nearest-neighbours classifier (k = 7) fitted on the training split.
N_NEIGHBORS = 7
knn_clf = KNeighborsClassifier(n_neighbors=N_NEIGHBORS)
knn_clf.fit(X_train, Y_train)

# Score the fitted model on both splits.
Y_pred_train = knn_clf.predict(X_train)
accuracy_train = accuracy_score(Y_train, Y_pred_train)

Y_pred_test = knn_clf.predict(X_test)
accuracy_test = accuracy_score(Y_test, Y_pred_test)

print(f"Train Accuracy: {accuracy_train * 100:.2f}%")
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")

print("\n\n")

# Classification report for the training split, then for the held-out split.
print(classification_report(Y_train, Y_pred_train))
print(classification_report(Y_test, Y_pred_test))

## Training Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest settings: 50 trees, each limited to depth 8, with a fixed seed.
rf_params = {"n_estimators": 50, "random_state": 42, "max_depth": 8}
rf_clf = RandomForestClassifier(**rf_params)
rf_clf.fit(X_train, Y_train)

# Score the fitted forest on both splits.
Y_pred_train = rf_clf.predict(X_train)
accuracy_train = accuracy_score(Y_train, Y_pred_train)

Y_pred_test = rf_clf.predict(X_test)
accuracy_test = accuracy_score(Y_test, Y_pred_test)

print(f"Train Accuracy: {accuracy_train * 100:.2f}%")
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")

print("\n\n")

# Classification report for the training split, then for the held-out split.
print(classification_report(Y_train, Y_pred_train))
print(classification_report(Y_test, Y_pred_test))

In [None]:
import xgboost as xgb

# Sweep the XGBoost learning rate and record train / held-out accuracy for
# each value; the two lists are plotted in the next cell.
learning_rate_range = np.arange(0.01, 1, 0.03)
test_XG = []
train_XG = []

# Track the sweep's best model so the report below describes an XGBoost model
# rather than stale predictions left over from an earlier cell.
best_xgb_classifier = None
best_xgb_lr = None
best_xgb_test_score = -np.inf

for lr in learning_rate_range:
    xgb_classifier = xgb.XGBClassifier(eta = lr)
    xgb_classifier.fit(X_train, Y_train)
    train_XG.append(xgb_classifier.score(X_train, Y_train))
    test_XG.append(xgb_classifier.score(X_test, Y_test))

    if test_XG[-1] > best_xgb_test_score:
        best_xgb_test_score = test_XG[-1]
        best_xgb_lr = lr
        best_xgb_classifier = xgb_classifier

# NOTE: the learning rate is selected on the held-out split, so the test
# figures reported here are optimistic; use a separate validation split for an
# unbiased estimate.
print(f"Best learning rate: {best_xgb_lr:.2f}")

Y_pred_train = best_xgb_classifier.predict(X_train)
Y_pred_test = best_xgb_classifier.predict(X_test)

accuracy_train = accuracy_score(Y_train, Y_pred_train)
accuracy_test = accuracy_score(Y_test, Y_pred_test)

print(f"Train Accuracy: {accuracy_train * 100:.2f}%")
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")

print("\n\n")

# Classification report for the training split, then for the held-out split.
print(classification_report(Y_train, Y_pred_train))
print(classification_report(Y_test, Y_pred_test))

In [None]:
# Train vs. test accuracy across the learning-rate sweep, on a single axes.
fig, ax = plt.subplots(figsize=(10, 7))
ax.plot(learning_rate_range, train_XG, c='orange', label='Train')
ax.plot(learning_rate_range, test_XG, c='m', label='Test')
ax.set_xlabel('Learning rate')
ax.set_xticks(learning_rate_range)
ax.set_ylabel('Accuracy score')
ax.set_ylim(0.6, 1)
ax.legend(prop={'size': 12}, loc=3)
ax.set_title('Accuracy score vs. Learning rate of XGBoost', size=14)
plt.show()

In [None]:
# Coarser learning-rate sweep, repeated once per min_child_weight value.
learning_rate_range = np.arange(0.01, 0.5, 0.05)
fig = plt.figure(figsize=(19, 17))

# One panel per min_child_weight value on a 3x3 grid (subplot slots are 1-based).
for idx, weight in enumerate(np.arange(0, 4.5, 0.5), start=1):
    train, test = [], []
    for lr in learning_rate_range:
        xgb_classifier = xgb.XGBClassifier(eta = lr, reg_lambda=1, min_child_weight=weight)
        xgb_classifier.fit(X_train, Y_train)
        train.append(xgb_classifier.score(X_train, Y_train))
        test.append(xgb_classifier.score(X_test, Y_test))

    ax = fig.add_subplot(3, 3, idx)
    ax.plot(learning_rate_range, train, c='orange', label='Training')
    ax.plot(learning_rate_range, test, c='m', label='Testing')
    ax.set_xlabel('Learning rate')
    ax.set_xticks(learning_rate_range)
    ax.set_ylabel('Accuracy score')
    ax.set_ylim(0.6, 1)
    ax.legend(prop={'size': 12}, loc=3)
    title = "Min child weight:" + str(weight)
    ax.set_title(title, size=16)

plt.show()

In [None]:
# Coefficients of the fitted logistic regression (log-odds scale) and their
# exponentials, each keyed by feature column name.
feature_names = X_train.keys()
log_odds = lr_clf.coef_[0]
odds = np.exp(log_odds)

lr_features_log = dict(zip(feature_names, log_odds))
lr_features_no_log = dict(zip(feature_names, odds))

print("log", lr_features_log)
print("no_log", lr_features_no_log)

## Training SVC

In [None]:
from sklearn.svm import SVC

# Support vector classifier with an RBF kernel.
# NOTE(review): per the scikit-learn docs `degree` is only used by the 'poly'
# kernel, so degree=7 should have no effect here - confirm whether a
# polynomial kernel was intended.
svc_params = {"kernel": "rbf", "degree": 7}
svc_clf = SVC(**svc_params)
svc_clf.fit(X_train, Y_train)

# Score the fitted model on both splits.
Y_pred_train = svc_clf.predict(X_train)
accuracy_train = accuracy_score(Y_train, Y_pred_train)

Y_pred_test = svc_clf.predict(X_test)
accuracy_test = accuracy_score(Y_test, Y_pred_test)

print(f"Train Accuracy: {accuracy_train * 100:.2f}%")
print(f"Test Accuracy: {accuracy_test * 100:.2f}%")

print("\n\n")

# Classification report for the training split, then for the held-out split.
print(classification_report(Y_train, Y_pred_train))
print(classification_report(Y_test, Y_pred_test))

## Training SNN

In [None]:
import torch
import torch.nn as nn

class SimpleDenseNet(nn.Module):
    """Fully connected binary classifier: three tanh layers, sigmoid output.

    Layout: input_dim -> hidden_dim -> hidden_dim -> hidden_dim -> output_dim.
    The final sigmoid means ``forward`` returns values in (0, 1), not logits.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of each of the three hidden layers.
        output_dim: Number of output units (default 1).
        dropout_prob: Accepted for interface compatibility; no dropout layer
            is created, so this value is currently unused.
    """

    def __init__(self, input_dim: int, hidden_dim: int, output_dim=1, dropout_prob=0.4):
        super().__init__()
        # Layers are created in fc1..fc4 order so parameter names (and the
        # random initialisation sequence) stay stable for saved state dicts.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.tanh = nn.Tanh()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.fc3 = nn.Linear(hidden_dim, hidden_dim)
        self.fc4 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map a batch of feature rows to per-row sigmoid outputs."""
        # Three identical "linear -> tanh" stages...
        for hidden_layer in (self.fc1, self.fc2, self.fc3):
            x = self.tanh(hidden_layer(x))
        # ...followed by the output projection squashed into (0, 1).
        return self.sigmoid(self.fc4(x))

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# The input width is taken from the feature list used to build X_train, so the
# network cannot silently go out of sync if `training_cols` changes.
denseModel = SimpleDenseNet(
    input_dim=len(training_cols), hidden_dim=1024
).to(device)

## Code declaring and computing all the metrics to measure

In [None]:
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    roc_auc_score,
    precision_recall_curve,
    auc
)


def compute_metrics(model, input_tensor, true_labels):
    """Evaluate a binary classifier and return a dictionary of metrics.

    The model's outputs are treated as probabilities of the positive class
    (label 1) - which is what ``SimpleDenseNet`` produces through its final
    sigmoid - and are thresholded at 0.5 to obtain hard predictions.
    Threshold-based metrics (accuracy, precision, recall, F1, confusion
    matrix) use the hard predictions; ranking metrics (ROC AUC, PR AUC) use
    the probabilities.

    Args:
        model: Callable returning one positive-class probability per row of
            ``input_tensor`` as a tensor.
        input_tensor: Feature tensor passed to ``model``.
        true_labels: Tensor of 0/1 ground-truth labels, one per row.

    Returns:
        dict with the keys "Accuracy", "Precision", "Recall", "F1", "TP",
        "TN", "FP", "FN", "ROC AUC", "PR AUC" and, with label 0 treated as
        the class of interest, "Precision-Negative", "Recall-Negative",
        "F1-Negative", "ROC AUC-Negative", "PR AUC-Negative".
    """
    with torch.no_grad():
        outputs = model(input_tensor)
        # The outputs are already probabilities; applying another sigmoid
        # would squash them into a narrow band above 0.5.
        predicted_probs = outputs.cpu().numpy().ravel()

    # Flatten everything to 1-D so scikit-learn sees plain label vectors.
    predicted = (predicted_probs > 0.5).astype(int)
    true_labels = true_labels.cpu().numpy().ravel().astype(int)

    # Score for "this row is class 0", needed by the negative-class curves.
    negative_probs = 1.0 - predicted_probs

    acc = accuracy_score(true_labels, predicted)
    precision = precision_score(true_labels, predicted)
    recall = recall_score(true_labels, predicted)
    f1 = f1_score(true_labels, predicted)

    precision_negative = precision_score(true_labels, predicted, pos_label=0)
    recall_negative = recall_score(true_labels, predicted, pos_label=0)
    f1_negative = f1_score(true_labels, predicted, pos_label=0)

    # labels=[0, 1] forces a 2x2 matrix even if one class is absent, so the
    # four-way unpacking below cannot fail.
    tn, fp, fn, tp = confusion_matrix(true_labels, predicted, labels=[0, 1]).ravel()

    # Ranking metrics must be computed from continuous scores; hard 0/1
    # predictions would collapse each curve to a single operating point.
    roc_auc = roc_auc_score(true_labels, predicted_probs)
    P, R, _ = precision_recall_curve(true_labels, predicted_probs, pos_label=1)
    pr_auc = auc(R, P)

    # Negative class: flip BOTH the labels and the scores. Flipping only the
    # scores would return 1 - ROC AUC instead of the class-0 ROC AUC.
    roc_auc_negative = roc_auc_score(1 - true_labels, negative_probs)
    P_neg, R_neg, _ = precision_recall_curve(true_labels, negative_probs, pos_label=0)
    pr_auc_negative = auc(R_neg, P_neg)

    return {
        "Accuracy": acc,
        "Precision": precision,
        "Recall": recall,
        "F1": f1,
        "TP": tp,
        "TN": tn,
        "FP": fp,
        "FN": fn,
        "ROC AUC": roc_auc,
        "PR AUC": pr_auc,
        "Precision-Negative": precision_negative,
        "Recall-Negative": recall_negative,
        "F1-Negative": f1_negative,
        "ROC AUC-Negative": roc_auc_negative,
        "PR AUC-Negative": pr_auc_negative,
    }

## Code for training the Dense Model and getting the result of all metrics corresponding to the Testing Set.

In [None]:
import numpy as np

def compute_accuracy(model, input_tensor, true_labels):
    """Return the fraction of rows whose thresholded output equals its label.

    The model is run without gradient tracking; outputs strictly greater than
    0.5 count as class 1, everything else as class 0.

    Args:
        model: Callable mapping ``input_tensor`` to a tensor of scores.
        input_tensor: Batch of inputs passed to ``model``.
        true_labels: Tensor of 0/1 labels shaped like the model output.

    Returns:
        Accuracy as a Python float.
    """
    with torch.no_grad():
        hard_labels = (model(input_tensor) > 0.5).float()
        n_correct = (hard_labels == true_labels).float().sum()
        return (n_correct / len(true_labels)).item()


# Training split as float32 tensors on the target device; the labels become a
# single column so they match the network's one-unit output.
X_train_tensor = torch.tensor(
    X_train.to_numpy(dtype=np.float64), dtype=torch.float32
).to(device)
Y_train_tensor = (
    torch.tensor(Y_train.to_numpy(dtype=np.float64), dtype=torch.float32)
    .reshape(-1, 1)
    .to(device)
)

print(X_train_tensor.shape, Y_train_tensor.shape)

# Binary cross-entropy on the sigmoid outputs, optimised with Adam.
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(denseModel.parameters(), lr=0.001)

bestValAcc = 0
num_epochs = 2000

# Full-batch training: every epoch is one forward/backward pass over the whole
# training split.
for epoch in range(num_epochs):
    denseModel.train()
    optimizer.zero_grad()
    outputs = denseModel(X_train_tensor)
    loss = criterion(outputs, Y_train_tensor)
    loss.backward()
    optimizer.step()

    # Accuracy on the training split after this epoch's update.
    train_accuracy = compute_accuracy(denseModel, X_train_tensor, Y_train_tensor)

    # Optional validation tracking, left disabled (X_val_features, Y_val and
    # task are not defined in this notebook):
    # X_val_tensor = torch.tensor(X_val_features, dtype=torch.float32).to(device)
    # Y_val_tensor = torch.tensor(Y_val, dtype=torch.float32).view(-1, 1).to(device)
    # val_accuracy = compute_accuracy(denseModel, X_val_tensor, Y_val_tensor)
    # if bestValAcc < val_accuracy:
    #     bestValAcc = val_accuracy
    #     print(f'Saving model with best validation accuracy ...')
    #     torch.save(denseModel.state_dict(), 'llama-' + task + '-best-model')

    # Report progress every tenth epoch.
    is_report_epoch = (epoch + 1) % 10 == 0
    if is_report_epoch:
        print(
            f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Training Accuracy: {train_accuracy:.4f}"
        )

## Uncomment next cell if you want to load a particular model you already trained.

In [None]:
# loaded_model = SimpleDenseNet(input_dim=len(list(features_to_extract.keys())), hidden_dim=512).to(device)
# loaded_model.load_state_dict(torch.load('llama-' + task + '-best-model'))

# # Set the model to evaluation mode
# loaded_model.eval()

## Compute the metrics using the model on the Test Set.

In [None]:
# X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
# Y_test_tensor = torch.tensor(Y_test, dtype=torch.float32).view(-1, 1).to(device)

# Held-out split as float32 tensors on the target device, labels as one column.
X_test_tensor = torch.tensor(
    X_test.to_numpy(dtype=np.float64), dtype=torch.float32
).to(device)
Y_test_tensor = (
    torch.tensor(Y_test.to_numpy(dtype=np.float64), dtype=torch.float32)
    .reshape(-1, 1)
    .to(device)
)

# To inspect the training split instead, pass X_train_tensor / Y_train_tensor.
test_metrics = compute_metrics(denseModel, X_test_tensor, Y_test_tensor)

# First line: metrics with label 1 as the class of interest; second line:
# the same metrics with label 0 as the class of interest.
positive_summary = f"Testing - Accuracy: {test_metrics['Accuracy']:.4f}, Precision: {test_metrics['Precision']:.4f}, Recall: {test_metrics['Recall']:.4f}, F1: {test_metrics['F1']:.4f}, ROC AUC: {test_metrics['ROC AUC']:.4f}, PR AUC: {test_metrics['PR AUC']:.4f}"
negative_summary = f"Testing - Negative: {test_metrics['Accuracy']:.4f}, Precision-Negative: {test_metrics['Precision-Negative']:.4f}, Recall-Negative: {test_metrics['Recall-Negative']:.4f}, F1-Negative: {test_metrics['F1-Negative']:.4f}, ROC AUC-Negative: {test_metrics['ROC AUC-Negative']:.4f}, PR AUC-Negative: {test_metrics['PR AUC-Negative']:.4f}"

print(positive_summary)
print(negative_summary)

In [None]:
# torch.save requires a destination as its second argument; calling it with
# the model alone raises a TypeError. Only the learned weights (state_dict)
# are stored, matching the load_state_dict-based loading code shown in the
# "load a particular model" cell.
MODEL_SAVE_PATH = "simple-dense-net-state.pt"
torch.save(denseModel.state_dict(), MODEL_SAVE_PATH)

## Save the results on a CSV if you want.

In [None]:
model_dataframe = pd.DataFrame(
    columns=[
        "features",
        "model_name",
        "feature_to_extract",
        "method",
        "accuracy",
        "precision",
        "recall",
        "roc auc",
        "pr auc",
        "negative",
        "precision-negative",
        "recall-negative",
        "negative f1",
        "lr_accuracy",
        "lr_features_log",
        "lr_features_no_log",
    ]
)

In [None]:
d = {
    "features": features_to_extract,
    "model_name": str(model.getName()),
    "feature_to_extract": feature_to_extract,
    "method": "TEST",
    "accuracy": test_metrics["Accuracy"],
    "precision": test_metrics["Precision"],
    "recall": test_metrics["Recall"],
    "f1": test_metrics["F1"],
    "pr auc": test_metrics["PR AUC"],
    "precision-negative": test_metrics["Precision-Negative"],
    "recall-negative": test_metrics["Recall-Negative"],
    "negative-f1": test_metrics["F1-Negative"],
    "lr_accuracy": lr_accuracy,
    "lr_features_log": lr_features_log,
    "lr_features_no_log": lr_features_no_log,
}

In [None]:
# model_dataframe.loc[len(model_dataframe.index)] = d
# model_dataframe.head()
# csv_name = f"{model.getSanitizedName()}_{task}_{includeKnowledge=}_{includeConditioned=}_{'_'.join([f'{k}={v}' for k, v in features_to_extract.items()])}.csv"
# model_dataframe.to_csv(output_path / csv_name, index=False)