In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score, f1_score

# Load the dataset
df = pd.read_csv("Hallucination-Dataset-400-Samples.csv")

# Handle missing values
df.fillna("", inplace=True)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(df[['Context', 'Question', 'Answer']], df['Hallucination'], test_size=0.4, random_state=42)

# Define preprocessing steps
preprocessor = ColumnTransformer(
    transformers=[
        ('tfidf_context', TfidfVectorizer(), 'Context'),
        ('tfidf_question', TfidfVectorizer(), 'Question'),
        ('tfidf_answer', TfidfVectorizer(), 'Answer')
    ],
    remainder='passthrough'
)

# Define the classifier
clf = Pipeline(steps=[('preprocessor', preprocessor),
                      ('classifier', LogisticRegression(random_state=42))])

# Define hyperparameters to tune
param_grid = {
    'classifier__C': [0.001, 0.01, 0.1, 1, 10, 100],
    'classifier__penalty': ['l2'],
    'preprocessor__tfidf_context__ngram_range': [(1, 1), (1, 2)],
    'preprocessor__tfidf_question__ngram_range': [(1, 1), (1, 2)],
    'preprocessor__tfidf_answer__ngram_range': [(1, 1), (1, 2)]
}

# Grid search for hyperparameter tuning
grid_search = GridSearchCV(clf, param_grid, cv=3, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

# Get the best model from grid search
best_clf = grid_search.best_estimator_

# Predict on the testing set
y_pred = best_clf.predict(X_test)
# Calculate performance metrics
accuracy = accuracy_score(y_test, y_pred)
auc_roc_score = roc_auc_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Print performance metrics
print("Accuracy:", accuracy)
print("AUC-ROC Score:", auc_roc_score)
print("F1 Score:", f1)

# Print the counts of 0s and 1s in the 'Hallucination' column
print("\nHallucination column:")
print(y_test.value_counts())

# Print the counts of 0s and 1s in the 'Prediction' column
print("\nPrediction column:")
print(pd.Series(y_pred).value_counts())

# Concatenate X_test, y_test, and y_pred to form a DataFrame
results_df = pd.concat([X_test.reset_index(drop=True), y_test.reset_index(drop=True), pd.Series(y_pred, name='Prediction')], axis=1)

# Print the complete testing dataset with all columns
print(results_df)

# evaluate the model's performance on unseen data.
best_clf.fit(X_train, y_train)

# Predict on the complete dataset
y_pred_complete = best_clf.predict(df[['Context', 'Question', 'Answer']])

# Calculate performance metrics for the complete dataset
accuracy_complete = accuracy_score(df['Hallucination'], y_pred_complete)
auc_roc_score_complete = roc_auc_score(df['Hallucination'], y_pred_complete)
f1_complete = f1_score(df['Hallucination'], y_pred_complete)

# Print performance metrics for the complete dataset
print("\nComplete Dataset Performance Metrics:")
print("Accuracy:", accuracy_complete)
print("AUC-ROC Score:", auc_roc_score_complete)
print("F1 Score:", f1_complete)

# Print the counts of 0s and 1s in the 'Hallucination' column for the complete dataset
print("\nComplete Dataset Hallucination column:")
print(df['Hallucination'].value_counts())

# Print the counts of 0s and 1s in the 'Prediction' column for the complete dataset
print("\nComplete Dataset Prediction column:")
print(pd.Series(y_pred_complete).value_counts())

# Concatenate the complete dataset with predictions
complete_results_df = pd.concat([df.reset_index(drop=True), pd.Series(y_pred_complete, name='Prediction')], axis=1)

# Print the complete dataset with all columns including Hallucination and Prediction
print("\nComplete Dataset with Predictions:")
print(complete_results_df)



Accuracy: 0.8198757763975155
AUC-ROC Score: 0.809075907590759
F1 Score: 0.8557213930348259

Hallucination column:
1    101
0     60
Name: Hallucination, dtype: int64

Prediction column:
1    100
0     61
dtype: int64
                                               Context  \
0    The CoolMOS™ technology is well-established in...   
1    Infineon products contribute by providing volt...   
2    In the field of wireless-LAN applications, RF ...   
3    Because of the extremely high cost, they canno...   
4    Depletion-mode MOSFETs are considered the most...   
..                                                 ...   
156  The my-d™ move range is designed to meet the r...   
157  The 600 V CoolMOS™ P7 family serves various st...   
158  PROFET™ switches are ideally suited for advanc...   
159  Infineon's p-n junction-isolation (JI) technol...   
160  Medical wearables must combine seamless connec...   

                                              Question  \
0    Why is CoolMOS™ technol