# Basic Models: SVM and Naive Bayes

### Import necessary packages

In [2]:
import pandas as pd
import numpy as np
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, classification_report
from sklearn.preprocessing import LabelBinarizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
import nltk
import re
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

import mlflow
import os
import logging 
import config 


### Setup

In [3]:
MODEL_NAME = "SVM"

# Read the MLflow tracking URI from a file one directory above the notebook.
# The context manager closes the file handle as soon as the value is read
# instead of leaving it open until garbage collection.
with open("../.mlflow_uri") as uri_file:
    TRACKING_URI = uri_file.read().strip()
EXPERIMENT_NAME = config.EXPERIMENT_NAME

# Prefix every log message with a timestamp.
logging.basicConfig(format="%(asctime)s: %(message)s")

# Root logger: emit INFO and above (WARNING, ERROR, ...), drop DEBUG.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

In [None]:
# Location of the input CSV, relative to the directory the notebook runs from.
DATA_PATH = "../data/data_dropped_duplicates_small.csv"

### Get data

In [5]:
# Load the dataset; the first CSV column becomes the DataFrame index.
df = pd.read_csv(DATA_PATH, index_col=0)

In [6]:
# Preview the first rows to sanity-check the load.
df.head()

Unnamed: 0,dataset,text,logical_fallacies,source
18384,8,Testing on animals could save the life of you ...,appeal_to_emotion,
11271,3,"I remember when China took over Hong Kong, I r...",none,
15702,4,": The only ""Light at the End of the Tunnel"", i...",appeal_to_emotion,
7148,3,So you only believe there are two ways to run ...,none,
8147,3,Keep things the way they are or change them co...,false_dilemma,


### Preprocess text

In [7]:
def preprocess_text(text):
    """Return ``text`` converted to lower case.

    Args:
        text: The string to normalise.

    Returns:
        The lower-cased copy of ``text``.
    """
    return text.lower()

In [8]:
# Lower-case every document; the 'text' column is overwritten in place.
df['text'] = df['text'].apply(preprocess_text)
# Preview the result of the transformation.
df.head()

Unnamed: 0,dataset,text,logical_fallacies,source
18384,8,testing on animals could save the life of you ...,appeal_to_emotion,
11271,3,"i remember when china took over hong kong, i r...",none,
15702,4,": the only ""light at the end of the tunnel"", i...",appeal_to_emotion,
7148,3,so you only believe there are two ways to run ...,none,
8147,3,keep things the way they are or change them co...,false_dilemma,


In [9]:
# Fetch the WordNet corpus via NLTK's downloader; per the recorded output it
# reports "already up-to-date" when the package is present locally.
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/katharinabaumgartner/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [10]:
# Lemmatize text
lemmatize = nltk.WordNetLemmatizer()


def lemmatize_text(text):
    """Lemmatize each whitespace-separated token of ``text``.

    Tokens are produced by ``str.split()`` (so punctuation stays attached to
    its word), passed through the WordNet lemmatizer one by one, and joined
    back together with single spaces.
    """
    tokens = text.split()
    lemmas = map(lemmatize.lemmatize, tokens)
    return ' '.join(lemmas)


# Overwrite the 'text' column with its lemmatized version and preview it.
df['text'] = df['text'].apply(lemmatize_text)
df.head()

Unnamed: 0,dataset,text,logical_fallacies,source
18384,8,testing on animal could save the life of you o...,appeal_to_emotion,
11271,3,"i remember when china took over hong kong, i r...",none,
15702,4,": the only ""light at the end of the tunnel"", i...",appeal_to_emotion,
7148,3,so you only believe there are two way to run a...,none,
8147,3,keep thing the way they are or change them com...,false_dilemma,


In [11]:
# Features are the texts; targets are the fallacy labels.
X = df["text"]
y = df["logical_fallacies"]

# Hold out 30% of the rows for testing. Stratifying on the label keeps the
# class proportions similar in both splits; the fixed seed makes the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.30,
    random_state=42,
)

### SVM

In [12]:
# Point MLflow at the tracking server and select the experiment to log into.
mlflow.set_tracking_uri(TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

# Re-executing this cell while a run is still open would make start_run()
# raise, so close any leftover run before opening a fresh one.
if mlflow.active_run() is not None:
    mlflow.end_run()

mlflow.start_run()
run = mlflow.active_run()

# Tag the run so it can be identified in the MLflow UI.
mlflow.set_tag("model_name", MODEL_NAME)
mlflow.set_tag("mlflow.runName", "svm baseline")
# Hyperparameters are logged once the grid search has selected them.

In [13]:
# Use the TF-IDF vectorizer to transform the text into numerical features.
# The vectorizer is fitted on the training split only and then re-used,
# unchanged, to transform the test split.
# NOTE(review): the vectorizer is fitted before the grid search runs, so every
# cross-validation fold is vectorized with statistics that include its own
# validation rows. Wrapping vectorizer + classifier in a Pipeline would avoid
# that; confirm whether it matters for this baseline.
tfidf_vectorizer = TfidfVectorizer()
X_vectorized = tfidf_vectorizer.fit_transform(X_train)
X_vectorized_test = tfidf_vectorizer.transform(X_test)

In [14]:

# Grid search
# NOTE: `gamma` is only used by the rbf kernel, so the linear-kernel
# candidates are fitted once per gamma value with identical results.
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto'],
    'kernel': ['linear', 'rbf'],
}

# probability=True is required for predict_proba (used for the Brier score).
# The probability estimates rely on an internal, shuffled cross-validation,
# so a fixed random_state is needed to get the same probabilities on re-run.
svm = SVC(probability=True, random_state=42)

# 5-fold cross-validated search over the full grid.
grid_search = GridSearchCV(svm, param_grid, cv=5)
grid_search.fit(X_vectorized, y_train)

best_params = grid_search.best_params_
print(best_params)

# Predict on train and test data with the refitted best estimator.
best_model = grid_search.best_estimator_
y_train_pred = best_model.predict(X_vectorized)
y_test_pred = best_model.predict(X_vectorized_test)

mlflow.log_params(best_params)

# Save model to pickle file. Create the target directory first so the write
# does not fail on a fresh checkout.
# NOTE(review): only the classifier is saved; the fitted TF-IDF vectorizer is
# needed as well to score raw text later — confirm it is persisted elsewhere.
model_path = '../models/svm/svm_model.pkl'
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, 'wb') as f:
    pickle.dump(best_model, f)
    




{'C': 10, 'gamma': 'scale', 'kernel': 'linear'}


### Evaluation


In [15]:
def get_metrics(y_true, y_pred, proba, classes=None):
    """Print evaluation reports and compute the multiclass Brier score.

    Prints the classification report and the confusion matrix for
    ``y_true`` vs. ``y_pred``, then computes the Brier score as the mean over
    samples of the summed squared difference between ``proba`` and the
    one-hot encoded true labels.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.
        proba: Predicted class probabilities, one row per sample and one
            column per class.
        classes: Optional class labels in the column order of ``proba``
            (e.g. the fitted classifier's ``classes_``). When given, the
            one-hot encoding is built from these labels, so it still has the
            right columns when some class does not occur in ``y_true``.
            When omitted, the classes are inferred from ``y_true``.

    Returns:
        Tuple ``(classification_report_dict, brier_score)``.

    Raises:
        ValueError: If ``proba`` and the one-hot encoded labels differ in
            shape, which means the Brier score would be computed on
            misaligned (or silently broadcast) columns.
    """
    logger.info('classification_report')
    classification_report_dict = classification_report(y_true, y_pred, output_dict=True)
    print(classification_report(y_true, y_pred))

    logger.info('confusion_matrix')
    print(confusion_matrix(y_true, y_pred))

    logger.info('brier score')
    # 1. One-hot encode the true labels against the known class set.
    lb = LabelBinarizer()
    lb.fit(y_true if classes is None else classes)
    y_true_onehot = lb.transform(y_true)  # Shape: (n_samples, n_classes)

    # LabelBinarizer emits a single column for two classes; expand it to two
    # columns so it lines up with a (n_samples, 2) probability matrix instead
    # of being broadcast across both columns.
    if len(lb.classes_) == 2 and y_true_onehot.shape[1] == 1:
        y_true_onehot = np.hstack([1 - y_true_onehot, y_true_onehot])

    proba = np.asarray(proba)
    if proba.shape != y_true_onehot.shape:
        raise ValueError(
            f"proba has shape {proba.shape} but the one-hot encoded labels have "
            f"shape {y_true_onehot.shape}; pass `classes` in the column order of `proba`."
        )

    # 2. Compute Brier score for multiclass
    brier_score = np.mean(np.sum((proba - y_true_onehot) ** 2, axis=1))
    print("Multiclass Brier score:", brier_score)

    return classification_report_dict, brier_score


In [16]:
def log_metrics(cr, brier, split):
    """Log the Brier score and every classification-report entry to MLflow.

    Args:
        cr: Mapping of report keys to values. The ``"accuracy"`` entry is
            logged directly as ``{split}_accuracy``; every other entry is
            treated as a mapping of metric name to value and logged as
            ``{split}_{key}_{metric}``.
        brier: Brier score, logged as ``{split}_brier``.
        split: Prefix for the metric names (e.g. ``"train"`` or ``"test"``).
    """
    mlflow.log_metric(f"{split}_brier", brier)

    for label, scores in cr.items():
        if label != "accuracy":
            # Per-class and averaged rows: one metric per (row, score) pair.
            for metric_name, metric_value in scores.items():
                mlflow.log_metric(f"{split}_{label}_{metric_name}", metric_value)
        else:
            # Accuracy is a single scalar rather than a nested mapping.
            mlflow.log_metric(f"{split}_{label}", scores)

In [17]:
# Class probabilities on the training split feed the Brier score.
base_probs_train = best_model.predict_proba(X_vectorized)

# Print the reports for the training split, then log them to MLflow.
classification_report_train, brier = get_metrics(
    y_train, y_train_pred, base_probs_train
)
log_metrics(classification_report_train, brier, "train")

2025-04-07 11:19:08,681: classification_report
2025-04-07 11:19:08,691: confusion_matrix
2025-04-07 11:19:08,693: brier score


                       precision    recall  f1-score   support

           ad_hominem       1.00      1.00      1.00         5
  appeal_to_authority       1.00      1.00      1.00         4
    appeal_to_emotion       1.00      1.00      1.00        14
        false_dilemma       1.00      1.00      1.00         7
faulty_generalization       1.00      1.00      1.00        10
                 none       1.00      1.00      1.00        30

             accuracy                           1.00        70
            macro avg       1.00      1.00      1.00        70
         weighted avg       1.00      1.00      1.00        70

[[ 5  0  0  0  0  0]
 [ 0  4  0  0  0  0]
 [ 0  0 14  0  0  0]
 [ 0  0  0  7  0  0]
 [ 0  0  0  0 10  0]
 [ 0  0  0  0  0 30]]
Multiclass Brier score: 0.5841322361809406


In [18]:
# Class probabilities on the held-out test split feed the Brier score.
base_probs_test = best_model.predict_proba(X_vectorized_test)

# Print the reports for the test split, then log them to MLflow.
classification_report_test, brier_test = get_metrics(
    y_test, y_test_pred, base_probs_test
)
log_metrics(classification_report_test, brier_test, "test")

2025-04-07 11:19:09,079: classification_report
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
2025-04-07 11:19:09,088: confusion_matrix
2025-04-07 11:19:09,089: brier score


                       precision    recall  f1-score   support

           ad_hominem       0.00      0.00      0.00         2
  appeal_to_authority       0.00      0.00      0.00         2
    appeal_to_emotion       0.00      0.00      0.00         6
        false_dilemma       0.33      0.33      0.33         3
faulty_generalization       0.00      0.00      0.00         4
                 none       0.46      0.85      0.59        13

             accuracy                           0.40        30
            macro avg       0.13      0.20      0.15        30
         weighted avg       0.23      0.40      0.29        30

[[ 0  0  1  0  0  1]
 [ 0  0  1  0  0  1]
 [ 0  0  0  1  0  5]
 [ 0  0  0  1  0  2]
 [ 0  0  0  0  0  4]
 [ 0  0  1  1  0 11]]
Multiclass Brier score: 0.6794802496510479


In [19]:
# Close the active MLflow run so that later cells do not log into it.
mlflow.end_run()

🏃 View run svm baseline at: http://127.0.0.1:5001/#/experiments/118053630762497624/runs/3d51c81189804c4fb3e0845ad526540f
🧪 View experiment at: http://127.0.0.1:5001/#/experiments/118053630762497624


### Naive Bayes

In [27]:
# Pipeline for TF-IDF and Naive Bayes
# pipeline_bayes = Pipeline([
#     ('tfidf', TfidfVectorizer()),
#     ('nb', MultinomialNB()),
# ])

# # Train the model
# pipeline_bayes.fit(X_train, y_train)

# # Predict on train and test data
# y_train_pred_bayes = pipeline_bayes.predict(X_train)
# y_test_pred_bayes = pipeline_bayes.predict(X_test)

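# # NOTE: `__compute_and_log_metrics` is not defined anywhere in this notebook
# # (hence the NameError recorded below). Before re-enabling this cell, replace
# # the two calls with `get_metrics` / `log_metrics` from the Evaluation section.
# __compute_and_log_metrics(y_train, y_train_pred_bayes, "train")
# __compute_and_log_metrics(y_test, y_test_pred_bayes, "test")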

NameError: name '__compute_and_log_metrics' is not defined