# Hybrid approaches

## Naive Bayes and Pattern.nl

In [1]:
# Install libraries
!pip install pattern
!pip install scikit-learn
!pip install nltk

Collecting pattern
  Downloading Pattern-3.6.0.tar.gz (22.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting backports.csv (from pattern)
  Downloading backports.csv-1.0.7-py2.py3-none-any.whl (12 kB)
Collecting mysqlclient (from pattern)
  Downloading mysqlclient-2.2.4.tar.gz (90 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.4/90.4 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting feedparser (from pattern)
  Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pdfminer.six (from pattern)
  Downloadin

In [2]:
import pandas as pd
import numpy as np
import random
import string
import nltk
from nltk.stem.snowball import SnowballStemmer
from pattern.nl import sentiment
from sklearn.naive_bayes import MultinomialNB
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MaxAbsScaler  # Changed MinMaxScaler to MaxAbsScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_predict, StratifiedKFold
from imblearn.over_sampling import SMOTE
from sklearn.base import BaseEstimator, TransformerMixin
from scipy.sparse import hstack

# Download NLTK data files
nltk.download('punkt')
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Set seed for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)

# Setting the seed
set_seed(42)

# Function to load a single dataset
def load_dataset(filename):
    df = pd.read_csv(filename)
    return df['text'], df['labels']

# Replace the numerical labels with the sentiment categories
def map_labels(label):
    if label == 0:
        return "negative"
    elif label == 1:
        return "neutral"
    elif label == 2:
        return "positive"
    else:
        return "unknown"

# Custom transformer to preprocess Dutch text
class DutchTextPreprocessor(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.stemmer = SnowballStemmer('dutch')
        self.stop_words = set(stopwords.words('dutch'))

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        preprocessed_texts = []
        for text in X:
            # Convert to lowercase
            text = text.lower()
            # Tokenize text
            tokens = word_tokenize(text, language='dutch')
            # Remove punctuation and numbers
            tokens = [word for word in tokens if word.isalnum()]
            # Remove stop words
            tokens = [word for word in tokens if word not in self.stop_words]
            # Stemming
            tokens = [self.stemmer.stem(word) for word in tokens]
            # Join tokens back into string
            preprocessed_text = ' '.join(tokens)
            preprocessed_texts.append(preprocessed_text)
        return preprocessed_texts

# Function to perform sentiment analysis and generate classification report
def cross_val_analysis(X_train_val, y_train_val, X_test, y_test):
    tfidf = TfidfVectorizer()
    scaler = MaxAbsScaler()  # Changed to MaxAbsScaler
    preprocessor = DutchTextPreprocessor()

    # Preprocess training and validation data
    X_train_val_preprocessed = preprocessor.fit_transform(X_train_val)

    # Fit and transform the training data with TF-IDF
    X_train_val_tfidf = tfidf.fit_transform(X_train_val_preprocessed)

    # Scale TF-IDF features
    X_train_val_tfidf_scaled = scaler.fit_transform(X_train_val_tfidf)

    # Transform the test data with preprocessor and TF-IDF
    X_test_preprocessed = preprocessor.transform(X_test)
    X_test_tfidf = tfidf.transform(X_test_preprocessed)
    X_test_tfidf_scaled = scaler.transform(X_test_tfidf)

    # Apply SMOTE to the training data
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X_train_val_tfidf_scaled, y_train_val)

    nb_classifier = MultinomialNB()

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    y_pred_cv = cross_val_predict(nb_classifier, X_resampled, y_resampled, cv=skf)

    # Cross-validation classification report and confusion matrix
    report_cv = classification_report(y_resampled, y_pred_cv, zero_division=0)
    cm_cv = confusion_matrix(y_resampled, y_pred_cv, labels=["negative", "neutral", "positive"])

    # Train final model on the entire training+validation set and test on the unseen test set
    nb_classifier.fit(X_resampled, y_resampled)
    y_pred_test = nb_classifier.predict(X_test_tfidf_scaled)

    # Test set classification report and confusion matrix
    report_test = classification_report(y_test, y_pred_test, zero_division=0)
    cm_test = confusion_matrix(y_test, y_pred_test, labels=["negative", "neutral", "positive"])

    return report_cv, cm_cv, report_test, cm_test

# List of datasets
dataset_paths = ["1960s_gas.csv", "1970s_gas.csv", "1980s_gas.csv", "1990s_gas.csv"]

# Iterate over each dataset path in the list
for dataset_path in dataset_paths:
    dataset_name = dataset_path.split(".")[0]  # use the name from the CSV files
    print(f"Processing {dataset_name}...")

    # Load dataset
    X, y = load_dataset(dataset_path)

    # Map numerical labels to sentiment categories for ground truth
    y = y.apply(map_labels)

    # Split the dataset into 85% training+validation and 15% test sets
    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

    # Further split the training+validation set so that in the end there is 70% training and 15% validation sets
    X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1765, random_state=42, stratify=y_train_val)

    # Combine training and validation sets for cross-validation
    X_train_val_combined = pd.concat([X_train, X_val])
    y_train_val_combined = pd.concat([y_train, y_val])

    # Perform cross-validation analysis
    report_cv, cm_cv, report_test, cm_test = cross_val_analysis(X_train_val_combined, y_train_val_combined, X_test, y_test)

    # Print cross-validation classification report and confusion matrix
    print(f"Cross-Validation Classification Report for {dataset_name}:\n", report_cv)
    print(f"Cross-Validation Confusion Matrix for {dataset_name}:\n", cm_cv)
    print("-" * 50)

    # Print test set classification report and confusion matrix
    print(f"Test Set Classification Report for {dataset_name}:\n", report_test)
    print(f"Test Set Confusion Matrix for {dataset_name}:\n", cm_test)
    print("=" * 50)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Processing 1960s_gas...
Cross-Validation Classification Report for 1960s_gas:
               precision    recall  f1-score   support

    negative       0.89      0.89      0.89       187
     neutral       0.76      0.76      0.76       187
    positive       0.75      0.74      0.75       187

    accuracy                           0.80       561
   macro avg       0.80      0.80      0.80       561
weighted avg       0.80      0.80      0.80       561

Cross-Validation Confusion Matrix for 1960s_gas:
 [[167   9  11]
 [  9 143  35]
 [ 12  36 139]]
--------------------------------------------------
Test Set Classification Report for 1960s_gas:
               precision    recall  f1-score   support

    negative       0.40      0.33      0.36        12
     neutral       0.36      0.25      0.29        20
    positive       0.54      0.67      0.59        33

    accuracy                           0.48        65
   macro avg       0.43      0.42      0.42        65
weighted avg       0

## Support Vector Machine and Pattern.nl

In [3]:
!pip install pattern
!pip install scikit-learn
!pip install nltk



In [4]:
import pandas as pd
import numpy as np
import random
import string
import nltk
from nltk.stem.snowball import SnowballStemmer
from pattern.nl import sentiment
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_predict, StratifiedKFold
from imblearn.over_sampling import SMOTE
from sklearn.base import BaseEstimator, TransformerMixin
from scipy.sparse import hstack

# Download NLTK data files
nltk.download('punkt')
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Function to set all seeds for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)

# Setting the seed
set_seed(42)

# Function to load a single dataset
def load_dataset(filename):
    df = pd.read_csv(filename)
    return df['text'], df['labels']

# Replace the numerical labels with the sentiment categories
def map_labels(label):
    if label == 0:
        return "negative"
    elif label == 1:
        return "neutral"
    elif label == 2:
        return "positive"
    else:
        return "unknown"

# Custom transformer to preprocess Dutch text
class DutchTextPreprocessor(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.stemmer = SnowballStemmer('dutch')
        self.stop_words = set(stopwords.words('dutch'))

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        preprocessed_texts = []
        for text in X:
            # Convert to lowercase
            text = text.lower()
            # Tokenize text
            tokens = word_tokenize(text, language='dutch')
            # Remove punctuation and numbers
            tokens = [word for word in tokens if word.isalnum()]
            # Remove stop words
            tokens = [word for word in tokens if word not in self.stop_words]
            # Stemming
            tokens = [self.stemmer.stem(word) for word in tokens]
            # Join tokens back into string
            preprocessed_text = ' '.join(tokens)
            preprocessed_texts.append(preprocessed_text)
        return preprocessed_texts

# Custom transformer for lexicon-based features
class LexiconBasedTransformer(BaseEstimator, TransformerMixin):
    def __init__(self):
        pass  # Add any initialization if needed

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        lexicon_features = []
        for text in X:
            polarity, _ = sentiment(text)
            lexicon_features.append([polarity])
        return np.array(lexicon_features)

# Function to perform sentiment analysis and generate classification report
def cross_val_analysis(X_train_val, y_train_val, X_test, y_test):
    tfidf = TfidfVectorizer()
    lexicon_transformer = LexiconBasedTransformer()
    scaler = MinMaxScaler()

    # Preprocess training and validation data
    preprocessor = DutchTextPreprocessor()
    X_train_val_preprocessed = preprocessor.fit_transform(X_train_val)
    X_test_preprocessed = preprocessor.transform(X_test)

    # Fit and transform the training data with TF-IDF and lexicon-based transformer
    X_train_val_tfidf = tfidf.fit_transform(X_train_val_preprocessed)
    X_train_val_lexicon = lexicon_transformer.fit_transform(X_train_val_preprocessed)
    X_train_val_lexicon = scaler.fit_transform(X_train_val_lexicon)  # Scale lexicon features
    X_train_val_combined = hstack([X_train_val_tfidf, X_train_val_lexicon])

    # Transform the test data with TF-IDF and lexicon-based transformer
    X_test_tfidf = tfidf.transform(X_test_preprocessed)
    X_test_lexicon = lexicon_transformer.transform(X_test_preprocessed)
    X_test_lexicon = scaler.transform(X_test_lexicon)  # Scale lexicon features
    X_test_combined = hstack([X_test_tfidf, X_test_lexicon])

    # Apply SMOTE to the training data
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X_train_val_combined, y_train_val)

    svm_classifier = SVC(kernel='linear', random_state=42)

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    y_pred_cv = cross_val_predict(svm_classifier, X_resampled, y_resampled, cv=skf)

    # Cross-validation classification report and confusion matrix
    report_cv = classification_report(y_resampled, y_pred_cv, zero_division=0)
    cm_cv = confusion_matrix(y_resampled, y_pred_cv, labels=["negative", "neutral", "positive"])

    # Train final model on the entire training+validation set and test on the unseen test set
    svm_classifier.fit(X_resampled, y_resampled)
    y_pred_test = svm_classifier.predict(X_test_combined)

    # Test set classification report and confusion matrix
    report_test = classification_report(y_test, y_pred_test, zero_division=0)
    cm_test = confusion_matrix(y_test, y_pred_test, labels=["negative", "neutral", "positive"])

    return report_cv, cm_cv, report_test, cm_test

# List of datasets
dataset_paths = ["1960s_gas.csv", "1970s_gas.csv", "1980s_gas.csv", "1990s_gas.csv"]

# Iterate over each dataset path in the list
for dataset_path in dataset_paths:
    dataset_name = dataset_path.split(".")[0]
    print(f"Processing {dataset_name}...")

    # Load dataset
    X, y = load_dataset(dataset_path)

    # Map numerical labels to sentiment categories for ground truth
    y = y.apply(map_labels)

    # Split the dataset into 85% training+validation and 15% test sets
    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.15, random_state=42, stratify=y)

    # Further split the training+validation set so that in the end there is 70% training and 15% validation sets
    X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1765, random_state=42, stratify=y_train_val)

    # Combine training and validation sets for cross-validation
    X_train_val_combined = pd.concat([X_train, X_val])
    y_train_val_combined = pd.concat([y_train, y_val])

    # Perform cross-validation analysis
    report_cv, cm_cv, report_test, cm_test = cross_val_analysis(X_train_val_combined, y_train_val_combined, X_test, y_test)

    # Print cross-validation classification report and confusion matrix
    print(f"Cross-Validation Classification Report for {dataset_name}:\n", report_cv)
    print(f"Cross-Validation Confusion Matrix for {dataset_name}:\n", cm_cv)
    print("-" * 50)

    # Print test set classification report and confusion matrix
    print(f"Test Set Classification Report for {dataset_name}:\n", report_test)
    print(f"Test Set Confusion Matrix for {dataset_name}:\n", cm_test)
    print("=" * 50)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Processing 1960s_gas...
Cross-Validation Classification Report for 1960s_gas:
               precision    recall  f1-score   support

    negative       0.90      0.94      0.92       187
     neutral       0.80      0.79      0.80       187
    positive       0.78      0.75      0.77       187

    accuracy                           0.83       561
   macro avg       0.83      0.83      0.83       561
weighted avg       0.83      0.83      0.83       561

Cross-Validation Confusion Matrix for 1960s_gas:
 [[175   2  10]
 [  9 148  30]
 [ 11  35 141]]
--------------------------------------------------
Test Set Classification Report for 1960s_gas:
               precision    recall  f1-score   support

    negative       0.33      0.25      0.29        12
     neutral       0.36      0.25      0.29        20
    positive       0.55      0.70      0.61        33

    accuracy                           0.48        65
   macro avg       0.41      0.40      0.40        65
weighted avg       0

## SVM, Pattern.nl and LUPJE

In [5]:
# Install libraries
!pip install imbalanced-learn
!pip install pattern
!pip install nltk



In [6]:
import re
import string
import nltk
import numpy as np
import pandas as pd
import random
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_predict, StratifiedKFold
from imblearn.over_sampling import SMOTE
from sklearn.base import BaseEstimator, TransformerMixin
from scipy.sparse import hstack
from pattern.nl import sentiment

# Download NLTK data files
nltk.download('punkt')
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

# Function to set all seeds for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)

# Set the seed for reproducibility
seed = 42
set_seed(seed)

# Function to load a single dataset
def load_dataset(filename):
    df = pd.read_csv(filename)
    return df['text'], df['labels']

# Replace the numerical labels with the sentiment categories
def map_labels(label):
    if label == 0:
        return "negative"
    elif label == 1:
        return "neutral"
    elif label == 2:
        return "positive"
    else:
        return "unknown"

# Load the second lexicon
words_sentiment_df = pd.DataFrame(columns=["word", "sentiment_score"])

with open("LUPJE.txt", "r") as file:
    for line in file:
        try:
            word, sentiment_score = line.strip().split("\t")
            words_sentiment_df = pd.concat([words_sentiment_df, pd.DataFrame({"word": [word], "sentiment_score": [sentiment_score]})])
        except ValueError:
            print(f"Skipping line with incorrect formatting: {line.strip()}")

# Convert sentiment scores to numeric type
words_sentiment_df["sentiment_score"] = pd.to_numeric(words_sentiment_df["sentiment_score"])

# Custom transformer for text preprocessing
class TextPreprocessor(BaseEstimator, TransformerMixin):
    def __init__(self):
        self.stop_words = set(stopwords.words('dutch'))

    def fit(self, X, y=None):
        return self

    def transform(self, X):
        preprocessed_texts = [self.preprocess_text(text) for text in X]
        return preprocessed_texts

    def preprocess_text(self, text):
        # Convert to lowercase
        text = text.lower()
        # Remove punctuation and numbers
        text = re.sub(r'[^\w\s]', '', text)
        text = re.sub(r'\d+', '', text)
        # Tokenize
        tokens = word_tokenize(text, language='dutch')
        # Remove stop words
        tokens = [word for word in tokens if word not in self.stop_words]
        # Join tokens back to string
        return ' '.join(tokens)

# Custom transformer to add lexicon-based features from both lexicons
class CombinedLexiconTransformer(BaseEstimator, TransformerMixin):
    def fit(self, X, y=None):
        return self

    def transform(self, X):
        lexicon_features = []
        for text in X:
            # First lexicon (Pattern.nl sentiment)
            polarity_pattern, _ = sentiment(text)

            # Second lexicon (LUPJE)
            tokens = text.split()
            sentiment_score_lupje = 0
            for token in tokens:
                if token in words_sentiment_df["word"].values:
                    sentiment_score_lupje += words_sentiment_df.loc[words_sentiment_df["word"] == token, "sentiment_score"].values[0]

            lexicon_features.append([polarity_pattern, sentiment_score_lupje])
        return np.array(lexicon_features)

# Function to perform sentiment analysis and generate classification report
def cross_val_analysis(X_train_val, y_train_val, X_test, y_test, seed):
    text_preprocessor = TextPreprocessor()
    tfidf = TfidfVectorizer()
    lexicon_transformer = CombinedLexiconTransformer()
    scaler = MinMaxScaler()

    # Preprocess the text data
    X_train_val_preprocessed = text_preprocessor.fit_transform(X_train_val)
    X_test_preprocessed = text_preprocessor.transform(X_test)

    # Fit and transform the training data with TF-IDF and lexicon-based transformer
    X_train_val_tfidf = tfidf.fit_transform(X_train_val_preprocessed)
    X_train_val_lexicon = lexicon_transformer.fit_transform(X_train_val_preprocessed)
    X_train_val_lexicon = scaler.fit_transform(X_train_val_lexicon)  # Scale lexicon features
    X_train_val_combined = hstack([X_train_val_tfidf, X_train_val_lexicon])

    # Transform the test data with TF-IDF and lexicon-based transformer
    X_test_tfidf = tfidf.transform(X_test_preprocessed)
    X_test_lexicon = lexicon_transformer.transform(X_test_preprocessed)
    X_test_lexicon = scaler.transform(X_test_lexicon)  # Scale lexicon features
    X_test_combined = hstack([X_test_tfidf, X_test_lexicon])

    # Apply SMOTE to the training data
    smote = SMOTE(random_state=seed)
    X_resampled, y_resampled = smote.fit_resample(X_train_val_combined, y_train_val)

    svm_classifier = SVC(kernel='linear', random_state=seed)

    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=seed)
    y_pred_cv = cross_val_predict(svm_classifier, X_resampled, y_resampled, cv=skf)

    # Cross-validation classification report and confusion matrix
    report_cv = classification_report(y_resampled, y_pred_cv, zero_division=0)
    cm_cv = confusion_matrix(y_resampled, y_pred_cv, labels=["negative", "neutral", "positive"])

    # Train final model on the entire training+validation set and test on the unseen test set
    svm_classifier.fit(X_resampled, y_resampled)
    y_pred_test = svm_classifier.predict(X_test_combined)

    # Test set classification report and confusion matrix
    report_test = classification_report(y_test, y_pred_test, zero_division=0)
    cm_test = confusion_matrix(y_test, y_pred_test, labels=["negative", "neutral", "positive"])

    return report_cv, cm_cv, report_test, cm_test

# List of datasets
dataset_paths = ["1960s_gas.csv", "1970s_gas.csv", "1980s_gas.csv", "1990s_gas.csv"]

# Iterate over each dataset path in the list
for dataset_path in dataset_paths:
    dataset_name = dataset_path.split(".")[0]  # use the name from the CSV files
    print(f"Processing {dataset_name}...")

    # Load dataset
    X, y = load_dataset(dataset_path)

    # Map numerical labels to sentiment categories for ground truth
    y = y.apply(map_labels)

    # Split the dataset into 85% training+validation and 15% test sets
    X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.15, random_state=seed, stratify=y)

    # Further split the training+validation set into 70% training and 15% validation sets
    X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.1765, random_state=seed, stratify=y_train_val)

    # Combine training and validation sets for cross-validation
    X_train_val_combined = pd.concat([X_train, X_val])
    y_train_val_combined = pd.concat([y_train, y_val])

    # Perform cross-validation analysis
    report_cv, cm_cv, report_test, cm_test = cross_val_analysis(X_train_val_combined, y_train_val_combined, X_test, y_test, seed)

    # Print cross-validation classification report and confusion matrix
    print(f"Cross-Validation Classification Report for {dataset_name}:\n", report_cv)
    print(f"Cross-Validation Confusion Matrix for {dataset_name}:\n", cm_cv)
    print("-" * 50)

    # Print test set classification report and confusion matrix
    print(f"Test Set Classification Report for {dataset_name}:\n", report_test)
    print(f"Test Set Confusion Matrix for {dataset_name}:\n", cm_test)
    print("=" * 50)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Processing 1960s_gas...
Cross-Validation Classification Report for 1960s_gas:
               precision    recall  f1-score   support

    negative       0.91      0.95      0.93       187
     neutral       0.78      0.79      0.79       187
    positive       0.78      0.73      0.76       187

    accuracy                           0.83       561
   macro avg       0.82      0.83      0.82       561
weighted avg       0.82      0.83      0.82       561

Cross-Validation Confusion Matrix for 1960s_gas:
 [[178   4   5]
 [  6 148  33]
 [ 12  38 137]]
--------------------------------------------------
Test Set Classification Report for 1960s_gas:
               precision    recall  f1-score   support

    negative       0.33      0.25      0.29        12
     neutral       0.27      0.15      0.19        20
    positive       0.53      0.73      0.62        33

    accuracy                           0.46        65
   macro avg       0.38      0.38      0.36        65
weighted avg       0

## RobBERT + Pattern.nl

In [None]:
# Install libraries
!pip install transformers
!pip install imbalanced-learn
!pip install torch
!pip install accelerate -U
!pip install datasets
!pip install pattern

Collecting pattern
  Downloading Pattern-3.6.0.tar.gz (22.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m50.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting backports.csv (from pattern)
  Downloading backports.csv-1.0.7-py2.py3-none-any.whl (12 kB)
Collecting mysqlclient (from pattern)
  Downloading mysqlclient-2.2.4.tar.gz (90 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.4/90.4 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting feedparser (from pattern)
  Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m


In [None]:
import pandas as pd
import numpy as np
import random
import torch
from torch import nn
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from datasets import Dataset
from imblearn.over_sampling import RandomOverSampler
from pattern.nl import sentiment

# Function to set all seeds for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Setting the seed
set_seed(42)

# Function to load a single dataset
def load_dataset(filename):
    df = pd.read_csv(filename)
    return df['text'], df['labels']

# Check if only the labels 0, 1 and 2 are present
def map_labels(label):
    if label == 0:
        return 0  # negative
    elif label == 1:
        return 1  # neutral
    elif label == 2:
        return 2  # positive
    else:
        return -1  # unknown

# Replace the numerical labels with the sentiment categories for the lexicon approach
def map_labels_lexicon(label):
    if label == 0:
        return "negative"
    elif label == 1:
        return "neutral"
    elif label == 2:
        return "positive"
    else:
        return "unknown"

# Function to tokenize the texts
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True)

# Function to perform lexicon-based sentiment analysis
def lexicon_sentiment_analysis(texts):
    sentiment_scores = []
    for text in texts:
        polarity, _ = sentiment(text)
        sentiment_scores.append(polarity)
    return sentiment_scores

# Load model and tokenizer
model_name = "pdelobelle/robbert-v2-dutch-base"
tokenizer = RobertaTokenizer.from_pretrained(model_name)

# List of datasets
dataset_paths = ["1960s_gas.csv", "1970s_gas.csv", "1980s_gas.csv", "1990s_gas.csv"]

# Define a custom model class to accept additional lexicon score input
class RobertaWithLexicon(RobertaForSequenceClassification):
    def __init__(self, config):
        super().__init__(config)
        self.lexicon_fc = nn.Linear(1, config.num_labels)  # Map lexicon score to the same output space

    def forward(self, input_ids, attention_mask=None, lexicon_score=None, labels=None):
        outputs = super().forward(input_ids, attention_mask=attention_mask, labels=labels)
        logits = outputs.logits

        if lexicon_score is not None:
            lexicon_score = lexicon_score.unsqueeze(1).to(logits.device)
            lexicon_logits = self.lexicon_fc(lexicon_score)  # Map lexicon score to logits
            logits += lexicon_logits  # Add lexicon logits to BERT logits

        return (outputs.loss, logits) if labels is not None else logits

# Initialize the custom model
model = RobertaWithLexicon.from_pretrained(model_name, num_labels=3)

# Function to safely add the lexicon scores
def add_lexicon_score(dataset, scores):
    if 'lexicon_score' in dataset.column_names:
        dataset = dataset.remove_columns(['lexicon_score'])
    scores = scores.tolist()  # Convert to list if not already
    return dataset.add_column('lexicon_score', scores)

# Iterate over each dataset path in the list
for dataset_path in dataset_paths:
    dataset_name = dataset_path.split(".")[0]
    print(f"Processing {dataset_name}...")

    # Load dataset
    X, y = load_dataset(dataset_path)

    # Map numerical labels to sentiment categories for ground truth
    y = y.apply(map_labels)

    # Perform lexicon sentiment analysis and add the sentiment scores to the dataframe
    lexicon_scores = lexicon_sentiment_analysis(X)
    df = pd.DataFrame({'text': X, 'label': y, 'lexicon_score': lexicon_scores})

    train_val_df, test_df = train_test_split(df, test_size=0.15, random_state=42, stratify=df['label'])
    train_df, val_df = train_test_split(train_val_df, test_size=0.1765, random_state=42, stratify=train_val_df['label'])

    # Oversample the training data to handle class imbalance
    oversampler = RandomOverSampler(random_state=42)
    train_df_resampled, train_labels_resampled = oversampler.fit_resample(train_df[['text', 'lexicon_score']], train_df['label'])
    train_df_resampled['label'] = train_labels_resampled

    # Convert pandas DataFrames to Hugging Face Datasets
    train_dataset = Dataset.from_pandas(train_df_resampled)
    val_dataset = Dataset.from_pandas(val_df)
    test_dataset = Dataset.from_pandas(test_df)

    # Tokenize datasets
    train_dataset = train_dataset.map(tokenize_function, batched=True)
    val_dataset = val_dataset.map(tokenize_function, batched=True)
    test_dataset = test_dataset.map(tokenize_function, batched=True)

    # Add lexicon scores as additional feature
    train_dataset = add_lexicon_score(train_dataset, train_df_resampled['lexicon_score'])
    val_dataset = add_lexicon_score(val_dataset, val_df['lexicon_score'])
    test_dataset = add_lexicon_score(test_dataset, test_df['lexicon_score'])

    # Set format for PyTorch
    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label', 'lexicon_score'])
    val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label', 'lexicon_score'])
    test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label', 'lexicon_score'])

    # Define training arguments
    training_args = TrainingArguments(
        output_dir=f'./results/{dataset_name}',
        evaluation_strategy="epoch",
        save_strategy="epoch",
        logging_dir=f'./logs/{dataset_name}',
        num_train_epochs=4,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=8,
        logging_steps=10,
        load_best_model_at_end=True,
        learning_rate=1e-4,
    )

    # Define Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
    )

    # Train the model
    trainer.train()

    # Evaluate the model
    print(f"Evaluating {dataset_name}...")
    eval_result = trainer.evaluate(eval_dataset=test_dataset)
    print(f"Test Set Evaluation for {dataset_name}:\n", eval_result)

    # Get predictions from the model
    predictions = trainer.predict(test_dataset)
    preds = predictions.predictions.argmax(-1)
    true_labels = test_dataset['label']

    # Generate classification report
    report = classification_report(true_labels, preds, target_names=["negative", "neutral", "positive"])
    print(f"Classification Report for {dataset_name}:\n", report)

    # Generate confusion matrix
    cm = confusion_matrix(true_labels, preds)
    print(f"Confusion Matrix for {dataset_name}:\n", cm)

    print("=" * 50)


Some weights of RobertaWithLexicon were not initialized from the model checkpoint at pdelobelle/robbert-v2-dutch-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'lexicon_fc.bias', 'lexicon_fc.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Processing 1960s_gas...


Map:   0%|          | 0/462 [00:00<?, ? examples/s]

Map:   0%|          | 0/65 [00:00<?, ? examples/s]

Map:   0%|          | 0/65 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,1.197,1.155904
2,0.6822,1.133945
3,0.3503,1.337193
4,0.0942,1.450952


Evaluating 1960s_gas...


Test Set Evaluation for 1960s_gas:
 {'eval_loss': 1.0511798858642578, 'eval_runtime': 1.9832, 'eval_samples_per_second': 32.775, 'eval_steps_per_second': 4.538, 'epoch': 4.0}
Classification Report for 1960s_gas:
               precision    recall  f1-score   support

    negative       0.45      0.75      0.56        12
     neutral       0.00      0.00      0.00        20
    positive       0.57      0.76      0.65        33

    accuracy                           0.52        65
   macro avg       0.34      0.50      0.40        65
weighted avg       0.37      0.52      0.43        65

Confusion Matrix for 1960s_gas:
 [[ 9  0  3]
 [ 4  0 16]
 [ 7  1 25]]
Processing 1970s_gas...


Map:   0%|          | 0/114 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,No log,1.091561
2,1.044900,1.457587
3,0.374500,1.907759
4,0.220600,1.756206


Evaluating 1970s_gas...


Test Set Evaluation for 1970s_gas:
 {'eval_loss': 0.8900591135025024, 'eval_runtime': 0.5172, 'eval_samples_per_second': 29.005, 'eval_steps_per_second': 3.867, 'epoch': 4.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report for 1970s_gas:
               precision    recall  f1-score   support

    negative       1.00      0.33      0.50         3
     neutral       0.00      0.00      0.00         3
    positive       0.64      1.00      0.78         9

    accuracy                           0.67        15
   macro avg       0.55      0.44      0.43        15
weighted avg       0.59      0.67      0.57        15

Confusion Matrix for 1970s_gas:
 [[1 0 2]
 [0 0 3]
 [0 0 9]]
Processing 1980s_gas...


Map:   0%|          | 0/186 [00:00<?, ? examples/s]

Map:   0%|          | 0/33 [00:00<?, ? examples/s]

Map:   0%|          | 0/33 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,1.1853,1.078205
2,1.0755,1.107207
3,0.9841,1.146729
4,0.8113,1.13191


Evaluating 1980s_gas...


Test Set Evaluation for 1980s_gas:
 {'eval_loss': 1.076548457145691, 'eval_runtime': 1.0676, 'eval_samples_per_second': 30.912, 'eval_steps_per_second': 4.684, 'epoch': 4.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report for 1980s_gas:
               precision    recall  f1-score   support

    negative       0.45      0.77      0.57        13
     neutral       0.00      0.00      0.00         7
    positive       0.36      0.31      0.33        13

    accuracy                           0.42        33
   macro avg       0.27      0.36      0.30        33
weighted avg       0.32      0.42      0.36        33

Confusion Matrix for 1980s_gas:
 [[10  0  3]
 [ 3  0  4]
 [ 9  0  4]]
Processing 1990s_gas...


Map:   0%|          | 0/51 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,No log,1.083399
2,No log,1.103045
3,1.032700,1.073438
4,1.032700,1.074149


Evaluating 1990s_gas...


Test Set Evaluation for 1990s_gas:
 {'eval_loss': 1.0107314586639404, 'eval_runtime': 0.2742, 'eval_samples_per_second': 29.172, 'eval_steps_per_second': 3.646, 'epoch': 4.0}
Classification Report for 1990s_gas:
               precision    recall  f1-score   support

    negative       0.50      0.50      0.50         4
     neutral       0.50      0.50      0.50         2
    positive       0.00      0.00      0.00         2

    accuracy                           0.38         8
   macro avg       0.33      0.33      0.33         8
weighted avg       0.38      0.38      0.38         8

Confusion Matrix for 1990s_gas:
 [[2 0 2]
 [1 1 0]
 [1 1 0]]


## BERTje + Pattern.nl

In [None]:
!pip install transformers
!pip install imbalanced-learn
!pip install torch
!pip install accelerate -U
!pip install datasets
!pip install pattern

Collecting pattern
  Downloading Pattern-3.6.0.tar.gz (22.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m61.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting backports.csv (from pattern)
  Downloading backports.csv-1.0.7-py2.py3-none-any.whl (12 kB)
Collecting mysqlclient (from pattern)
  Downloading mysqlclient-2.2.4.tar.gz (90 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.4/90.4 kB[0m [31m15.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting feedparser (from pattern)
  Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m14.2 MB/s[0m eta [36m0:00:00[0m


In [None]:
import pandas as pd
import numpy as np
import random
import torch
from torch import nn
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from datasets import Dataset
from imblearn.over_sampling import RandomOverSampler
from pattern.nl import sentiment

# Function to set all seeds for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Setting the seed
set_seed(42)

# Function to load a single dataset
def load_dataset(filename):
    df = pd.read_csv(filename)
    return df['text'], df['labels']

# Check if only the labels 0, 1 and 2 are present
def map_labels(label):
    if label == 0:
        return 0  # negative
    elif label == 1:
        return 1  # neutral
    elif label == 2:
        return 2  # positive
    else:
        return -1  # unknown

# Replace the numerical labels with the sentiment categories for the lexicon approach
def map_labels_lexicon(label):
    if label == 0:
        return "negative"
    elif label == 1:
        return "neutral"
    elif label == 2:
        return "positive"
    else:
        return "unknown"

# Function to tokenize the texts
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True)

# Function to perform lexicon-based sentiment analysis
def lexicon_sentiment_analysis(texts):
    sentiment_scores = []
    for text in texts:
        polarity, _ = sentiment(text)
        sentiment_scores.append(polarity)
    return sentiment_scores

# Load model and tokenizer
model_name = "wietsedv/bert-base-dutch-cased"
tokenizer = BertTokenizer.from_pretrained(model_name)

# List of datasets
dataset_paths = ["1960s_gas.csv", "1970s_gas.csv", "1980s_gas.csv", "1990s_gas.csv"]

# Define a custom model class to accept additional lexicon score input
class BertWithLexicon(BertForSequenceClassification):
    def __init__(self, config):
        super().__init__(config)
        self.lexicon_fc = nn.Linear(1, config.num_labels)  # Map lexicon score to the same output space

    def forward(self, input_ids, attention_mask=None, lexicon_score=None, labels=None):
        outputs = super().forward(input_ids, attention_mask=attention_mask, labels=labels)
        logits = outputs.logits

        if lexicon_score is not None:
            lexicon_score = lexicon_score.unsqueeze(1).to(logits.device)
            lexicon_logits = self.lexicon_fc(lexicon_score)  # Map lexicon score to logits
            logits += lexicon_logits  # Add lexicon logits to BERT logits

        return (outputs.loss, logits) if labels is not None else logits

# Initialize the custom model
model = BertWithLexicon.from_pretrained(model_name, num_labels=3)

# Function to safely add the lexicon scores
def add_lexicon_score(dataset, scores):
    if 'lexicon_score' in dataset.column_names:
        dataset = dataset.remove_columns(['lexicon_score'])
    scores = scores.tolist()  # Convert to list if not already
    return dataset.add_column('lexicon_score', scores)

# Iterate over each dataset path in the list
for dataset_path in dataset_paths:
    dataset_name = dataset_path.split(".")[0]
    print(f"Processing {dataset_name}...")

    # Load dataset
    X, y = load_dataset(dataset_path)

    # Map numerical labels to sentiment categories for ground truth
    y = y.apply(map_labels)

    # Perform lexicon sentiment analysis and add the sentiment scores to the dataframe
    lexicon_scores = lexicon_sentiment_analysis(X)
    df = pd.DataFrame({'text': X, 'label': y, 'lexicon_score': lexicon_scores})

    train_val_df, test_df = train_test_split(df, test_size=0.15, random_state=42, stratify=df['label'])
    train_df, val_df = train_test_split(train_val_df, test_size=0.1765, random_state=42, stratify=train_val_df['label'])

    # Oversample the training data to handle class imbalance
    oversampler = RandomOverSampler(random_state=42)
    train_df_resampled, train_labels_resampled = oversampler.fit_resample(train_df[['text', 'lexicon_score']], train_df['label'])
    train_df_resampled['label'] = train_labels_resampled

    # Convert pandas DataFrames to Hugging Face Datasets
    train_dataset = Dataset.from_pandas(train_df_resampled)
    val_dataset = Dataset.from_pandas(val_df)
    test_dataset = Dataset.from_pandas(test_df)

    # Tokenize datasets
    train_dataset = train_dataset.map(tokenize_function, batched=True)
    val_dataset = val_dataset.map(tokenize_function, batched=True)
    test_dataset = test_dataset.map(tokenize_function, batched=True)

    # Add lexicon scores as additional feature
    train_dataset = add_lexicon_score(train_dataset, train_df_resampled['lexicon_score'])
    val_dataset = add_lexicon_score(val_dataset, val_df['lexicon_score'])
    test_dataset = add_lexicon_score(test_dataset, test_df['lexicon_score'])

    # Set format for PyTorch
    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label', 'lexicon_score'])
    val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label', 'lexicon_score'])
    test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label', 'lexicon_score'])

    # Define training arguments
    training_args = TrainingArguments(
        output_dir=f'./results/{dataset_name}',
        evaluation_strategy="epoch",
        save_strategy="epoch",
        logging_dir=f'./logs/{dataset_name}',
        num_train_epochs=4,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=8,
        logging_steps=10,
        load_best_model_at_end=True,
        learning_rate=1e-4,
    )

    # Define Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
    )

    # Train the model
    trainer.train()

    # Evaluate the model
    print(f"Evaluating {dataset_name}...")
    eval_result = trainer.evaluate(eval_dataset=test_dataset)
    print(f"Test Set Evaluation for {dataset_name}:\n", eval_result)

    # Get predictions from the model
    predictions = trainer.predict(test_dataset)
    preds = predictions.predictions.argmax(-1)
    true_labels = test_dataset['label']

    # Generate classification report
    report = classification_report(true_labels, preds, target_names=["negative", "neutral", "positive"])
    print(f"Classification Report for {dataset_name}:\n", report)

    # Generate confusion matrix
    cm = confusion_matrix(true_labels, preds)
    print(f"Confusion Matrix for {dataset_name}:\n", cm)

    print("=" * 50)


Some weights of BertWithLexicon were not initialized from the model checkpoint at wietsedv/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight', 'lexicon_fc.bias', 'lexicon_fc.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Processing 1960s_gas...


Map:   0%|          | 0/462 [00:00<?, ? examples/s]

Map:   0%|          | 0/65 [00:00<?, ? examples/s]

Map:   0%|          | 0/65 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,1.1242,1.128516
2,0.4524,1.013469
3,0.1413,1.315779
4,0.0165,1.564219


Evaluating 1960s_gas...


Test Set Evaluation for 1960s_gas:
 {'eval_loss': 1.3837627172470093, 'eval_runtime': 1.8229, 'eval_samples_per_second': 35.657, 'eval_steps_per_second': 4.937, 'epoch': 4.0}
Classification Report for 1960s_gas:
               precision    recall  f1-score   support

    negative       0.33      0.17      0.22        12
     neutral       0.00      0.00      0.00        20
    positive       0.51      0.85      0.64        33

    accuracy                           0.46        65
   macro avg       0.28      0.34      0.29        65
weighted avg       0.32      0.46      0.36        65

Confusion Matrix for 1960s_gas:
 [[ 2  1  9]
 [ 2  0 18]
 [ 2  3 28]]
Processing 1970s_gas...


Map:   0%|          | 0/114 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,No log,1.172462
2,1.008000,1.619502
3,0.210600,1.842738
4,0.049500,2.056618


Evaluating 1970s_gas...


Test Set Evaluation for 1970s_gas:
 {'eval_loss': 0.9468898177146912, 'eval_runtime': 0.4967, 'eval_samples_per_second': 30.199, 'eval_steps_per_second': 4.026, 'epoch': 4.0}


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report for 1970s_gas:
               precision    recall  f1-score   support

    negative       1.00      0.33      0.50         3
     neutral       0.00      0.00      0.00         3
    positive       0.64      1.00      0.78         9

    accuracy                           0.67        15
   macro avg       0.55      0.44      0.43        15
weighted avg       0.59      0.67      0.57        15

Confusion Matrix for 1970s_gas:
 [[1 0 2]
 [0 0 3]
 [0 0 9]]
Processing 1980s_gas...


Map:   0%|          | 0/186 [00:00<?, ? examples/s]

Map:   0%|          | 0/33 [00:00<?, ? examples/s]

Map:   0%|          | 0/33 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,1.1059,1.202203
2,0.6812,1.397081
3,0.282,1.65787
4,0.0973,1.862386


Evaluating 1980s_gas...


Test Set Evaluation for 1980s_gas:
 {'eval_loss': 1.2074679136276245, 'eval_runtime': 0.9958, 'eval_samples_per_second': 33.139, 'eval_steps_per_second': 5.021, 'epoch': 4.0}
Classification Report for 1980s_gas:
               precision    recall  f1-score   support

    negative       0.50      0.23      0.32        13
     neutral       0.29      0.71      0.42         7
    positive       0.40      0.31      0.35        13

    accuracy                           0.36        33
   macro avg       0.40      0.42      0.36        33
weighted avg       0.42      0.36      0.35        33

Confusion Matrix for 1980s_gas:
 [[3 5 5]
 [1 5 1]
 [2 7 4]]
Processing 1990s_gas...


Map:   0%|          | 0/51 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,No log,1.259775
2,No log,1.440391
3,0.697200,1.602657
4,0.697200,1.728466


Evaluating 1990s_gas...


Test Set Evaluation for 1990s_gas:
 {'eval_loss': 1.1769353151321411, 'eval_runtime': 0.2833, 'eval_samples_per_second': 28.243, 'eval_steps_per_second': 3.53, 'epoch': 4.0}
Classification Report for 1990s_gas:
               precision    recall  f1-score   support

    negative       0.50      0.50      0.50         4
     neutral       0.00      0.00      0.00         2
    positive       0.25      0.50      0.33         2

    accuracy                           0.38         8
   macro avg       0.25      0.33      0.28         8
weighted avg       0.31      0.38      0.33         8

Confusion Matrix for 1990s_gas:
 [[2 0 2]
 [1 0 1]
 [1 0 1]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## RobBERT + Pattern.nl + SVM

In [None]:
!pip install transformers
!pip install imbalanced-learn
!pip install torch
!pip install accelerate -U
!pip install datasets
!pip install pattern

Collecting pattern
  Downloading Pattern-3.6.0.tar.gz (22.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m64.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting backports.csv (from pattern)
  Downloading backports.csv-1.0.7-py2.py3-none-any.whl (12 kB)
Collecting mysqlclient (from pattern)
  Downloading mysqlclient-2.2.4.tar.gz (90 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m90.4/90.4 kB[0m [31m15.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Installing backend dependencies ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting feedparser (from pattern)
  Downloading feedparser-6.0.11-py3-none-any.whl (81 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.3/81.3 kB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0m


In [None]:
import pandas as pd
import numpy as np
import random
import torch
from torch import nn
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from datasets import Dataset
from imblearn.over_sampling import RandomOverSampler
from pattern.nl import sentiment

# Function to set all seeds for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Setting the seed
set_seed(42)

# Function to load a single dataset
def load_dataset(filename):
    df = pd.read_csv(filename)
    return df['text'], df['labels']

# Check if only the labels 0, 1 and 2 are present
def map_labels(label):
    if label == 0:
        return 0  # negative
    elif label == 1:
        return 1  # neutral
    elif label == 2:
        return 2  # positive
    else:
        return -1  # unknown

# Replace the numerical labels with the sentiment categories for the lexicon approach
def map_labels_lexicon(label):
    if label == 0:
        return "negative"
    elif label == 1:
        return "neutral"
    elif label == 2:
        return "positive"
    else:
        return "unknown"

# Function to tokenize the texts
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True)

# Function to perform lexicon-based sentiment analysis
def lexicon_sentiment_analysis(texts):
    sentiment_scores = []
    for text in texts:
        polarity, _ = sentiment(text)
        sentiment_scores.append(polarity)
    return sentiment_scores

# Load model and tokenizer
model_name = "pdelobelle/robbert-v2-dutch-base"
tokenizer = RobertaTokenizer.from_pretrained(model_name)

# List of datasets
dataset_paths = ["1960s_gas.csv", "1970s_gas.csv", "1980s_gas.csv", "1990s_gas.csv"]

# Define a custom model class to accept additional lexicon score input
class RobertaWithLexicon(RobertaForSequenceClassification):
    def __init__(self, config):
        super().__init__(config)
        self.lexicon_fc = nn.Linear(1, config.num_labels)  # Map lexicon score to the same output space

    def forward(self, input_ids, attention_mask=None, lexicon_score=None, labels=None):
        outputs = super().forward(input_ids, attention_mask=attention_mask, labels=labels)
        logits = outputs.logits

        if lexicon_score is not None:
            lexicon_score = lexicon_score.unsqueeze(1).to(logits.device)
            lexicon_logits = self.lexicon_fc(lexicon_score)  # Map lexicon score to logits
            logits += lexicon_logits  # Add lexicon logits to RobBERT logits

        if labels is not None:
            loss = outputs.loss
            return loss, logits
        else:
            return logits

# Initialize the custom model
model = RobertaWithLexicon.from_pretrained(model_name, num_labels=3)

# Function to safely add the lexicon scores
def add_lexicon_score(dataset, scores):
    if 'lexicon_score' in dataset.column_names:
        dataset = dataset.remove_columns(['lexicon_score'])
    scores = scores.tolist()  # Convert to list if not already
    return dataset.add_column('lexicon_score', scores)

# Iterate over each dataset path in the list
for dataset_path in dataset_paths:
    dataset_name = dataset_path.split(".")[0]
    print(f"Processing {dataset_name}...")

    # Load dataset
    X, y = load_dataset(dataset_path)

    # Map numerical labels to sentiment categories for ground truth
    y = y.apply(map_labels)

    # Perform lexicon sentiment analysis and add the sentiment scores to the dataframe
    lexicon_scores = lexicon_sentiment_analysis(X)
    df = pd.DataFrame({'text': X, 'label': y, 'lexicon_score': lexicon_scores})

    train_val_df, test_df = train_test_split(df, test_size=0.15, random_state=42, stratify=df['label'])
    train_df, val_df = train_test_split(train_val_df, test_size=0.1765, random_state=42, stratify=train_val_df['label'])

    # Oversample the training data to handle class imbalance
    oversampler = RandomOverSampler(random_state=42)
    train_df_resampled, train_labels_resampled = oversampler.fit_resample(train_df[['text', 'lexicon_score']], train_df['label'])
    train_df_resampled['label'] = train_labels_resampled

    # Convert pandas DataFrames to Hugging Face Datasets
    train_dataset = Dataset.from_pandas(train_df_resampled)
    val_dataset = Dataset.from_pandas(val_df)
    test_dataset = Dataset.from_pandas(test_df)

    # Tokenize datasets
    train_dataset = train_dataset.map(tokenize_function, batched=True)
    val_dataset = val_dataset.map(tokenize_function, batched=True)
    test_dataset = test_dataset.map(tokenize_function, batched=True)

    # Add lexicon scores as additional feature
    train_dataset = add_lexicon_score(train_dataset, train_df_resampled['lexicon_score'])
    val_dataset = add_lexicon_score(val_dataset, val_df['lexicon_score'])
    test_dataset = add_lexicon_score(test_dataset, test_df['lexicon_score'])

    # Set format for PyTorch
    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label', 'lexicon_score'])
    val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label', 'lexicon_score'])
    test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label', 'lexicon_score'])

    # Define training arguments
    training_args = TrainingArguments(
        output_dir=f'./results/{dataset_name}',
        evaluation_strategy="epoch",
        save_strategy="epoch",
        logging_dir=f'./logs/{dataset_name}',
        num_train_epochs=4,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=8,
        logging_steps=10,
        load_best_model_at_end=True,
        learning_rate=1e-4,
    )

    # Define Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
    )

    # Train the model
    trainer.train()

    # Get predictions and probabilities from the model for the training set
    train_predictions = trainer.predict(train_dataset)
    train_logits = train_predictions.predictions
    train_probs = torch.softmax(torch.tensor(train_logits), dim=-1).numpy()

    # Get predictions and probabilities from the model for the test set
    test_predictions = trainer.predict(test_dataset)
    test_logits = test_predictions.predictions
    test_probs = torch.softmax(torch.tensor(test_logits), dim=-1).numpy()

    # Prepare features for SVM
    train_features = pd.DataFrame(train_probs, columns=["prob_neg", "prob_neu", "prob_pos"])
    train_features["lexicon_score"] = train_df_resampled["lexicon_score"].values
    train_labels = train_df_resampled["label"].values

    test_features = pd.DataFrame(test_probs, columns=["prob_neg", "prob_neu", "prob_pos"])
    test_features["lexicon_score"] = test_df["lexicon_score"].values
    test_labels = test_df["label"].values

    # Train SVM
    svm = SVC(kernel='linear', probability=True, random_state=42)
    svm.fit(train_features, train_labels)

    # Evaluate SVM
    svm_predictions = svm.predict(test_features)
    report = classification_report(test_labels, svm_predictions, target_names=["negative", "neutral", "positive"])
    print(f"Classification Report for {dataset_name}:\n", report)

    cm = confusion_matrix(test_labels, svm_predictions)
    print(f"Confusion Matrix for {dataset_name}:\n", cm)

    print("=" * 50)


Some weights of RobertaWithLexicon were not initialized from the model checkpoint at pdelobelle/robbert-v2-dutch-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight', 'lexicon_fc.bias', 'lexicon_fc.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Processing 1960s_gas...


Map:   0%|          | 0/462 [00:00<?, ? examples/s]

Map:   0%|          | 0/65 [00:00<?, ? examples/s]

Map:   0%|          | 0/65 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,1.197,1.155904
2,0.6822,1.133945
3,0.3503,1.337193
4,0.0942,1.450952


Classification Report for 1960s_gas:
               precision    recall  f1-score   support

    negative       0.44      0.58      0.50        12
     neutral       0.33      0.25      0.29        20
    positive       0.59      0.61      0.60        33

    accuracy                           0.49        65
   macro avg       0.45      0.48      0.46        65
weighted avg       0.48      0.49      0.48        65

Confusion Matrix for 1960s_gas:
 [[ 7  3  2]
 [ 3  5 12]
 [ 6  7 20]]
Processing 1970s_gas...


Map:   0%|          | 0/114 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,No log,1.091561
2,1.044900,1.457587
3,0.374500,1.907759
4,0.220600,1.756206


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report for 1970s_gas:
               precision    recall  f1-score   support

    negative       1.00      0.33      0.50         3
     neutral       0.00      0.00      0.00         3
    positive       0.64      1.00      0.78         9

    accuracy                           0.67        15
   macro avg       0.55      0.44      0.43        15
weighted avg       0.59      0.67      0.57        15

Confusion Matrix for 1970s_gas:
 [[1 0 2]
 [0 0 3]
 [0 0 9]]
Processing 1980s_gas...


Map:   0%|          | 0/186 [00:00<?, ? examples/s]

Map:   0%|          | 0/33 [00:00<?, ? examples/s]

Map:   0%|          | 0/33 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,1.1853,1.078205
2,1.0755,1.107207
3,0.9841,1.146729
4,0.8113,1.13191


Classification Report for 1980s_gas:
               precision    recall  f1-score   support

    negative       0.37      0.54      0.44        13
     neutral       0.20      0.29      0.24         7
    positive       0.75      0.23      0.35        13

    accuracy                           0.36        33
   macro avg       0.44      0.35      0.34        33
weighted avg       0.48      0.36      0.36        33

Confusion Matrix for 1980s_gas:
 [[7 6 0]
 [4 2 1]
 [8 2 3]]
Processing 1990s_gas...


Map:   0%|          | 0/51 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,No log,1.083399
2,No log,1.103045
3,1.032700,1.073438
4,1.032700,1.074149


Classification Report for 1990s_gas:
               precision    recall  f1-score   support

    negative       1.00      0.50      0.67         4
     neutral       0.50      0.50      0.50         2
    positive       0.25      0.50      0.33         2

    accuracy                           0.50         8
   macro avg       0.58      0.50      0.50         8
weighted avg       0.69      0.50      0.54         8

Confusion Matrix for 1990s_gas:
 [[2 0 2]
 [0 1 1]
 [0 1 1]]


## BERTje + Pattern.nl + SVM

In [None]:
!pip install transformers
!pip install imbalanced-learn
!pip install torch
!pip install accelerate -U
!pip install datasets
!pip install pattern

Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cached nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl (121.6 MB)
Collecting nvidia-curand-cu12==10.3.2.106 (from torch)
  Using cached nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl (56.5 MB)
Collectin

In [None]:
import pandas as pd
import numpy as np
import random
import torch
from torch import nn
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.svm import SVC
from datasets import Dataset
from imblearn.over_sampling import RandomOverSampler
from pattern.nl import sentiment

# Function to set all seeds for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# Setting the seed
set_seed(42)

# Function to load a single dataset
def load_dataset(filename):
    df = pd.read_csv(filename)
    return df['text'], df['labels']

# Check if only the labels 0, 1 and 2 are present
def map_labels(label):
    if label == 0:
        return 0  # negative
    elif label == 1:
        return 1  # neutral
    elif label == 2:
        return 2  # positive
    else:
        return -1  # unknown

# Replace the numerical labels with the sentiment categories for the lexicon approach
def map_labels_lexicon(label):
    if label == 0:
        return "negative"
    elif label == 1:
        return "neutral"
    elif label == 2:
        return "positive"
    else:
        return "unknown"

# Function to tokenize the texts
def tokenize_function(examples):
    return tokenizer(examples['text'], padding='max_length', truncation=True)

# Function to perform lexicon-based sentiment analysis
def lexicon_sentiment_analysis(texts):
    sentiment_scores = []
    for text in texts:
        polarity, _ = sentiment(text)
        sentiment_scores.append(polarity)
    return sentiment_scores

# Load model and tokenizer
model_name = "wietsedv/bert-base-dutch-cased"
tokenizer = BertTokenizer.from_pretrained(model_name)

# List of datasets
dataset_paths = ["1960s_gas.csv", "1970s_gas.csv", "1980s_gas.csv", "1990s_gas.csv"]

# Define a custom model class to accept additional lexicon score input
class BertWithLexicon(BertForSequenceClassification):
    def __init__(self, config):
        super().__init__(config)
        self.lexicon_fc = nn.Linear(1, config.num_labels)  # Map lexicon score to the same output space

    def forward(self, input_ids, attention_mask=None, lexicon_score=None, labels=None):
        outputs = super().forward(input_ids, attention_mask=attention_mask, labels=labels)
        logits = outputs.logits

        if lexicon_score is not None:
            lexicon_score = lexicon_score.unsqueeze(1).to(logits.device)
            lexicon_logits = self.lexicon_fc(lexicon_score)  # Map lexicon score to logits
            logits += lexicon_logits  # Add lexicon logits to BERT logits

        return (outputs.loss, logits) if labels is not None else logits

# Initialize the custom model
model = BertWithLexicon.from_pretrained(model_name, num_labels=3)

# Function to safely add the lexicon scores
def add_lexicon_score(dataset, scores):
    if 'lexicon_score' in dataset.column_names:
        dataset = dataset.remove_columns(['lexicon_score'])
    scores = scores.tolist()  # Convert to list if not already
    return dataset.add_column('lexicon_score', scores)

# Iterate over each dataset path in the list
for dataset_path in dataset_paths:
    dataset_name = dataset_path.split(".")[0]
    print(f"Processing {dataset_name}...")

    # Load dataset
    X, y = load_dataset(dataset_path)

    # Map numerical labels to sentiment categories for ground truth
    y = y.apply(map_labels)

    # Perform lexicon sentiment analysis and add the sentiment scores to the dataframe
    lexicon_scores = lexicon_sentiment_analysis(X)
    df = pd.DataFrame({'text': X, 'label': y, 'lexicon_score': lexicon_scores})

    train_val_df, test_df = train_test_split(df, test_size=0.15, random_state=42, stratify=df['label'])
    train_df, val_df = train_test_split(train_val_df, test_size=0.1765, random_state=42, stratify=train_val_df['label'])

    # Oversample the training data to handle class imbalance
    oversampler = RandomOverSampler(random_state=42)
    train_df_resampled, train_labels_resampled = oversampler.fit_resample(train_df[['text', 'lexicon_score']], train_df['label'])
    train_df_resampled['label'] = train_labels_resampled

    # Convert pandas DataFrames to Hugging Face Datasets
    train_dataset = Dataset.from_pandas(train_df_resampled)
    val_dataset = Dataset.from_pandas(val_df)
    test_dataset = Dataset.from_pandas(test_df)

    # Tokenize datasets
    train_dataset = train_dataset.map(tokenize_function, batched=True)
    val_dataset = val_dataset.map(tokenize_function, batched=True)
    test_dataset = test_dataset.map(tokenize_function, batched=True)

    # Add lexicon scores as additional feature
    train_dataset = add_lexicon_score(train_dataset, train_df_resampled['lexicon_score'])
    val_dataset = add_lexicon_score(val_dataset, val_df['lexicon_score'])
    test_dataset = add_lexicon_score(test_dataset, test_df['lexicon_score'])

    # Set format for PyTorch
    train_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label', 'lexicon_score'])
    val_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label', 'lexicon_score'])
    test_dataset.set_format(type='torch', columns=['input_ids', 'attention_mask', 'label', 'lexicon_score'])

    # Define training arguments
    training_args = TrainingArguments(
        output_dir=f'./results/{dataset_name}',
        evaluation_strategy="epoch",
        save_strategy="epoch",
        logging_dir=f'./logs/{dataset_name}',
        num_train_epochs=4,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=8,
        logging_steps=10,
        load_best_model_at_end=True,
        learning_rate=1e-4,
    )

    # Define Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
    )

    # Train the model
    trainer.train()

    # Get predictions and probabilities from the model for the training set
    train_predictions = trainer.predict(train_dataset)
    train_logits = train_predictions.predictions
    train_probs = torch.softmax(torch.tensor(train_logits), dim=-1).numpy()

    # Get predictions and probabilities from the model for the test set
    test_predictions = trainer.predict(test_dataset)
    test_logits = test_predictions.predictions
    test_probs = torch.softmax(torch.tensor(test_logits), dim=-1).numpy()

    # Prepare features for SVM
    train_features = pd.DataFrame(train_probs, columns=["prob_neg", "prob_neu", "prob_pos"])
    train_features["lexicon_score"] = train_df_resampled["lexicon_score"].values
    train_labels = train_df_resampled["label"].values

    test_features = pd.DataFrame(test_probs, columns=["prob_neg", "prob_neu", "prob_pos"])
    test_features["lexicon_score"] = test_df["lexicon_score"].values
    test_labels = test_df["label"].values

    # Train SVM
    svm = SVC(kernel='linear', probability=True, random_state=42)
    svm.fit(train_features, train_labels)

    # Evaluate SVM
    svm_predictions = svm.predict(test_features)
    report = classification_report(test_labels, svm_predictions, target_names=["negative", "neutral", "positive"])
    print(f"Classification Report for {dataset_name}:\n", report)

    cm = confusion_matrix(test_labels, svm_predictions)
    print(f"Confusion Matrix for {dataset_name}:\n", cm)

    print("=" * 50)


tokenizer_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/242k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/608 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/437M [00:00<?, ?B/s]

Some weights of BertWithLexicon were not initialized from the model checkpoint at wietsedv/bert-base-dutch-cased and are newly initialized: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'classifier.bias', 'classifier.weight', 'lexicon_fc.bias', 'lexicon_fc.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Processing 1960s_gas...


Map:   0%|          | 0/462 [00:00<?, ? examples/s]

Map:   0%|          | 0/65 [00:00<?, ? examples/s]

Map:   0%|          | 0/65 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,1.1242,1.128516
2,0.4524,1.013469
3,0.1413,1.315783
4,0.0165,1.56424


Classification Report for 1960s_gas:
               precision    recall  f1-score   support

    negative       0.33      0.17      0.22        12
     neutral       0.17      0.05      0.08        20
    positive       0.51      0.82      0.63        33

    accuracy                           0.46        65
   macro avg       0.34      0.34      0.31        65
weighted avg       0.37      0.46      0.38        65

Confusion Matrix for 1960s_gas:
 [[ 2  1  9]
 [ 2  1 17]
 [ 2  4 27]]
Processing 1970s_gas...


Map:   0%|          | 0/114 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]

Map:   0%|          | 0/15 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,No log,1.172436
2,1.008000,1.619533
3,0.210600,1.842796
4,0.049500,2.056671


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Classification Report for 1970s_gas:
               precision    recall  f1-score   support

    negative       1.00      0.33      0.50         3
     neutral       0.00      0.00      0.00         3
    positive       0.64      1.00      0.78         9

    accuracy                           0.67        15
   macro avg       0.55      0.44      0.43        15
weighted avg       0.59      0.67      0.57        15

Confusion Matrix for 1970s_gas:
 [[1 0 2]
 [0 0 3]
 [0 0 9]]
Processing 1980s_gas...


Map:   0%|          | 0/186 [00:00<?, ? examples/s]

Map:   0%|          | 0/33 [00:00<?, ? examples/s]

Map:   0%|          | 0/33 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,1.1059,1.202232
2,0.6812,1.393971
3,0.28,1.663797
4,0.0953,1.874945


Classification Report for 1980s_gas:
               precision    recall  f1-score   support

    negative       0.67      0.46      0.55        13
     neutral       0.33      0.14      0.20         7
    positive       0.52      0.85      0.65        13

    accuracy                           0.55        33
   macro avg       0.51      0.48      0.46        33
weighted avg       0.54      0.55      0.51        33

Confusion Matrix for 1980s_gas:
 [[ 6  2  5]
 [ 1  1  5]
 [ 2  0 11]]
Processing 1990s_gas...


Map:   0%|          | 0/51 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]

Map:   0%|          | 0/8 [00:00<?, ? examples/s]



Epoch,Training Loss,Validation Loss
1,No log,1.259774
2,No log,1.440427
3,0.697300,1.602747
4,0.697300,1.728579


Classification Report for 1990s_gas:
               precision    recall  f1-score   support

    negative       0.43      0.75      0.55         4
     neutral       0.00      0.00      0.00         2
    positive       0.00      0.00      0.00         2

    accuracy                           0.38         8
   macro avg       0.14      0.25      0.18         8
weighted avg       0.21      0.38      0.27         8

Confusion Matrix for 1990s_gas:
 [[3 0 1]
 [2 0 0]
 [2 0 0]]


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
