In [None]:
import pandas as pd
import numpy as np
import re
import spacy
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, f1_score, precision_score, recall_score

In [None]:
# Name of the spaCy pipeline that supplies stop-word flags and lemmas to
# preprocess_text.
SPACY_MODEL_NAME = "en_core_web_sm"

# Excel workbook holding the labelled sentences.
# NOTE(review): absolute, machine-specific path — confirm it exists wherever
# this notebook is run.
DATASET_PATH = "/mnt/data/awaise author data set origional - Copy.xlsx"

nlp = spacy.load(SPACY_MODEL_NAME)
df = pd.read_excel(DATASET_PATH)

# Translation table that deletes every character in string.punctuation.
# Built once at definition time so it is not recomputed for every row.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def preprocess_text(text):
    """Normalise one sentence before it is vectorised.

    Steps, in order: lower-case the text, delete every ASCII punctuation
    character, run the module-level spaCy pipeline ``nlp`` over the result,
    drop tokens flagged as stop words and replace the rest by their lemma.

    Args:
        text: The raw sentence as a ``str``.

    Returns:
        The remaining lemmas joined by single spaces.
    """
    text = text.lower()
    # str.translate removes each punctuation character literally.  A regex
    # character class built as f"[{string.punctuation}]" reads the "\]" inside
    # string.punctuation as an escaped bracket, so backslashes survive it.
    text = text.translate(_PUNCTUATION_TABLE)
    doc = nlp(text)
    tokens = [token.lemma_ for token in doc if not token.is_stop]  # Remove stopwords & lemmatization
    return " ".join(tokens)

In [None]:
# Apply preprocessing
# Missing sentences are turned into empty strings first; astype(str) on its own
# would convert NaN into the literal token "nan" and feed it to the vectorizer.
df['processed_text'] = df['sentence'].fillna("").astype(str).apply(preprocess_text)

y = df['ground polarity']  # Assuming 'ground polarity' is the target label

# Train-test split
# Split the cleaned text *before* vectorizing so that the held-out documents
# cannot influence the vocabulary or IDF weights.  With the same test_size and
# random_state the selected rows are the same as when splitting a feature
# matrix of the same length.
text_train, text_test, y_train, y_test = train_test_split(
    df['processed_text'], y, test_size=0.2, random_state=42
)

# Vectorization
# Fit TF-IDF on the training texts only, then reuse that fitted vocabulary for
# the test texts.
vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

# Full-corpus matrix in the training vocabulary, kept so that any later cell
# that still refers to X keeps working.
X = vectorizer.transform(df['processed_text'])

In [None]:
 # Model training and evaluation
def train_and_evaluate(model, model_name):
    """Fit ``model`` on the training split and score it on the test split.

    Reads the module-level ``X_train``, ``y_train``, ``X_test`` and ``y_test``.
    Prints a header, the classification report and the three macro-averaged
    scores, then returns those scores.

    Args:
        model: An estimator exposing ``fit(X, y)`` and ``predict(X)``.
        model_name: Label used in the printed header and in the returned row.

    Returns:
        A dict with the keys ``"Model"``, ``"Macro F1 Score"``,
        ``"Precision"`` and ``"Recall"``.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # Every summary metric uses macro averaging; keep them in display order.
    scorers = (
        ("Macro F1 Score", f1_score),
        ("Precision", precision_score),
        ("Recall", recall_score),
    )
    scores = {
        label: scorer(y_test, predictions, average='macro')
        for label, scorer in scorers
    }

    print(f"Results for {model_name}:")
    print(classification_report(y_test, predictions))
    # One metric per line, followed by a blank separator line.
    print("\n".join(f"{label}: {value}" for label, value in scores.items()) + "\n")

    return {"Model": model_name, **scores}


In [None]:



# Build the four classifiers that are compared.
# Naïve Bayes
nb_model = MultinomialNB()

# Random Forest
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)

# J48 (Decision Tree)
# NOTE: scikit-learn's DecisionTreeClassifier is a CART implementation, not
# Weka's J48 (C4.5); the "J48" label is kept unchanged for the report.
# random_state is fixed because the tree permutes features at each split, so
# an unseeded tree can give different scores from one run to the next.
dt_model = DecisionTreeClassifier(random_state=42)

# SVM
svm_model = SVC(kernel='linear')

# Store results: train and evaluate every model in the original order,
# collecting one summary row per model.
results = [
    train_and_evaluate(model, model_name)
    for model, model_name in (
        (nb_model, "Naïve Bayes"),
        (rf_model, "Random Forest"),
        (dt_model, "J48 (Decision Tree)"),
        (svm_model, "Support Vector Machine"),
    )
]

# Convert results to DataFrame and display
results_df = pd.DataFrame(results)
print(results_df)