# In [None]:
import pandas as pd
# Load the dataset into a DataFrame. The code below reads a `label` column
# from it and treats every remaining column as a model feature.
# NOTE(review): the file name suggests bag-of-words features for tweets — confirm.
df = pd.read_csv("cleaning/tweets_bow.csv")

import time
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, f1_score, precision_score, recall_score


# Collapse the labels to binary, in place: 0 stays 0, any other value becomes 1.
# A vectorized comparison replaces the per-row Python lambda and yields the
# same 0/1 integers.
df['label'] = (df['label'] != 0).astype("int64")

# Features are every column except the target.
X = df.drop(columns="label")
y = df["label"]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=123
)

# Time model construction + fit + prediction on the held-out set.
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() is wall-clock time and can jump if the system clock
# is adjusted mid-run.
start_time = time.perf_counter()
model = LogisticRegression(solver='liblinear', random_state=42)

model.fit(X_train, y_train)

y_pred = model.predict(X_test)
end_time = time.perf_counter()
elapsed_time = end_time - start_time

# Evaluate the model performance on the held-out set.
# The 'weighted' average weights each class's score by its support (number of
# true instances). Note that support-weighted recall is always equal to plain
# accuracy, so those two printed figures will match.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
weighted_f1 = f1_score(y_test, y_pred, average='weighted')

print(f"Time to train and predict: {elapsed_time:.4f} seconds")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (weighted): {precision:.4f}")
print(f"Recall (weighted): {recall:.4f}")
print(f"Weighted F1-score: {weighted_f1:.4f}\n")
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred, digits=4))
