In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report
)

# Load the labelled corpus. The script reads the "text" column as the
# documents and the "label" column as the class to predict.
data = pd.read_csv("Data.csv")

X = data["text"]
y = data["label"]

# Split the RAW documents before fitting anything. Fitting TF-IDF on the
# full corpus would let the held-out documents influence the vocabulary
# and IDF weights (train/test leakage), which biases the reported scores.
# Same seed/stratification as before, so the partition itself is unchanged.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
    stratify=y,
)

print("\n--- Feature Extraction ---")

# Learn vocabulary and IDF weights from the training documents only, then
# project the test documents into that same feature space.
vectorizer = TfidfVectorizer(stop_words="english")
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Full-corpus matrix kept under its original name for any later code that
# still refers to it. It is produced by the train-fitted vectorizer and is
# not used for model fitting or evaluation here.
X_features = vectorizer.transform(X)

# --- Support Vector Machine: tune kernel and C, then score on the test split ---

# Candidate hyper-parameters explored by the grid search (2 kernels x 3 C values).
param_grid = {"kernel": ["linear", "rbf"], "C": [0.1, 1, 10]}

svm = SVC()
grid = GridSearchCV(
    estimator=svm,
    param_grid=param_grid,
    scoring="f1_weighted",
    cv=2,
)
grid.fit(X_train, y_train)

# The best estimator is the one GridSearchCV refits with the winning parameters.
best_svm = grid.best_estimator_
svm_pred = best_svm.predict(X_test)

print("\n==============================")
print("        SVM RESULTS")
print("==============================")
print("Best Parameters :", grid.best_params_)
print("Accuracy        :", accuracy_score(y_test, svm_pred))

# Precision, recall and F1 all use the same weighted averaging, so they
# are reported in one pass.
for _svm_label, _svm_metric in (
    ("Precision       :", precision_score),
    ("Recall          :", recall_score),
    ("F1-score        :", f1_score),
):
    print(
        _svm_label,
        _svm_metric(y_test, svm_pred, average="weighted", zero_division=0),
    )

print("Confusion Matrix:\n", confusion_matrix(y_test, svm_pred))
print("\nClassification Report:\n")
print(classification_report(y_test, svm_pred, zero_division=0))

# --- Random Forest: 100 trees with a fixed seed, scored on the same test split ---
rf = RandomForestClassifier(random_state=42, n_estimators=100)
# fit() returns the fitted estimator, so prediction can be chained onto it.
rf_pred = rf.fit(X_train, y_train).predict(X_test)

print("\n==============================")
print("   RANDOM FOREST RESULTS")
print("==============================")
print("Accuracy        :", accuracy_score(y_test, rf_pred))

# Precision, recall and F1 all use the same weighted averaging, so they
# are reported in one pass.
for _rf_label, _rf_metric in (
    ("Precision       :", precision_score),
    ("Recall          :", recall_score),
    ("F1-score        :", f1_score),
):
    print(
        _rf_label,
        _rf_metric(y_test, rf_pred, average="weighted", zero_division=0),
    )

print("Confusion Matrix:\n", confusion_matrix(y_test, rf_pred))
print("\nClassification Report:\n")
print(classification_report(y_test, rf_pred, zero_division=0))



--- Feature Extraction ---

        SVM RESULTS
Best Parameters : {'C': 10, 'kernel': 'linear'}
Accuracy        : 0.4166666666666667
Precision       : 0.5714285714285715
Recall          : 0.4166666666666667
F1-score        : 0.42500000000000004
Confusion Matrix:
 [[2 0 0 1]
 [1 1 1 0]
 [2 0 1 0]
 [2 0 0 1]]

Classification Report:

               precision    recall  f1-score   support

     business       0.29      0.67      0.40         3
entertainment       1.00      0.33      0.50         3
       sports       0.50      0.33      0.40         3
   technology       0.50      0.33      0.40         3

     accuracy                           0.42        12
    macro avg       0.57      0.42      0.43        12
 weighted avg       0.57      0.42      0.43        12


   RANDOM FOREST RESULTS
Accuracy        : 0.4166666666666667
Precision       : 0.4583333333333333
Recall          : 0.4166666666666667
F1-score        : 0.35000000000000003
Confusion Matrix:
 [[0 0 0 3]
 [0 1 1 1]
 [0 0 