In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neural_network import MLPClassifier

# Candidate estimators and their hyper-parameter grids
# (no scaler is applied because the data is sparse).
# Both dicts share the same keys; insertion order is the tuning order.
pipelines = {
    # 1. Naive Bayes
    'MultinomialNB': MultinomialNB(),
    # 2. KNN (not a great fit for sparse data, but tried anyway)
    'KNN': KNeighborsClassifier(),
    # 3. Linear SVM
    'LinearSVC': LinearSVC(max_iter=5000),
    # 4. Decision Tree
    'DecisionTree': DecisionTreeClassifier(random_state=42),
    # 5. Random Forest
    'RandomForest': RandomForestClassifier(random_state=42, n_jobs=-1),
    # 6. Gradient Boosting
    'GradientBoosting': GradientBoostingClassifier(random_state=42),
    # 7. MLP (neural net)
    'MLP': MLPClassifier(max_iter=500, random_state=42),
    # 8. Logistic Regression (strong baseline for text)
    'LogisticRegression': LogisticRegression(solver='liblinear', random_state=42),
}

param_grids = {
    'MultinomialNB': {
        'alpha': [0.001, 0.01, 0.1, 0.5, 1.0],
    },
    'KNN': {
        'n_neighbors': [3, 5, 7, 11],
        'weights': ['uniform', 'distance'],
    },
    'LinearSVC': {
        'C': [0.01, 0.1, 1, 10, 100],
    },
    'DecisionTree': {
        'max_depth': [None, 10, 20, 50],
        'min_samples_leaf': [1, 2, 5],
    },
    'RandomForest': {
        'n_estimators': [100, 300, 500],
        'max_depth': [None, 20, 50],
        'min_samples_leaf': [1, 2, 5],
    },
    'GradientBoosting': {
        'n_estimators': [100, 200, 500],
        'learning_rate': [0.01, 0.05, 0.1],
        'max_depth': [2, 3, 5],
    },
    'MLP': {
        'hidden_layer_sizes': [(50,), (100,), (100,50)],
        'alpha': [1e-5, 1e-4, 1e-3],
    },
    'LogisticRegression': {
        'C': [0.01, 0.1, 1, 10, 100],
    },
}

# Selection metric and cross-validation splitter shared by every grid search.
scoring = 'precision'
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Maps model name -> best CV score, best parameters and the refitted estimator.
best_results = {}

for name, model in pipelines.items():
    print("Tuning", name)
    gs = GridSearchCV(
        estimator=model,
        param_grid=param_grids[name],
        scoring=scoring,
        cv=cv,
        n_jobs=-1,
        verbose=1,
    )
    # ravel() flattens the label array to 1-D before fitting.
    gs.fit(train_x_vec, train_y.ravel())

    best_results[name] = dict(
        best_score=gs.best_score_,
        best_params=gs.best_params_,
        best_estimator=gs.best_estimator_,
    )

    print(f"{name}: best_score={gs.best_score_:.4f}, best_params={gs.best_params_}")

# Collect the per-model summaries into a DataFrame for easier viewing,
# ordered from highest to lowest cross-validated precision.
summary_rows = [
    {
        "Model": model_name,
        "Precision": outcome['best_score'],
        "Best Params": outcome['best_params'],
    }
    for model_name, outcome in best_results.items()
]
results_df = pd.DataFrame(summary_rows)
results_df = results_df.sort_values(by="Precision", ascending=False)

display(results_df)


### 1. Results and Performance Hierarchy

The benchmarking tested eight distinct classification models, ranging from linear models and probabilistic approaches (Naive Bayes) to ensemble methods (Gradient Boosting, Random Forest) and neural networks (MLP).

The results show that the best precision score achieved is **0.996497**, attained jointly by two linear classifiers:

1.  **LinearSVC (Support Vector Classification)**
2.  **LogisticRegression**

| Rank | Model Name | Precision | Classification Type |
| :--- | :--- | :--- | :--- |
| **1** | **LinearSVC** | **0.996497** | Discriminative, Parametric (Linear SVM) |
| **1** | **LogisticRegression** | **0.996497** | Discriminative, Parametric (GLM) |
| 3 | GradientBoosting | 0.996397 | Discriminative, Non-parametric (Ensemble) |
| 4 | MLP | 0.995258 | Discriminative, Parametric (Neural Network) |
| 5 | MultinomialNB | 0.994525 | Generative, Parametric (Naive Bayes) |

### 2. Analysis: Can We Do Better?

The comparison demonstrates that the highly optimized **Logistic Regression** model proved resilient, matching the performance of the best competing classifier, **LinearSVC**.

1.  **Superiority of Linear Classifiers:** The joint success of Logistic Regression and LinearSVC indicates that the feature representation (derived from word frequency counts) allows the sentiment classes to be separated almost perfectly by a **linear decision boundary**. This result suggests that more complex models like MLPs or ensemble methods could not effectively utilize additional non-linear information to surpass this linear threshold in terms of precision.
2.  **Model Convergence:** The achieved precision of $\approx 0.9965$ is extremely high (where precision is calculated as $TP / (TP + FP)$). This suggests that the current feature set has reached a practical ceiling for classifying these tweets, as reducing the remaining false positives is exceptionally difficult.
3.  **Baseline Challenge Outcome:** While initial, less optimized implementations of Logistic Regression or simpler decision functions might have yielded scores in the $\approx 0.9940$–$0.9960$ range, the optimized Logistic Regression model successfully matched the performance of the most robust ML classifier (LinearSVC). Therefore, among the tested classical ML models, the answer is **No, we cannot do better** than the performance achieved by these two optimized linear models.

### 3. Potential Avenues for Further Improvement

To marginally increase performance beyond $0.996497$, focus must shift away from model selection within the currently defined set, towards feature engineering and advanced external techniques:

*   **Expanded Feature Set:** Re-running the experiment using an **expanded feature vector (6 features in total)** (Exercise 6) could potentially introduce subtle non-linear characteristics or better separability, slightly raising the precision ceiling.
*   **Advanced Meta-Learning:** Employing sophisticated ensemble methods such as **Stacking** could combine the strengths of the top classifiers (LinearSVC, LogisticRegression, GradientBoosting) to potentially achieve marginal gains.
*   **External Benchmarking:** The ultimate challenge to surpass this performance involves comparing it against external state-of-the-art Natural Language Processing tools, specifically **Virtual Assistants (LLMs)** like ChatGPT, as proposed in the follow-up assignment (Exercise 8).