# In [None]:
import os
import warnings

import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, KFold

# Install a catch-all "ignore" filter so that no warning of any category is
# displayed from this point on; this keeps the training output uncluttered.
warnings.simplefilter('ignore')

def train_and_evaluate(X_train_tfidf, X_test_tfidf, y_train, y_test, vectorizer,
                       model_dir='models'):
    """
    Tune a Logistic Regression with GridSearchCV, evaluate it, and persist it.

    The function runs a 5-fold cross-validated grid search over ``C`` and
    ``solver``, evaluates the best estimator on the held-out test set (accuracy,
    classification report and confusion matrix are printed), and finally saves
    the best estimator and the given vectorizer with joblib.

    Args:
        X_train_tfidf: Training feature matrix passed to ``GridSearchCV.fit``.
        X_test_tfidf: Test feature matrix passed to ``predict``.
        y_train: Training labels.
        y_test: Test labels used for the reported metrics.
        vectorizer: Object that is pickled next to the model. It is not used
            for any computation here; presumably it is the fitted TF-IDF
            vectorizer that produced the feature matrices (confirm with caller).
        model_dir: Directory that receives ``champion_lr_model.pkl`` and
            ``fitted_vectorizer.pkl``. A relative path is resolved against the
            current working directory. The directory is created if missing.

    Returns:
        The accuracy of the best estimator on the test set.

    Raises:
        OSError: If ``model_dir`` cannot be created or written to.
    """
    print("\n--- Model 4: Hyperparameter Tuning with GridSearchCV ---")

    # Create the output directory before the (expensive) search so that an
    # unusable path fails immediately instead of after training has finished.
    # Without this, joblib.dump raises FileNotFoundError on a fresh checkout.
    os.makedirs(model_dir, exist_ok=True)

    # Hyperparameter grid: 3 regularization strengths x 2 solvers.
    param_grid = {
        'C': [0.1, 1, 10],
        'solver': ['liblinear', 'saga']
    }

    # Base estimator and CV splitter; both are seeded for reproducible results.
    logreg_tuned = LogisticRegression(random_state=42, max_iter=1000)
    kfold_cv_inner = KFold(n_splits=5, shuffle=True, random_state=42)

    # NOTE(review): scoring='f1' is scikit-learn's binary F1 scorer, so the
    # labels are presumably binary -- confirm against the caller.
    grid_search = GridSearchCV(
        estimator=logreg_tuned,
        param_grid=param_grid,
        cv=kfold_cv_inner,
        scoring='f1',
        n_jobs=-1,
        verbose=1
    )

    print("Starting GridSearchCV with 5-Fold Cross-Validation...")
    grid_search.fit(X_train_tfidf, y_train)

    # Best parameter combination according to the cross-validated F1 score.
    best_logreg = grid_search.best_estimator_
    print(f"\nBest Hyperparameters found: {grid_search.best_params_}")

    # Evaluate the tuned model on the held-out test set.
    y_pred_best = best_logreg.predict(X_test_tfidf)
    final_accuracy = accuracy_score(y_test, y_pred_best)

    print(f"\nAccuracy (Best Tuned Model) on Test Set: {final_accuracy:.4f}")
    print("Classification Report (Best Tuned Model):\n", classification_report(y_test, y_pred_best))

    cm = confusion_matrix(y_test, y_pred_best)
    print("\nConfusion Matrix for the Optimal Tuned Model")
    print(cm)

    # Persist the model together with the vectorizer so both can be reloaded
    # as a matching pair later on.
    joblib.dump(best_logreg, os.path.join(model_dir, 'champion_lr_model.pkl'))
    joblib.dump(vectorizer, os.path.join(model_dir, 'fitted_vectorizer.pkl'))
    print(f"\nSUCCESS: Champion model and fitted vectorizer saved to the '{model_dir}/' folder.")

    return final_accuracy