In [3]:
import streamlit as st
import joblib
import numpy as np
from sklearn.metrics import classification_report

# Load the saved model and vectorizer.
# Streamlit re-executes this script from the top on every widget interaction,
# so the artifacts go through a cached loader: each file is deserialized once
# and the same object is reused on later reruns instead of being re-read.
@st.cache_resource
def _load_artifact(path):
    """Return the object stored in the joblib file at *path*.

    NOTE: joblib files are pickles, and unpickling can execute arbitrary
    code. Only load artifacts that come from a trusted source.
    """
    return joblib.load(path)


log_reg_model = _load_artifact("optimized_log_reg_model.pkl")
tfidf_vectorizer = _load_artifact("optimized_tfidf_vectorizer.pkl")

# Page title, then the section summarizing how the candidate models performed.
st.title("News Categorization Model Comparison")
st.header("Model Performance Comparison")
st.subheader("Accuracy Comparison")

# Accuracy per model, keyed by the display name shown in the app.
# NOTE: these figures are hard-coded; replace them with the actual results.
accuracy_scores = {
    "Logistic Regression": 0.85,
    "Random Forest": 0.82,
    "Naive Bayes": 0.75,
    "SVM": 0.88,
}

# Build one "<model>: <accuracy>" line per entry (four decimal places),
# then render the lines in the same order as the dictionary.
accuracy_lines = [
    f"{model}: {accuracy:.4f}" for model, accuracy in accuracy_scores.items()
]
for line in accuracy_lines:
    st.write(line)

# Allow the user to input text for prediction
st.header("Enter News Content for Prediction")

user_input = st.text_area("Enter the news headline/content:")

# Require at least one non-whitespace character. A string of only spaces or
# newlines is truthy, so without this check it would be vectorized and
# classified, and a category/confidence would be shown for blank text.
if user_input and user_input.strip():
    # Vectorize the user input (a one-element batch) with the fitted vectorizer
    input_tfidf = tfidf_vectorizer.transform([user_input])

    # Make predictions
    prediction = log_reg_model.predict(input_tfidf)  # Assuming Logistic Regression is the best model
    prediction_prob = log_reg_model.predict_proba(input_tfidf)

    st.write(f"Predicted Category: {prediction[0]}")
    # Only one document was passed in, so the maximum over the returned
    # probabilities is the probability of the most likely class.
    st.write(f"Prediction Confidence: {np.max(prediction_prob) * 100:.2f}%")
elif user_input:
    # Non-empty but blank input: explain why no prediction is shown.
    st.warning("Please enter some text to get a prediction.")

# Display the classification report if checkbox is selected
if st.checkbox("Show Classification Report"):
    # The report needs held-out test features and labels (X_test_tfidf, y_test).
    # This part of the script does not define them, so look them up defensively:
    # a missing name becomes a visible warning instead of a NameError that
    # crashes the app as soon as the checkbox is ticked.
    try:
        test_features = X_test_tfidf  # Replace X_test_tfidf with your actual test data
        test_labels = y_test
    except NameError:
        st.warning(
            "Test data (X_test_tfidf / y_test) is not available in this app, "
            "so the classification report cannot be generated."
        )
    else:
        y_test_pred = log_reg_model.predict(test_features)
        report = classification_report(test_labels, y_test_pred)
        # st.text renders the report as fixed-width text
        st.text(report)
