Deployment with Streamlit

In [3]:
import streamlit as st
import joblib
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from io import BytesIO
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

# Load the pre-trained models and vectorizers.
#
# Streamlit re-executes this script from the top on every widget interaction,
# so the artifacts are loaded through a cached helper: each file is read from
# disk once per server process and the same object is handed back on reruns.
# The cached objects are shared between sessions, so they must only be used
# for read-only calls such as ``transform`` / ``predict``.
#
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code. Only load artifacts produced by a trusted training run, ideally with
# the same scikit-learn version that is used to serve them.
@st.cache_resource
def _load_artifact(path):
    """Load the joblib-serialized object stored at ``path`` (cached)."""
    return joblib.load(path)


lr_model = _load_artifact('5 Unsupervised Sentiment Analysis/lexicon_models_fextract/models/best_lr_model_afinn.pkl')
# NOTE(review): the variable is called ``count_vectorizer`` but the file name
# says ``tfidf`` -- presumably a TF-IDF vectorizer; confirm against training.
count_vectorizer = _load_artifact('5 Unsupervised Sentiment Analysis/lexicon_models_fextract/feature_extract/lr_tfidf_afinn.pkl')
lda_model = _load_artifact('7 Topics Modelling/lda_models_fetract/models/lda_bow_with_balance.pkl')
lda_vectorizer = _load_artifact('7 Topics Modelling/lda_models_fetract/feature_extract/bow_vectorizer_with_balance.pkl')

# Define a function to predict sentiment
def predict_sentiment(headline):
    """Return the sentiment label predicted for a single headline.

    The headline is wrapped in a one-element list, turned into a feature
    matrix by the module-level ``count_vectorizer`` and classified by the
    module-level ``lr_model``.

    Args:
        headline: Raw headline text.

    Returns:
        The first (and only) element of the model's prediction array.
    """
    features = count_vectorizer.transform([headline])
    return lr_model.predict(features)[0]

# Define a function to get topic information
def get_topics(article):
    """Plot the top words of every LDA topic, annotated for ``article``.

    The article is vectorized with the module-level ``lda_vectorizer`` and
    passed through the module-level ``lda_model`` to obtain its topic
    weights. One horizontal bar chart is drawn per topic showing that
    topic's ten highest-scoring words, and each subplot title carries the
    article's weight for that topic.

    Args:
        article: Raw article text.

    Returns:
        A ``BytesIO`` positioned at offset 0 that contains the figure
        encoded as PNG.
    """
    # Infer the article's topic mixture (first and only row of the result).
    transformed_article = lda_vectorizer.transform([article])
    topic_weights = np.asarray(lda_model.transform(transformed_article))[0]
    # Normalize defensively so the titles always show shares of a whole,
    # whether or not the model's transform already returns normalized rows.
    weight_total = topic_weights.sum()
    if weight_total > 0:
        topic_weights = topic_weights / weight_total

    topic_word_scores = lda_model.components_
    n_topics = len(topic_word_scores)
    feature_names = np.asarray(lda_vectorizer.get_feature_names_out())

    # Size the grid from the number of topics instead of assuming ten.
    n_cols = 5
    n_rows = max(1, (n_topics + n_cols - 1) // n_cols)  # ceiling division

    # One color per topic.
    colors = plt.cm.tab10(np.linspace(0, 1, n_topics))

    # squeeze=False keeps ``axes`` two-dimensional even for a single row.
    fig, axes = plt.subplots(
        n_rows,
        n_cols,
        figsize=(15, 3 * n_rows),
        sharex=False,
        sharey=False,
        squeeze=False,
    )
    try:
        axes = axes.flatten()

        for i, (topic, color) in enumerate(zip(topic_word_scores, colors)):
            top_words_idx = np.argsort(topic)[::-1][:10]  # descending order
            top_words = feature_names[top_words_idx]
            top_scores = topic[top_words_idx]

            ax = axes[i]
            ax.barh(top_words, top_scores, color=color)
            ax.set_title(
                f'Topic {i} ({topic_weights[i]:.1%})',
                fontsize=12,
                fontweight='bold',
            )
            ax.invert_yaxis()  # highest-scoring word at the top
            ax.tick_params(axis='both', which='major', labelsize=10)
            for spine in ax.spines.values():
                spine.set_visible(False)

        # Hide grid cells that have no topic to show.
        for unused_ax in axes[n_topics:]:
            unused_ax.set_visible(False)

        fig.suptitle('Top Words per Topic', fontsize=16)
        fig.tight_layout(rect=[0, 0.03, 1, 0.95])

        # Render this specific figure (not pyplot's "current" one) to PNG.
        buf = BytesIO()
        fig.savefig(buf, format='png')
        buf.seek(0)
        return buf
    finally:
        # Always release the figure, even when rendering fails.
        plt.close(fig)

# Streamlit page layout
st.title('News Headline and Article Analysis')

# Headline box: once it holds text, show the predicted sentiment label.
if headline := st.text_input('Enter a news headline:'):
    st.write(f'Sentiment: {predict_sentiment(headline)}')

# Article box: once it holds text, show the per-topic top-words chart.
if article := st.text_area('Enter a news article text:'):
    st.image(get_topics(article))


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
2024-07-20 19:05:32.022 
  command:

    streamlit run c:\Users\ChokJoe\anaconda3\envs\myenv\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-07-20 19:05:32.023 Session state does not function when running a script without `streamlit run`
