# In [4]:
import io
import pickle

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st

@st.cache_data
def load_data():
    """Read the train and test CSV files from the ``data/`` directory.

    Both files are read with ``index_col=False`` so pandas does not use the
    first column as the index. The result is memoized by ``st.cache_data``.

    Returns:
        A ``(train, test)`` tuple of DataFrames.
    """
    # The test file is read first, then the train file.
    csv_paths = ("data/test.csv", "data/train.csv")
    test_frame, train_frame = (
        pd.read_csv(csv_path, index_col=False) for csv_path in csv_paths
    )
    return train_frame, test_frame


# Load both datasets once at import time; the page code below reads them.
df_train, df_test = load_data()

@st.cache_resource
def load_models():
    """Unpickle every classifier under ``models/`` and key it by display name.

    The result is held by ``st.cache_resource``.

    NOTE: ``pickle.load`` can execute arbitrary code while deserializing, so
    the ``.pkl`` files must come from a trusted source.

    Returns:
        A dict mapping the model's display name to the unpickled object.
    """
    named_paths = (
        ('Logistic Regression', 'models/logistic_regression.pkl'),
        ('Decision Tree', 'models/decision_tree.pkl'),
        ('Random Forest', 'models/random_forest.pkl'),
    )
    loaded = {}
    for label, pickle_path in named_paths:
        with open(pickle_path, 'rb') as handle:
            loaded[label] = pickle.load(handle)
    return loaded


# Display name -> model object, used by the "Prediction" page.
models = load_models()

@st.cache_resource
def load_vectorizer():
    """Unpickle and return the object stored in ``models/vectorizer.pkl``.

    The result is held by ``st.cache_resource``. As with any pickle, the file
    must come from a trusted source.
    """
    with open('models/vectorizer.pkl', 'rb') as handle:
        return pickle.load(handle)


# Used by the "Prediction" page to transform the entered text.
vectorizer = load_vectorizer()

def _frame_info_text(frame):
    """Return the summary that ``frame.info()`` would print, as a string.

    ``DataFrame.info`` writes its report to a buffer (stdout by default) and
    returns ``None``, so the output has to be captured before it can be shown
    on the page.
    """
    buffer = io.StringIO()
    frame.info(buf=buffer)
    return buffer.getvalue()


# Sidebar navigation: the selected radio entry decides which page is rendered.
st.sidebar.title("Navigation")
options = st.sidebar.radio("Select a page:", ["Project Overview", "EDA", "Prediction"])

if options == "Project Overview":
    st.title("Project Overview")
    st.write("""
    This project is about text classification using multiple models.
    You can navigate to different pages using the sidebar.
    """)

elif options == "EDA":
    st.title("Exploratory Data Analysis")

    st.write("### Train Data Head")
    st.write(df_train.head())

    st.write("### Test Data Head")
    st.write(df_test.head())

    # st.text keeps the column alignment of the captured info() report.
    st.write("### Train Data Info")
    st.text(_frame_info_text(df_train))

    st.write("### Test Data Info")
    st.text(_frame_info_text(df_test))

    st.write("### Data Distribution")
    fig, ax = plt.subplots()
    # The class column is named 'category' (not 'label').
    sns.histplot(df_train['category'], kde=False, ax=ax)
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; without this each
    # rerun of the script would leave one more figure in memory.
    plt.close(fig)

elif options == "Prediction":
    st.title("Text Classification")
    text_input = st.text_area("Enter text to classify:")
    model_choice = st.selectbox("Choose a model:", list(models.keys()))

    if st.button("Classify"):
        if not text_input.strip():
            # Nothing meaningful to classify; ask for input instead of
            # reporting a prediction for an empty string.
            st.warning("Please enter some text to classify.")
        else:
            model = models[model_choice]
            # transform() takes an iterable of documents, hence the list.
            transformed_text = vectorizer.transform([text_input])
            prediction = model.predict(transformed_text)
            st.write(f"The text is classified as: {prediction[0]}")


# Output captured when the cell was executed outside of `streamlit run`:
# 2024-07-04 08:02:36.702 No runtime found, using MemoryCacheStorageManager
# 2024-07-04 08:02:36.782 No runtime found, using MemoryCacheStorageManager
# 2024-07-04 08:02:37.608 Session state does not function when running a script without `streamlit run`
