# Exported from notebook cell In [2].
import os

import pandas as pd
import streamlit as st
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.svm import SVC

# Path of the labelled articles CSV; it must provide 'content' and 'category'
# columns.
DATA_PATH = 'test.csv'


@st.cache_data
def load_data(path, mtime):
    """Read the articles CSV and normalise the two columns the app uses.

    Streamlit re-executes the whole script on every widget interaction, so the
    result is cached instead of re-parsing the file each time.

    Args:
        path: Location of the CSV file.
        mtime: Modification time of ``path``. It is not read in the body; it
            is part of the cache key so that an edited file is loaded again.

    Returns:
        DataFrame in which missing 'content' values are replaced by '' and
        'category' is stored as a pandas categorical.
    """
    frame = pd.read_csv(path)
    frame['content'] = frame['content'].fillna('')
    frame['category'] = frame['category'].astype('category')
    return frame


@st.cache_resource
def train_and_evaluate(path, mtime):
    """Fit the TF-IDF vectorizer and every classifier, then score them.

    Cached as a resource so that a sidebar click does not retrain the three
    models. The returned objects are shared between reruns; treat them as
    read-only.

    Args:
        path: Location of the CSV file, forwarded to ``load_data``.
        mtime: Modification time of ``path``, forwarded to ``load_data`` and
            used as part of this function's cache key as well.

    Returns:
        Tuple ``(vectorizer, models, model_performance)``: the fitted
        ``TfidfVectorizer``, a dict mapping display name to fitted estimator,
        and a dict mapping display name to
        ``{"accuracy": float, "report": dict}`` computed on the held-out 20%.
    """
    frame = load_data(path, mtime)

    # A row without a label can neither train nor score a classifier, so it is
    # left out of the modelling subset (the loaded frame itself is untouched).
    labelled = frame.dropna(subset=['category'])

    # Split the data into training and testing sets
    X_train, X_test, y_train, y_test = train_test_split(
        labelled['content'],
        labelled['category'],
        test_size=0.2,
        random_state=42,
    )

    # Vectorize the text data; the vocabulary is learned from the training
    # split only and then applied to the test split.
    vectorizer = TfidfVectorizer(stop_words='english')
    X_train_tfidf = vectorizer.fit_transform(X_train)
    X_test_tfidf = vectorizer.transform(X_test)

    # Initialize models
    models = {
        "Logistic Regression": LogisticRegression(),
        "Naive Bayes": MultinomialNB(),
        "Support Vector Machine": SVC(),
    }

    # Train and evaluate models
    model_performance = {}
    for model_name, model in models.items():
        model.fit(X_train_tfidf, y_train)
        y_pred = model.predict(X_test_tfidf)
        model_performance[model_name] = {
            "accuracy": accuracy_score(y_test, y_pred),
            # zero_division=0 yields the same numbers as the default setting
            # but without a warning for every class that is never predicted.
            "report": classification_report(
                y_test, y_pred, output_dict=True, zero_division=0
            ),
        }

    return vectorizer, models, model_performance


# Keying both caches on the file's modification time makes a changed CSV
# trigger a fresh load and retrain; otherwise the cached results are reused.
_data_mtime = os.path.getmtime(DATA_PATH)

# Load the dataset
df = load_data(DATA_PATH, _data_mtime)

# Fitted vectorizer, fitted models and their evaluation results
vectorizer, models, model_performance = train_and_evaluate(DATA_PATH, _data_mtime)

def _show_team():
    """Render the placeholder team page."""
    st.header("Team Page")
    st.write("This is the team page.")


def _show_overview():
    """Render the one-paragraph project description."""
    st.header("Project Overview")
    st.write("This project aims to classify news articles into different categories using machine learning models.")


def _show_eda():
    """Render the EDA page: an intro line followed by the first rows of ``df``."""
    st.header("Exploratory Data Analysis")
    st.write("Here we will show some EDA.")
    st.write(df.head())


def _show_model_performance():
    """Render accuracy and the classification report of every evaluated model."""
    st.header("Model Performance")
    for title, performance in model_performance.items():
        st.subheader(title)
        st.write(f"Accuracy: {performance['accuracy']}")
        st.write("Classification Report:")
        st.json(performance['report'])


# Sidebar label -> function that draws the page; the insertion order is the
# order in which the labels appear in the sidebar.
_PAGES = {
    "Team": _show_team,
    "Project Overview": _show_overview,
    "EDA": _show_eda,
    "Model Performance": _show_model_performance,
}

# Streamlit app
st.title("News Article Classification")

# Sidebar for navigation
st.sidebar.title("Navigation")
options = list(_PAGES)
choice = st.sidebar.radio("Go to", options)

# Draw the selected page; a value without a registered page draws nothing.
_render_page = _PAGES.get(choice)
if _render_page is not None:
    _render_page()

