In [5]:
import os
import re

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline



# Load the dataset
# The CSV location can be overridden with the ISSUES_CSV_PATH environment
# variable so the notebook is not tied to a single machine; when the variable
# is unset, the original hard-coded path is used unchanged.
file_path = os.environ.get(
    'ISSUES_CSV_PATH',
    '/Users/pranulokhande/Downloads/Traxidy Dataset/issues.csv',
)
issues_df = pd.read_csv(file_path)

# Hand-maintained English stop words that preprocess_text drops from descriptions.
# Written as a set literal; membership tests are what the preprocessing relies on.
manual_stop_words = {
    # pronouns and possessives
    "i", "me", "my", "myself", "we", "our", "ours", "ourselves",
    "you", "your", "yours", "yourself", "yourselves",
    "he", "him", "his", "himself", "she", "her", "hers", "herself",
    "it", "its", "itself", "they", "them", "their", "theirs", "themselves",
    # interrogatives and demonstratives
    "what", "which", "who", "whom", "this", "that", "these", "those",
    # auxiliary verbs
    "am", "is", "are", "was", "were", "be", "been", "being",
    "have", "has", "had", "having", "do", "does", "did", "doing",
    # articles and conjunctions
    "a", "an", "the", "and", "but", "if", "or", "because", "as", "until", "while",
    # prepositions
    "of", "at", "by", "for", "with", "about", "against", "between", "into",
    "through", "during", "before", "after", "above", "below", "to", "from",
    "up", "down", "in", "out", "on", "off", "over", "under",
    # adverbs
    "again", "further", "then", "once", "here", "there", "when", "where", "why", "how",
    # quantifiers and qualifiers
    "all", "any", "both", "each", "few", "more", "most", "other", "some", "such",
    "no", "nor", "not", "only", "own", "same", "so", "than", "too", "very",
    # contraction fragments and modals
    "s", "t", "can", "will", "just", "don", "should", "now",
}

# Data preprocessing function
def preprocess_text(text, stop_words=None):
    """Normalize a free-text description for bag-of-words modelling.

    Lowercases the text, deletes every character that is not an ASCII
    lowercase letter or whitespace, and drops stop words.

    Parameters
    ----------
    text : str
        Raw text. Any non-string value (for example ``None`` or the float NaN
        that pandas produces for an empty CSV cell) is treated as empty text
        instead of raising ``AttributeError`` on ``.lower()``.
    stop_words : collection of str, optional
        Words to discard. Defaults to the module-level ``manual_stop_words``.

    Returns
    -------
    str
        The surviving words joined by single spaces, or ``''`` if none remain.
    """
    if not isinstance(text, str):
        # Missing values have no words to contribute
        return ''
    if stop_words is None:
        stop_words = manual_stop_words
    text = text.lower()  # Lowercase
    # Characters are deleted rather than replaced by a space, so "user-friendly"
    # becomes the single token "userfriendly".
    text = re.sub(r'[^a-z\s]', '', text)  # Remove special characters and numbers
    return ' '.join(word for word in text.split() if word not in stop_words)  # Remove stopwords

# Clean every raw description and store the result in a new column
raw_descriptions = issues_df['description']
issues_df['processed_description'] = raw_descriptions.apply(preprocess_text)

# Labeling function based on keywords
def label_sentiment(text):
    """Assign a keyword-based sentiment label to a piece of text.

    Each keyword is tested with ``in``, so on a string it matches as a
    substring (``'fail'`` also matches ``'failed'``). Negative keywords are
    checked first and therefore win when both kinds are present.

    Returns ``'negative'``, ``'positive'``, or ``'neutral'`` when no keyword
    is found.
    """
    keyword_groups = (
        ('negative', ('support', 'error', 'issue', 'fail', 'problem')),
        ('positive', ('planning', 'success', 'meeting', 'completed', 'done')),
    )
    for label, keywords in keyword_groups:
        for keyword in keywords:
            if keyword in text:
                return label
    return 'neutral'

# Tag every cleaned description with its keyword-derived sentiment label
sentiment_labels = issues_df['processed_description'].apply(label_sentiment)
issues_df['sentiment'] = sentiment_labels

# Keep only the rows labelled positive or negative; neutral rows are left out
is_polar = issues_df['sentiment'] != 'neutral'
issues_df_filtered = issues_df[is_polar]

# Hold out 20% of the labelled rows for evaluation (fixed seed for repeatability)
# NOTE(review): the split is not stratified, so the class ratio in the test
# set can drift from the training set -- confirm this is acceptable.
features = issues_df_filtered['processed_description']
targets = issues_df_filtered['sentiment']
X_train, X_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.2, random_state=42
)

# Bag-of-words counts feeding a multinomial Naive Bayes classifier
model_steps = [
    ('vectorizer', CountVectorizer()),
    ('classifier', MultinomialNB()),
]
pipeline = Pipeline(steps=model_steps)

# Fit the vectorizer and the classifier on the training split
pipeline.fit(X_train, y_train)

# Predict on the held-out split and summarize per-class precision/recall/F1
y_pred = pipeline.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=False)

# Display the classification report.
# sep="\n" puts the report on its own line without the separator space that
# print() otherwise inserts, which shifted the header row by one column.
print("Classification Report:", report, sep="\n")


Classification Report:
               precision    recall  f1-score   support

    negative       1.00      0.67      0.80         3
    positive       0.96      1.00      0.98        23

    accuracy                           0.96        26
   macro avg       0.98      0.83      0.89        26
weighted avg       0.96      0.96      0.96        26



In [9]:
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
import re

# Stop words used by preprocess_text in this cell.
# The previous placeholder `set([...])` evaluated to {Ellipsis}, a set that
# contains no words, so stop-word removal silently did nothing after this cell
# ran. The actual word list is spelled out here instead.
manual_stop_words = {
    "i", "me", "my", "myself", "we", "our", "ours", "ourselves", "you", "your", "yours", "yourself", "yourselves",
    "he", "him", "his", "himself", "she", "her", "hers", "herself", "it", "its", "itself", "they", "them", "their",
    "theirs", "themselves", "what", "which", "who", "whom", "this", "that", "these", "those", "am", "is", "are",
    "was", "were", "be", "been", "being", "have", "has", "had", "having", "do", "does", "did", "doing", "a", "an",
    "the", "and", "but", "if", "or", "because", "as", "until", "while", "of", "at", "by", "for", "with", "about",
    "against", "between", "into", "through", "during", "before", "after", "above", "below", "to", "from", "up",
    "down", "in", "out", "on", "off", "over", "under", "again", "further", "then", "once", "here", "there", "when",
    "where", "why", "how", "all", "any", "both", "each", "few", "more", "most", "other", "some", "such", "no", "nor",
    "not", "only", "own", "same", "so", "than", "too", "very", "s", "t", "can", "will", "just", "don", "should",
    "now",
}

def preprocess_text(text, stop_words=None):
    """Normalize a free-text description for bag-of-words modelling.

    Lowercases the text, deletes every character that is not an ASCII
    lowercase letter or whitespace, and drops stop words.

    Parameters
    ----------
    text : str
        Raw text. Any non-string value (for example ``None`` or a float NaN)
        is treated as empty text instead of raising ``AttributeError`` on
        ``.lower()``.
    stop_words : collection of str, optional
        Words to discard. Defaults to the module-level ``manual_stop_words``.

    Returns
    -------
    str
        The surviving words joined by single spaces, or ``''`` if none remain.
    """
    if not isinstance(text, str):
        # Missing values have no words to contribute
        return ''
    if stop_words is None:
        stop_words = manual_stop_words
    text = text.lower()
    # Characters are deleted rather than replaced by a space, so "user-friendly"
    # becomes the single token "userfriendly".
    text = re.sub(r'[^a-z\s]', '', text)
    return ' '.join(word for word in text.split() if word not in stop_words)

# Rebuild the bag-of-words + Naive Bayes model and fit it again
model_steps = [
    ('vectorizer', CountVectorizer()),
    ('classifier', MultinomialNB()),
]
pipeline = Pipeline(steps=model_steps)
# X_train and y_train are not created in this cell; they must already exist
# in the kernel from the earlier train/test split.
pipeline.fit(X_train, y_train)




Comment: The dashboard is incredibly user-friendly and intuitive!
Sentiment: positive - Color: green

Comment: I found it difficult to navigate through the settings.
Sentiment: negative - Color: red

Comment: The data visualizations are very helpful and accurate.
Sentiment: positive - Color: green

Comment: Sometimes the dashboard takes too long to load.
Sentiment: negative - Color: red

Comment: I love the clean design and easy access to key metrics.
Sentiment: positive - Color: green

Comment: The support team was not helpful at all.
Sentiment: neutral - Color: yellow

Comment: It’s missing some features that I expected to see.
Sentiment: negative - Color: red

Comment: The recent update made everything much smoother and faster!
Sentiment: positive - Color: green

Comment: I experienced a few glitches while trying to filter data.
Sentiment: positive - Color: green

Comment: The color scheme is visually pleasing and professional.
Sentiment: positive - Color: green

Comment: Certain me

In [11]:
import streamlit as st
from textblob import TextBlob

# Define the function to classify comments with color coding
def classify_comment_with_textblob(comment):
    """Classify a comment by the sign of its TextBlob polarity score.

    Returns a ``(sentiment, color)`` tuple: ``('positive', 'green')`` for a
    polarity above zero, ``('negative', 'red')`` for a polarity below zero,
    and ``('neutral', 'yellow')`` otherwise.
    """
    score = TextBlob(comment).sentiment.polarity

    # Guard clauses on the sign; anything that is neither > 0 nor < 0 is neutral
    if score > 0:
        return 'positive', 'green'
    if score < 0:
        return 'negative', 'red'
    return 'neutral', 'yellow'

# Sample feedback comments: a hard-coded list of strings, each of which is
# classified and rendered by the loop that follows. Some entries use a
# typographic apostrophe (’) rather than an ASCII one.
comments = [
    "The dashboard is incredibly user-friendly and intuitive!",
    "I found it difficult to navigate through the settings.",
    "The data visualizations are very helpful and accurate.",
    "Sometimes the dashboard takes too long to load.",
    "I love the clean design and easy access to key metrics.",
    "The support team was not helpful at all.",
    "It’s missing some features that I expected to see.",
    "The recent update made everything much smoother and faster!",
    "I experienced a few glitches while trying to filter data.",
    "The color scheme is visually pleasing and professional.",
    "Certain metrics are hard to find, it could be more organized.",
    "Excellent tool for tracking project progress in real-time!",
    "The tutorial wasn’t very informative, could use improvement.",
    "Navigation is a breeze, I appreciate the user experience focus.",
    "I encountered a bug that froze the dashboard entirely.",
    "It’s a valuable tool but needs more customization options."
]

# Set up the Streamlit app
# Title uses the product spelling "Traxidy", matching the dataset folder name
# used when loading the issues CSV (it previously read "Traxidity").
st.title("Traxidy Dashboard Feedback Sentiment")
st.write("This dashboard displays user feedback comments color-coded by sentiment.")

# Map each color code to the Streamlit call that renders a message in that style
renderers = {
    'green': st.success,
    'red': st.error,
    'yellow': st.warning,
}

# Display each comment with its sentiment
for comment in comments:
    sentiment, color = classify_comment_with_textblob(comment)
    render = renderers.get(color)
    # A color outside the mapping is skipped, as the if/elif chain did before
    if render is not None:
        render(f"Sentiment: {sentiment.capitalize()} - {comment}")

2024-11-04 23:19:29.285 
  command:

    streamlit run /opt/anaconda3/lib/python3.12/site-packages/ipykernel_launcher.py [ARGUMENTS]
