# Import Libraries


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Load and Explore Data


In [None]:
# Load the sentiment dataset; the cells below read its 'text' and
# 'sentiment' columns.
df = pd.read_csv('social_media_sentiment_dataset.csv')

# Only the last expression of a notebook cell is rendered automatically, and
# this cell ends with plt.show(). Each intermediate result is therefore
# printed explicitly; as bare expressions they would be silently discarded.

# First few rows of the dataset
print(df.head())

# Basic information about the dataset (info() writes its report itself)
df.info()

# Summary statistics
print(df.describe())

# Number of missing values per column
print(df.isnull().sum())

# Visualize the distribution of sentiments
sns.countplot(x='sentiment', data=df)
plt.title('Distribution of Sentiments')
plt.show()

# Data Preprocessing


In [None]:
# Rows with a missing text or label cannot be used: CountVectorizer rejects
# NaN documents, and a missing label gives nothing to train or score against.
# Drop them up front into a separate frame so `df` itself is left untouched.
# When nothing is missing, the split below is identical to splitting `df`.
labelled = df.dropna(subset=['text', 'sentiment'])

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    labelled['text'], labelled['sentiment'], test_size=0.2, random_state=42)

# Convert text to token-count vectors. The vocabulary is learned from the
# training text only and then reused, unchanged, to encode the test text.
vectorizer = CountVectorizer()
X_train_vectorized = vectorizer.fit_transform(X_train)
X_test_vectorized = vectorizer.transform(X_test)

# Model Training


In [None]:
# Multinomial Naive Bayes classifier, created with its default settings
nb_model = MultinomialNB()

# Fit the classifier on the vectorized training text and its labels
nb_model.fit(X=X_train_vectorized, y=y_train)

# Model Evaluation


In [None]:
# Predict a sentiment label for every held-out document
y_pred = nb_model.predict(X_test_vectorized)

# Overall accuracy, reported to two decimal places
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Per-class report produced by classification_report
classification_report_str = classification_report(y_test, y_pred)
print("\nClassification Report:")
print(classification_report_str)

# Confusion matrix of true labels (y_test) against predictions (y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(conf_matrix)

# Visualizations


In [None]:
# Axis labels for the plots, derived from the data instead of a hard-coded
# list. confusion_matrix (called without `labels`) orders its rows and columns
# by the sorted set of labels that occur in y_test or y_pred, so building the
# tick labels the same way keeps them aligned with the matrix even when the
# dataset uses different label spellings or a different number of classes.
sentiments = sorted(set(y_test).union(y_pred))

# Visualize the confusion matrix (rows: actual labels, columns: predictions)
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=sentiments, yticklabels=sentiments)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()

# Visualize the distribution of predicted sentiments, using the same label
# order as the confusion matrix so the two plots can be compared directly
sns.countplot(x=y_pred, order=sentiments)
plt.title('Distribution of Predicted Sentiments')
plt.show()