<a href="https://colab.research.google.com/github/Dharunika-07/Political_sentiment_analysis/blob/main/Political_Sentimental_Analysis_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report

# Read the labelled training set. The path points into a Google Drive mount
# (Colab's /content/drive), so the drive is presumably mounted beforehand.
train_csv_path = '/content/drive/MyDrive/train_data.csv'
train_data = pd.read_csv(train_csv_path)
# Sanity check: show the first rows so the column layout can be verified by eye.
print("Train Data Head:\n", train_data.head())

# Preprocess text: Remove special characters and convert to lowercase
def preprocess_text(text):
    """Lowercase *text* and strip punctuation and symbol characters.

    The input is first coerced with ``str()``, so non-string values (for
    example numbers or missing values) are processed as their string form.

    Characters are filtered by Unicode general category: everything in the
    punctuation (``P*``) and symbol (``S*``) categories is dropped, except the
    underscore, which is kept as a word character.  Letters, digits,
    whitespace and combining marks are preserved.

    Why not ``str.replace`` or ``re.sub(r"[^\\w\\s]", "")``:
    ``str.replace`` treats its first argument as a literal substring, so a
    regex-looking pattern never matches and nothing is removed.  A regex based
    on ``\\w`` would remove too much: ``\\w`` does not match combining marks,
    so Tamil vowel signs and the pulli would be deleted and the words
    corrupted.

    Args:
        text: Value to clean; converted with ``str()``.

    Returns:
        The lowercased string without punctuation or symbol characters.
    """
    # Function-scope import keeps this block self-contained.
    import unicodedata

    lowered = str(text).lower()
    return "".join(
        ch
        for ch in lowered
        # Category codes start with "P" for punctuation and "S" for symbols.
        if ch == "_" or unicodedata.category(ch)[0] not in "PS"
    )

# Clean the training text in place with preprocess_text.
train_data['content'] = train_data['content'].apply(preprocess_text)

# Split data into features (X) and labels (y)
X = train_data['content']
y = train_data['labels']

# Hold out 20% of the labelled rows for evaluation; the fixed random_state
# keeps the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert text data to bag-of-words counts using CountVectorizer.
# NOTE: the default token pattern is built on `\w`, which in Python's `re`
# does not match combining marks. Tamil vowel signs and the pulli are
# combining marks, so the default pattern cuts every Tamil word apart at each
# of them. The pattern below adds the Tamil block (U+0B80-U+0BFF) to the token
# character class and keeps the default minimum token length of two characters.
vectorizer = CountVectorizer(token_pattern=r"(?u)[\w\u0B80-\u0BFF]{2,}")
X_train_vect = vectorizer.fit_transform(X_train)  # vocabulary comes from the training split only
X_test_vect = vectorizer.transform(X_test)

# Train the Naive Bayes model
model = MultinomialNB()
model.fit(X_train_vect, y_train)

# Predict labels for the held-out split
y_pred = model.predict(X_test_vect)

# Evaluate the model on the held-out split
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:\n", classification_report(y_test, y_pred))

# Predict labels for the separate test dataset file, reusing the vocabulary
# fitted on the training split.
test_data = pd.read_csv('/content/drive/MyDrive/test_data.csv')  # Replace with your actual test dataset file
test_data['content'] = test_data['content'].apply(preprocess_text)
X_test_full = vectorizer.transform(test_data['content'])
predicted_labels = model.predict(X_test_full)

# Add predictions to test dataset
test_data['predicted_labels'] = predicted_labels

# Save predictions to a new CSV file. Note that 'content' holds the
# preprocessed text at this point, not the raw text read from the file.
# 'utf-8-sig' writes a byte-order mark at the start of the file.
output_file = 'sentiment_predictions_nb.csv'
test_data[['content', 'predicted_labels']].to_csv(output_file, index=False, encoding='utf-8-sig')
print(f"Predictions saved to {output_file}")


Train Data Head:
                                              content         labels
0  தென்காசி தொகுதி புதிய தமிழகம் கட்சி வேட்பாளர் ...        Neutral
1  அண்ணன் இதனை சூசகமாக 11 மாதங்கள் முன்பே பேட்டிய...  Substantiated
2  ஒரு வருடம் ஆகி விட்டது இந்த துயரம் நேர்ந்து......    Opinionated
3  எடப்பாடியை கண்டுகொள்ளாத "எடப்பாடி"🫢\n ---\nஆதர...       Positive
4  எங்களின் அரசியல் அடுத்த தலைமுறைக்குமானது \n#மக...    Opinionated
Accuracy: 0.31
Classification Report:
                    precision    recall  f1-score   support

         Negative       0.15      0.02      0.04        82
          Neutral       0.17      0.15 