<a href="https://colab.research.google.com/github/GULSHANKUMAR6079/Sentiment-analysis/blob/main/Twitter.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
import seaborn as sns
import joblib
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer

# Module-level side effect: this runs every time the cell/module is executed.
# The lexicon is required by SentimentIntensityAnalyzer (used in predict_sentiment).
nltk.download('vader_lexicon')  # Download VADER lexicon if not already downloaded

class SentimentAnalysisModel:
    """TF-IDF + LinearSVC text classifier with a VADER-based negative override.

    The classifier is a scikit-learn ``Pipeline`` persisted with joblib under
    ``model_filename``. On construction the saved pipeline is loaded if the
    file exists; otherwise an unfitted pipeline is kept and the caller is
    expected to call :meth:`train_model` before predicting.
    """

    def __init__(self, model_filename='sentiment_model.joblib'):
        """Create the model wrapper and try to load a previously saved pipeline.

        Args:
            model_filename: Path of the joblib file used by ``save_model`` and
                ``load_model``.
        """
        self.model_filename = model_filename
        self.pipeline = Pipeline([
            ('tfidf', TfidfVectorizer()),
            ('clf', LinearSVC())
        ])
        # VADER analyzer is created lazily (see _get_analyzer) and reused, so
        # the lexicon is not re-read on every prediction.
        self._analyzer = None
        try:
            self.load_model()
            print("Model loaded successfully!")
        except FileNotFoundError:
            # Nothing is trained here; the pipeline stays unfitted until the
            # caller invokes train_model().
            print("Model file not found. Training a new model...")

    def train_model(self, X_train, y_train):
        """Fit the pipeline on the training data and save it to disk.

        Args:
            X_train: Training texts; must support ``fillna`` (e.g. a pandas
                Series). Missing entries are replaced by empty strings.
            y_train: Labels aligned with ``X_train``.
        """
        X_train = X_train.fillna('')
        self.pipeline.fit(X_train, y_train)
        self.save_model()

    def evaluate_model(self, X_test, y_test):
        """Predict on the test texts and compute evaluation metrics.

        Args:
            X_test: Test texts; must support ``fillna``. Missing entries are
                replaced by empty strings.
            y_test: True labels aligned with ``X_test``.

        Returns:
            dict with keys ``'classification_report'``, ``'confusion_matrix'``
            and ``'accuracy_score'``.
        """
        X_test = X_test.fillna('')
        y_pred = self.pipeline.predict(X_test)
        metrics = {
            'classification_report': classification_report(y_test, y_pred),
            'confusion_matrix': confusion_matrix(y_test, y_pred),
            'accuracy_score': accuracy_score(y_test, y_pred)
        }
        return metrics

    def visualize_confusion_matrix(self, confusion_matrix):
        """Plot a confusion matrix as an annotated heatmap.

        Args:
            confusion_matrix: Matrix of integer counts (annotated with the
                ``'d'`` format). Note: inside this method the parameter name
                shadows the imported ``confusion_matrix`` function.
        """
        plt.figure(figsize=(8, 6))
        sns.heatmap(confusion_matrix, annot=True, fmt='d', cmap='Blues')
        plt.title('Confusion Matrix')
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        plt.show()

    def save_model(self):
        """Write the current pipeline to ``self.model_filename`` with joblib."""
        joblib.dump(self.pipeline, self.model_filename)

    def load_model(self):
        """Replace ``self.pipeline`` with the one stored in ``self.model_filename``.

        Raises:
            FileNotFoundError: If the model file does not exist.
        """
        # SECURITY: joblib.load unpickles arbitrary objects; only load model
        # files from a trusted source.
        self.pipeline = joblib.load(self.model_filename)

    def _get_analyzer(self):
        """Return the shared VADER analyzer, creating it on first use."""
        analyzer = getattr(self, '_analyzer', None)
        if analyzer is None:
            analyzer = SentimentIntensityAnalyzer()
            self._analyzer = analyzer
        return analyzer

    def predict_sentiment(self, text, neg_threshold=0.5):
        """Predict the sentiment of a single piece of text.

        VADER is consulted first: if its ``'neg'`` score exceeds
        ``neg_threshold`` the result is forced to ``-1.0``. Otherwise the
        trained pipeline's prediction is returned.

        Args:
            text: The text to classify. ``None``/NaN is treated as an empty
                string.
            neg_threshold: VADER ``'neg'`` score above which the model is
                bypassed and ``-1.0`` is returned. Defaults to 0.5.

        Returns:
            ``-1.0`` when the VADER override fires, else the pipeline's
            predicted label for ``text``.
        """
        # Normalise missing input before VADER sees it (VADER expects a str).
        if text is None or (isinstance(text, float) and pd.isna(text)):
            text = ""

        # 1. Use VADER for initial sentiment analysis
        vader_scores = self._get_analyzer().polarity_scores(text)

        # 2. If VADER detects strong negative sentiment, override model prediction
        if vader_scores['neg'] > neg_threshold:
            return -1.0  # Return negative sentiment (-1.0)

        # 3. Otherwise, use the model's prediction (pipeline expects an iterable
        #    of documents, hence the one-element Series).
        batch = pd.Series([text])
        return self.pipeline.predict(batch)[0]

def prepare_dataset(df, text_column, label_column):
    """Split ``df`` into train/test parts and discard rows without a label.

    The split is 80/20 with a fixed ``random_state`` of 42. Rows whose label
    is missing are removed from each part after splitting; rows with missing
    text are kept.

    Returns:
        Tuple ``(X_train, X_test, y_train, y_test)``.
    """
    def _labelled_only(features, labels):
        # Keep only positions where the label is present.
        has_label = labels.notna()
        return features[has_label], labels[has_label]

    texts = df[text_column]
    labels = df[label_column]
    X_train, X_test, y_train, y_test = train_test_split(
        texts, labels, test_size=0.2, random_state=42
    )

    X_train, y_train = _labelled_only(X_train, y_train)
    X_test, y_test = _labelled_only(X_test, y_test)
    return X_train, X_test, y_train, y_test

def main():
    """Load or train the sentiment model, then run an interactive prompt.

    If no saved model could be loaded, the dataset is read from
    ``/content/Twitter_Data.csv``, the model is trained and evaluated, and the
    classification report and confusion matrix are shown. Afterwards the user
    is prompted for text until they enter ``quit``.
    """
    sentiment_model = SentimentAnalysisModel()

    # Train only if the model wasn't loaded from a file. An unfitted LinearSVC
    # has no ``coef_`` attribute at all, so it must be probed with getattr
    # rather than accessed directly (which would raise AttributeError).
    clf = sentiment_model.pipeline.named_steps['clf']
    coef = getattr(clf, 'coef_', None)
    if coef is None or coef.size == 0:
        # The dataset is only needed for training, so read it only here.
        df = pd.read_csv('/content/Twitter_Data.csv')
        text_column = 'text' if 'text' in df.columns else 'clean_text'
        label_column = 'sentiment' if 'sentiment' in df.columns else 'category'

        X_train, X_test, y_train, y_test = prepare_dataset(df, text_column, label_column)
        sentiment_model.train_model(X_train, y_train)
        metrics = sentiment_model.evaluate_model(X_test, y_test)
        print(metrics['classification_report'])
        sentiment_model.visualize_confusion_matrix(metrics['confusion_matrix'])

    # Get user input and predict sentiment
    while True:
        user_input = input("Enter text for sentiment analysis (or 'quit' to exit): ")
        # Tolerate surrounding whitespace and any letter case for the exit word.
        if user_input.strip().lower() == 'quit':
            break
        prediction = sentiment_model.predict_sentiment(user_input)
        print("Sentiment:", prediction)

# Run the interactive workflow only when this file is executed directly
# (not when it is imported as a module).
if __name__ == '__main__':
    main()

# Module-level statement: when run directly it executes only after main()
# returns, i.e. after the user has typed 'quit'.
print("Sentiment Analysis Model is ready!")

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...


Model loaded successfully!
Enter text for sentiment analysis (or 'quit' to exit): foolish
Sentiment: -1.0
Enter text for sentiment analysis (or 'quit' to exit): regret 
Sentiment: -1.0
Enter text for sentiment analysis (or 'quit' to exit): awful
Sentiment: -1.0
Enter text for sentiment analysis (or 'quit' to exit): harassment
Sentiment: -1.0
Enter text for sentiment analysis (or 'quit' to exit): happy
Sentiment: 1.0
Enter text for sentiment analysis (or 'quit' to exit): someone is seeing me
Sentiment: 0.0
Enter text for sentiment analysis (or 'quit' to exit): someone is gazing me
Sentiment: 0.0
Enter text for sentiment analysis (or 'quit' to exit): cheatoing
Sentiment: 0.0
Enter text for sentiment analysis (or 'quit' to exit): cheating
Sentiment: -1.0
Enter text for sentiment analysis (or 'quit' to exit): quit
Sentiment Analysis Model is ready!
