<a href="https://colab.research.google.com/github/Karthik-coder2007/sentiment-analysis-project/blob/main/sentiment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install nltk




In [24]:
import nltk
# Download the 'vader_lexicon' NLTK resource used by the VADER analyzer imported below.
nltk.download('vader_lexicon')

from nltk.sentiment import SentimentIntensityAnalyzer


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [2]:
# Shared VADER analyzer instance; the sentiment helper functions in later cells read this global.
sia = SentimentIntensityAnalyzer()


In [3]:
# Inspect the raw VADER score dictionary (neg / neu / pos / compound) for a mixed-tone sentence.
sia.polarity_scores("The movie was okay, not great but not bad")


{'neg': 0.161, 'neu': 0.449, 'pos': 0.391, 'compound': 0.4728}

In [4]:
# Score a clearly positive sentence and print the full score dictionary.
text = "I love this movie, it was amazing!"
score = sia.polarity_scores(text)
print(score)


{'neg': 0.0, 'neu': 0.325, 'pos': 0.675, 'compound': 0.8516}


In [5]:
def vader_sentiment(text):
    """Return "Positive", "Negative" or "Neutral" for `text` using VADER.

    The label is chosen from the compound score reported by the notebook-level
    `sia` analyzer: 0.05 or higher is positive, -0.05 or lower is negative,
    and anything in between is neutral.
    """
    compound = sia.polarity_scores(text)['compound']
    if compound <= -0.05:
        return "Negative"
    if compound >= 0.05:
        return "Positive"
    return "Neutral"


# Mixed-tone example: in the recorded run its compound score is high enough to be labelled "Positive".
vader_sentiment("The movie was okay, not great but not bad")


'Positive'

In [6]:
# Small hand-written sample covering a positive, a negative and a mixed-tone sentence.
texts = [
    "I love this movie!",
    "This movie is terrible.",
    "It was okay, not bad."
]

# Print each sentence next to the label returned by vader_sentiment.
for t in texts:
    print(t, "→", vader_sentiment(t))


I love this movie! → Positive
This movie is terrible. → Negative
It was okay, not bad. → Positive


In [7]:
!pip install nltk
import nltk
nltk.download('vader_lexicon')
from nltk.sentiment import SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()




def vader_sentiment(text, pos_thresh=0.05, neg_thresh=-0.05):
    """Label `text` with VADER and return the label together with its compound score.

    Args:
        text: String to score with the notebook-level `sia` analyzer.
        pos_thresh: Compound scores at or above this value are "Positive".
        neg_thresh: Compound scores at or below this value are "Negative".

    Returns:
        A ``(sentiment, score)`` tuple, where ``sentiment`` is "Positive",
        "Negative" or "Neutral" and ``score`` is the compound value the
        label was derived from.
    """
    score = sia.polarity_scores(text)['compound']

    # The cut-offs are inclusive (>= / <=), matching the conventional VADER
    # thresholds, so a score sitting exactly on a threshold is not reported
    # as "Neutral".
    if score >= pos_thresh:
        sentiment = "Positive"
    elif score <= neg_thresh:
        sentiment = "Negative"
    else:
        sentiment = "Neutral"

    return sentiment, score




[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [8]:
# Exercise the tuple-returning vader_sentiment: unpack the label and the compound score it was based on.
text = "The movie was okay, not great but not bad"
sentiment, score = vader_sentiment(text)
print(f"Sentiment: {sentiment}, Compound Score: {score}")


Sentiment: Positive, Compound Score: 0.4728


In [10]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score


In [11]:
import pandas as pd

# Load the labelled reviews from a CSV expected in the working directory.
review = pd.read_csv('review.csv')

# The 'text' column is the model input and the 'sentiment' column is the target label.
features = review['text']
labels = review['sentiment']

# Hold out 20% of the rows for testing, stratified on the label so class proportions
# are preserved, with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.2,
    stratify=labels,
    random_state=42
)


In [12]:
# TF-IDF feature extractor shared by training, prediction helpers and the saved artifact.
vectoriser = TfidfVectorizer(
    max_features=5000,       # cap on the vocabulary size
    ngram_range=(1,2),       # use unigrams and bigrams
    stop_words='english',    # drop the built-in English stop-word list
    sublinear_tf=True        # apply sublinear (logarithmic) term-frequency scaling
)


In [13]:
# Learn the vocabulary / IDF weights on the training text only, then apply them to the test text.
# NOTE(review): X_train / X_test are rebound from raw text to TF-IDF matrices here, so re-running
# this cell without first re-running the train/test split feeds the matrices back into the vectoriser.
X_train = vectoriser.fit_transform(X_train)
X_test = vectoriser.transform(X_test)


In [14]:
# Linear-kernel SVM with class weighting and probability estimates enabled
# (the prediction helpers below call predict_proba).
classifier = SVC(
    kernel='linear',
    C=1.0,
    probability=True,
    class_weight='balanced',
    # Probability calibration shuffles the data internally; fix the seed (same value as the
    # train/test split) so the reported probabilities are reproducible across runs.
    random_state=42
)


In [15]:
# Fit the SVM on the TF-IDF training matrix and the matching training labels.
classifier.fit(X_train, y_train)


In [16]:
# Predict labels for the held-out TF-IDF test matrix; evaluated in the next cell.
predictions = classifier.predict(X_test)


In [17]:
from sklearn.metrics import classification_report, confusion_matrix

# Report overall accuracy, per-class precision/recall/F1 and the confusion matrix on the held-out set.
# NOTE(review): the recorded run shows perfect scores on all 100 test rows; worth checking review.csv
# for duplicated or templated rows that could appear on both sides of the split.
print("Accuracy:", accuracy_score(y_test, predictions))
print("\nClassification Report:\n", classification_report(y_test, predictions))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, predictions))


Accuracy: 1.0

Classification Report:
               precision    recall  f1-score   support

    negative       1.00      1.00      1.00        36
     neutral       1.00      1.00      1.00        33
    positive       1.00      1.00      1.00        31

    accuracy                           1.00       100
   macro avg       1.00      1.00      1.00       100
weighted avg       1.00      1.00      1.00       100


Confusion Matrix:
 [[36  0  0]
 [ 0 33  0]
 [ 0  0 31]]


In [18]:
def predict_sentiment(text):
    """Classify a single string with the trained SVM.

    Args:
        text: Raw text to classify; it is vectorised with the notebook-level
            `vectoriser` before being passed to the notebook-level `classifier`.

    Returns:
        A ``(prediction, probability)`` tuple: the predicted label and the
        estimated probability of that same label.
    """
    text_vector = vectoriser.transform([text])
    prediction = classifier.predict(text_vector)[0]

    # Look up the probability of the predicted class explicitly instead of taking the row
    # maximum: SVC's probability estimates can disagree with predict(), in which case the
    # maximum would belong to a different label than the one returned.
    class_probabilities = classifier.predict_proba(text_vector)[0]
    class_index = list(classifier.classes_).index(prediction)
    probability = class_probabilities[class_index]
    return prediction, probability


In [19]:
# Spot-check the SVM helper on a negative sentence.
predict_sentiment("The course explanation was awful")


('negative', np.float64(0.3631704315007546))

In [20]:
import joblib

# Persist the fitted classifier and the fitted vectoriser as separate files in the working directory;
# both are needed together to score new text.
joblib.dump(classifier, "svm_sentiment_model.pkl")
joblib.dump(vectoriser, "tfidf_vectorizer.pkl")


['tfidf_vectorizer.pkl']

In [21]:
# Reload the persisted artifacts, rebinding the globals that predict_sentiment reads.
# NOTE: joblib files are pickle-based; only load files from a trusted source.
classifier = joblib.load("svm_sentiment_model.pkl")
vectoriser = joblib.load("tfidf_vectorizer.pkl")


In [22]:
# Same reload as the previous cell, followed by a check that the reloaded pair still predicts.
classifier = joblib.load("svm_sentiment_model.pkl")
vectoriser = joblib.load("tfidf_vectorizer.pkl")

predict_sentiment("The lecture was excellent")


('positive', np.float64(0.7507650506036465))

In [None]:
def hybrid_sentiment(text):
    """Run both sentiment models on `text` and return their results side by side.

    Args:
        text: Raw text to analyse.

    Returns:
        A dict with two entries:
        ``"VADER"`` -> ``(label, compound_score)`` from the tuple-returning
        `vader_sentiment` helper, and
        ``"SVM"`` -> ``(label, probability)`` where ``probability`` is the
        classifier's estimated probability of the label it predicted.
    """
    vader_label, vader_score = vader_sentiment(text)

    text_vector = vectoriser.transform([text])
    svm_label = classifier.predict(text_vector)[0]

    # Report the probability of the predicted class rather than the row maximum: SVC's
    # probability estimates can disagree with predict(), and the maximum would then
    # describe a different label than the one returned.
    class_probabilities = classifier.predict_proba(text_vector)[0]
    svm_prob = class_probabilities[list(classifier.classes_).index(svm_label)]

    return {
        "VADER": (vader_label, vader_score),
        "SVM": (svm_label, svm_prob)
    }

# Compare both models on the mixed-tone sentence; in the recorded run VADER says Positive and the SVM says neutral.
hybrid_sentiment("The movie was okay, not great but not bad")


{'VADER': ('Positive', 0.4728),
 'SVM': ('neutral', np.float64(0.9070525018055635))}