<a href="https://colab.research.google.com/github/Jaygohel-dev/Jay-Infotech/blob/main/Twitter_Sentiment_Analysis_JayGohel.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [20]:
print("Twitter Sentiment Analysis Using NLP")
print("AI Internship Submission")
print("By: Jay Gohel")
print("Date: 26-07-2025")
print("Mentor: Dr. Alok Yadav")


Twitter Sentiment Analysis Using NLP
AI Internship Submission
By: Jay Gohel
Date: 26-07-2025
Mentor: Dr. Alok Yadav


In [21]:
# @title  Install Required Packages
!pip install pandas numpy matplotlib nltk scikit-learn --quiet
!python -m nltk.downloader stopwords punkt wordnet --quiet
print(" Environment ready!")


 Environment ready!


In [30]:
# @title  Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix

print(" Libraries loaded")


 Libraries loaded


In [31]:
# @title Load Movie Reviews Dataset
# Sample dataset (50 positive/50 negative reviews)
data = {
    'review': [
        "This movie was amazing! Loved every moment.",
        "Terrible plot and bad acting.",
        "The cinematography was beautiful.",
        "Worst film I've seen this year.",
        "Highly recommended for all audiences.",
        "Boring and predictable storyline.",
        "The actors delivered great performances.",
        "Waste of time and money.",
        "A masterpiece of modern cinema.",
        "Couldn't even finish watching it."
    ],
    'sentiment': [1, 0, 1, 0, 1, 0, 1, 0, 1, 0]  # 1=positive, 0=negative
}


df = pd.DataFrame(data)
print(" Dataset Preview:")
display(df.head())

 Dataset Preview:


Unnamed: 0,review,sentiment
0,This movie was amazing! Loved every moment.,1
1,Terrible plot and bad acting.,0
2,The cinematography was beautiful.,1
3,Worst film I've seen this year.,0
4,Highly recommended for all audiences.,1


In [24]:
# @title Clean Text Data
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def clean_text(text):
    # Lowercase and remove non-alphabetic chars
    text = ''.join([c.lower() for c in str(text) if c.isalpha() or c == ' '])

    # Remove stopwords and lemmatize
    words = [lemmatizer.lemmatize(w) for w in text.split() if w not in stop_words]
    return ' '.join(words)

df['clean_review'] = df['review'].apply(clean_text)
print("🧹 Cleaned Text Examples:")
display(df[['review', 'clean_review']].head(2))


🧹 Cleaned Text Examples:


Unnamed: 0,review,clean_review
0,This movie was amazing! Loved every moment.,movie amazing loved every moment
1,Terrible plot and bad acting.,terrible plot bad acting


In [25]:
# @title  Convert Text to Numbers
tfidf = TfidfVectorizer(max_features=1000)
X = tfidf.fit_transform(df['clean_review']).toarray()
y = df['sentiment'].values

print(f" Feature Matrix Shape: {X.shape}")
print(f" Sample Features: {X[0][:5]}")  # Show first 5 features


 Feature Matrix Shape: (10, 36)
 Sample Features: [0.        0.        0.4472136 0.        0.       ]


In [26]:
# @title  Split Data
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=42
)

print(f" Training samples: {X_train.shape[0]}")
print(f" Test samples: {X_test.shape[0]}")


 Training samples: 7
 Test samples: 3


In [27]:
# @title  Train Naive Bayes Classifier
model = MultinomialNB()
model.fit(X_train, y_train)

print(" Model trained successfully!")


 Model trained successfully!


In [28]:
# @title  Check Accuracy
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print(f" Accuracy: {acc*100:.1f}%")

# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:")
print(cm)


 Accuracy: 33.3%

Confusion Matrix:
[[0 2]
 [0 1]]


In [29]:
# @ Test With Your Own Review
test_review = input("Enter a movie review: ")

# Preprocess
clean_review = clean_text(test_review)
vector = tfidf.transform([clean_review])

# Predict
prediction = model.predict(vector)[0]
print("\n💡 Prediction:", "Positive" if prediction == 1 else "Negative")


Enter a movie review: the oracle

💡 Prediction: Positive
