In [None]:
!pip install nltk scikit-learn pandas numpy matplotlib seaborn


In [None]:
import pandas as pd
import numpy as np
import re
import nltk
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
# Load the labelled tweets CSV straight from GitHub (needs network access).
url = "https://raw.githubusercontent.com/dD2405/Twitter_Sentiment_Analysis/master/train.csv"
data = pd.read_csv(url)

# Preview the first rows; later cells read the 'tweet' and 'label' columns.
data.head()


In [None]:
from nltk.corpus import stopwords

# The stopword corpus is distributed separately from the nltk package. Fetch
# it here (the download is skipped when the corpus is already up to date) so
# this cell works on a fresh kernel instead of raising LookupError.
nltk.download('stopwords', quiet=True)

# A set gives constant-time membership checks inside clean_text.
stop_words = set(stopwords.words('english'))

def clean_text(text, stopword_set=None):
    """Normalise a raw tweet into a space-separated string of content words.

    Steps, in order: lowercase, drop URLs (``http...``), drop ``@mentions``
    and ``#`` signs (the hashtag word itself is kept), drop every character
    that is not ``a-z`` or whitespace, then remove stopwords.

    Args:
        text: Raw tweet. Anything that is not a ``str`` (for example ``None``
            or a float NaN standing in for a missing cell) is treated as empty.
        stopword_set: Optional collection of words to discard. Defaults to the
            notebook-level ``stop_words`` set.

    Returns:
        The cleaned tweet; ``""`` when nothing survives cleaning.
    """
    # A missing tweet would otherwise crash on ``.lower()`` inside ``Series.apply``.
    if not isinstance(text, str):
        return ""
    if stopword_set is None:
        stopword_set = stop_words

    text = text.lower()
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"@\w+|#", "", text)
    # Punctuation and digits are deleted, not replaced by a space ("it's" -> "its").
    text = re.sub(r"[^a-z\s]", "", text)
    return " ".join(w for w in text.split() if w not in stopword_set)

# Store the cleaned text in a new column so the raw 'tweet' column stays untouched.
data['clean_tweet'] = data['tweet'].apply(clean_text)
# Preview the result next to the original text.
data.head()


In [None]:
import nltk
# Fetch the NLTK stopword corpus that stopwords.words('english') reads.
nltk.download('stopwords')


In [None]:
from nltk.corpus import stopwords

# English stopwords as a set, used for membership tests in clean_text.
stop_words = set(stopwords.words('english'))

def clean_text(text, stopword_set=None):
    """Normalise a raw tweet into a space-separated string of content words.

    Steps, in order: lowercase, drop URLs (``http...``), drop ``@mentions``
    and ``#`` signs (the hashtag word itself is kept), drop every character
    that is not ``a-z`` or whitespace, then remove stopwords.

    Args:
        text: Raw tweet. Anything that is not a ``str`` (for example ``None``
            or a float NaN standing in for a missing cell) is treated as empty.
        stopword_set: Optional collection of words to discard. Defaults to the
            notebook-level ``stop_words`` set.

    Returns:
        The cleaned tweet; ``""`` when nothing survives cleaning.
    """
    # A missing tweet would otherwise crash on ``.lower()`` inside ``Series.apply``.
    if not isinstance(text, str):
        return ""
    if stopword_set is None:
        stopword_set = stop_words

    text = text.lower()
    text = re.sub(r"http\S+", "", text)
    text = re.sub(r"@\w+|#", "", text)
    # Punctuation and digits are deleted, not replaced by a space ("it's" -> "its").
    text = re.sub(r"[^a-z\s]", "", text)
    return " ".join(w for w in text.split() if w not in stopword_set)

# (Re)build the cleaned-text column from the raw 'tweet' column.
data['clean_tweet'] = data['tweet'].apply(clean_text)
# Preview the result next to the original text.
data.head()


In [None]:
# Features are the cleaned tweets; the target is the data set's 'label' column.
X = data['clean_tweet']
y = data['label']

from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed random_state makes the split repeatable.
# NOTE(review): the split is not stratified on y — consider stratify=y if the classes are imbalanced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Cap the TF-IDF vocabulary at 5000 terms.
vectorizer = TfidfVectorizer(max_features=5000)

# Fit on the training split only, then reuse the fitted vectorizer for the
# test split so no information from the held-out rows reaches the vocabulary.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)


In [None]:
from sklearn.linear_model import LogisticRegression

# Baseline classifier with the library's default hyperparameters.
model = LogisticRegression()
model.fit(X_train_tfidf, y_train)


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict labels for the held-out tweets.
y_pred = model.predict(X_test_tfidf)

# Overall accuracy, followed by the per-class precision/recall/F1 report.
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Rows are the actual labels, columns the predicted labels.
cm = confusion_matrix(y_test, y_pred)

# Draw on an explicit Axes instead of relying on pyplot's "current axes" state.
fig, ax = plt.subplots()
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
ax.set_xlabel("Predicted Label")
ax.set_ylabel("Actual Label")
ax.set_title("Confusion Matrix")
plt.show()


In [None]:
def predict_sentiment(text):
    """Classify one raw text with the TF-IDF + logistic-regression pipeline.

    The text is passed through ``clean_text`` and the fitted ``vectorizer``
    before the notebook-level ``model`` predicts its label.

    Label mapping: the training CSV is published as a hate-speech data set in
    which label 1 marks racist/sexist tweets and label 0 all other tweets, so
    a predicted 1 is reported as negative (the previous mapping reported it as
    positive). NOTE(review): confirm with ``data['label'].value_counts()`` —
    label 1 is expected to be the small minority class.

    Args:
        text: Raw, uncleaned input string.

    Returns:
        The negative string when the predicted label is 1, otherwise the
        positive string.
    """
    cleaned = clean_text(text)
    features = vectorizer.transform([cleaned])
    # ``model`` is resolved at call time: it has to be the fitted scikit-learn
    # classifier, not the transformer that is later bound to the same name.
    predicted_label = model.predict(features)[0]
    return "Negative üò†" if predicted_label == 1 else "Positive üòä"


In [None]:
# Smoke test: classify a single example sentence with the logistic-regression pipeline.
predict_sentiment("I really love this project, it's amazing!")


In [None]:
# Inspect the cleaned text that the vectorizer actually receives for that sentence.
clean_text("I really love this project, it's amazing!")


In [None]:
# Refit with a higher iteration cap (max_iter=1000). This rebinds `model`,
# replacing the classifier fitted with default settings earlier.
model = LogisticRegression(max_iter=1000)
model.fit(X_train_tfidf, y_train)


In [None]:
# Repeat the smoke test with the refitted classifier.
predict_sentiment("I really love this project, it's amazing!")


In [None]:
!pip install transformers torch


In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification


In [None]:
# Hugging Face checkpoint id; the name indicates DistilBERT fine-tuned on SST-2 (English).
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

# Load the tokenizer and the sequence-classification model for that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
# NOTE: this rebinds `model`, which until now held the LogisticRegression
# classifier. predict_sentiment calls model.predict(...), so it should not be
# used after this cell has run.
model = AutoModelForSequenceClassification.from_pretrained(model_name)


In [None]:
def bert_predict_sentiment(text):
    """Classify ``text`` with the notebook-level transformer ``model``.

    The text is tokenised with ``tokenizer`` (truncated if too long), run
    through ``model`` without gradient tracking, and the index of the largest
    logit is taken as the predicted class.

    Args:
        text: Input string to classify.

    Returns:
        The positive string when the predicted class index is 1, otherwise
        the negative string.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(**encoded).logits

    # .item() requires a single prediction, i.e. a single input text.
    label_id = torch.argmax(logits, dim=1).item()
    if label_id == 1:
        return "Positive üòä"
    return "Negative üò†"


In [None]:
# Smoke test: the same example sentence, now classified by the transformer model.
bert_predict_sentiment("I really love this project, it's amazing!")


In [None]:
# NOTE: only the value of the last expression is shown as cell output; the
# first three results are computed and then discarded.
bert_predict_sentiment("This is the worst experience ever")
bert_predict_sentiment("I hate this product")
bert_predict_sentiment("Not bad, could be better")
bert_predict_sentiment("Absolutely fantastic work!")


In [None]:
# Print the predicted sentiment for each sample sentence, one per line.
for sample in (
    "This is the worst experience ever",
    "I hate this product",
    "Not bad, could be better",
    "Absolutely fantastic work!",
):
    print(bert_predict_sentiment(sample))


In [None]:
!pip install langdetect sentencepiece


In [None]:
from langdetect import detect
from transformers import MarianMTModel, MarianTokenizer


In [None]:
# Hugging Face checkpoint id of the translation model; the "mul-en" suffix
# suggests multiple source languages into English — TODO confirm on the model card.
translator_model_name = "Helsinki-NLP/opus-mt-mul-en"

# Load the matching tokenizer and translation model; both are used by translate_to_english.
translator_tokenizer = MarianTokenizer.from_pretrained(translator_model_name)
translator_model = MarianMTModel.from_pretrained(translator_model_name)


In [None]:
def translate_to_english(text):
    """Translate ``text`` with the notebook-level translation model.

    Args:
        text: Source text to translate.

    Returns:
        The decoded first generated sequence, with special tokens stripped.
    """
    batch = translator_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    generated_ids = translator_model.generate(**batch)
    # Only the first generated sequence is decoded and returned.
    return translator_tokenizer.decode(generated_ids[0], skip_special_tokens=True)


In [None]:
def multilingual_sentiment(text):
    """Detect the language of ``text``, translate it if needed, and classify it.

    Args:
        text: Input string in any language.

    Returns:
        dict with the keys ``original_text``, ``detected_language`` (the code
        returned by ``detect``, or ``"unknown"`` when detection fails),
        ``translated_text`` (identical to ``text`` when the detected language
        is ``"en"``) and ``sentiment`` (the string returned by
        ``bert_predict_sentiment`` for the translated text).
    """
    try:
        lang = detect(text)
    except Exception:
        # Detection is best-effort: fall back to "unknown" on any detector
        # error, but let KeyboardInterrupt/SystemExit propagate (a bare
        # ``except:`` would swallow those as well).
        lang = "unknown"

    # Everything not identified as English — including "unknown" — is translated first.
    translated_text = text if lang == "en" else translate_to_english(text)

    return {
        "original_text": text,
        "detected_language": lang,
        "translated_text": translated_text,
        "sentiment": bert_predict_sentiment(translated_text),
    }


In [None]:
# English input: when detect() returns "en" the text is classified without translation.
multilingual_sentiment("I really love this project")


In [None]:
multilingual_sentiment("‡¥à ‡¥™‡µç‡¥∞‡µã‡¥ú‡¥ï‡µç‡¥ü‡µç ‡¥µ‡¥≥‡¥∞‡µÜ ‡¥®‡¥≤‡µç‡¥≤‡¥§‡¥æ‡¥£‡µç")


In [None]:
multilingual_sentiment("‡§Ø‡§π ‡§Ö‡§®‡•Å‡§≠‡§µ ‡§¨‡§π‡•Å‡§§ ‡§ñ‡§∞‡§æ‡§¨ ‡§•‡§æ")


In [None]:
multilingual_sentiment("‡Æá‡Æ®‡Øç‡Æ§ ‡Æö‡ØÜ‡ÆØ‡Æ≤‡Æø ‡ÆÖ‡Æ∞‡ØÅ‡ÆÆ‡Øà‡ÆØ‡Ææ‡Æï ‡Æâ‡Æ≥‡Øç‡Æ≥‡Æ§‡ØÅ")


In [None]:
multilingual_sentiment("‡Æá‡Æ®‡Øç‡Æ§ ‡Æö‡Øá‡Æµ‡Øà ‡ÆÆ‡Æø‡Æï‡Æµ‡ØÅ‡ÆÆ‡Øç ‡ÆÆ‡Øã‡Æö‡ÆÆ‡Ææ‡Æï ‡Æâ‡Æ≥‡Øç‡Æ≥‡Æ§‡ØÅ")


In [None]:
multilingual_sentiment("‡¥à ‡¥Ö‡¥®‡µÅ‡¥≠‡¥µ‡¥Ç ‡¥§‡µÄ‡¥∞‡µÜ ‡¥Æ‡µã‡¥∂‡¥Æ‡¥æ‡¥Ø‡¥ø‡¥∞‡µÅ‡¥®‡µç‡¥®‡µÅ")


In [None]:
multilingual_sentiment("‡∞à ‡∞Ø‡∞æ‡∞™‡±ç ‡∞ö‡∞æ‡∞≤‡∞æ ‡∞â‡∞™‡∞Ø‡±ã‡∞ó‡∞ï‡∞∞‡∞Ç‡∞ó‡∞æ ‡∞â‡∞Ç‡∞¶‡∞ø")


In [None]:
multilingual_sentiment("Cette application est vraiment g√©niale")


In [None]:
multilingual_sentiment("Esta aplicaci√≥n es terrible")


In [None]:
multilingual_sentiment("Diese App ist sehr n√ºtzlich")


In [None]:
multilingual_sentiment("„Åì„ÅÆ„Ç¢„Éó„É™„ÅØÊúÄÊÇ™„Åß„Åô")


In [None]:
multilingual_sentiment("Ïù¥ Ïï±ÏùÄ Ï†ïÎßê Ï¢ãÏïÑÏöî")
