In [None]:
# Fetch the Sentiment140 dataset through kagglehub and keep the value it returns.
# NOTE(review): per the kagglehub documentation the return value is the local
# directory that holds the dataset files — confirm for the installed version.
import kagglehub
kazanova_sentiment140_path = kagglehub.dataset_download('kazanova/sentiment140')

# Simple progress marker so the reader can see the download step finished.
print('Data source import complete.')


# ✨ Step 1: Prepare the work environment
📌 We install essential tools for data analysis (Pandas, NumPy), visualization (Matplotlib, Seaborn), machine learning (Scikit-learn), and language processing (nltk).

In [None]:
!pip install pandas numpy matplotlib seaborn scikit-learn nltk

# ✨ Step 2: Load and prepare the data
📌 We load the CSV file (it has no header row), name its columns, keep only the two we need (`target` and `text`), and map the numeric targets to readable labels (0 → negative, 2 → neutral, 4 → positive).

In [None]:
from pathlib import Path

import pandas as pd

# The Sentiment140 CSV has no header row; these are its six columns in file order.
COLUMN_NAMES = ['target', 'ids', 'date', 'flag', 'user', 'text']
CSV_NAME = 'training.1600000.processed.noemoticon.csv'

# Resolve the file inside the directory returned by kagglehub.dataset_download
# instead of hard-coding /kaggle/input, which only exists on Kaggle.
csv_path = Path(kazanova_sentiment140_path) / CSV_NAME

# Read only the two columns the model needs. Loading them directly (rather than
# slicing a wider frame afterwards) saves memory on 1.6M rows and means the label
# assignment below is not a write to a slice, so no SettingWithCopyWarning.
df = pd.read_csv(
    csv_path,
    encoding='latin-1',
    header=None,
    names=COLUMN_NAMES,
    usecols=['target', 'text'],
)

# Map the numeric polarity codes to readable labels.
df['target'] = df['target'].replace({0: 'negative', 2: 'neutral', 4: 'positive'})
df.head()

# ✨ Step 3: Clean up texts
📌 Here we prepare the text for learning: we remove links, @mentions, the `#` sign (the hashtag word itself is kept), and any remaining punctuation or symbols, then convert everything to lowercase.

In [None]:
import re

def clean_text(text):
    """Normalise a raw tweet before it is vectorised.

    Removes, in this order: URLs, @mentions, '#' characters, and every remaining
    character that is neither a word character nor whitespace. The result is
    lowercased. Whitespace left behind by removed tokens is not collapsed.
    """
    removal_patterns = (
        r"http\S+",   # links
        r"@\w+",      # mentions
        r"#",         # hashtag marker (the tag word itself stays)
        r"[^\w\s]",   # any other symbol or punctuation
    )
    # Order matters: links must go before mentions so "@" inside a URL-adjacent
    # token is handled the same way on every call.
    for pattern in removal_patterns:
        text = re.sub(pattern, "", text)
    return text.lower()

# Run every tweet through clean_text and store the result next to the raw text.
df['cleaned_text'] = df['text'].map(clean_text)

# ✨ Step 4: Convert text to numeric vectors (TF-IDF Vectorizer)
📌 We use TF-IDF to convert words into numbers that algorithms can understand. We exclude common English words (stop words) and keep only the 10,000 most frequent terms as features.

In [None]:
import nltk
nltk.download('stopwords')
from nltk.corpus import stopwords

from sklearn.feature_extraction.text import TfidfVectorizer

# The tweets were passed through clean_text(), which strips apostrophes, so a raw
# stop word such as "don't" can never match the cleaned token "dont". Apply the
# same cleaning to the stop-word list so contractions are filtered as intended.
# sorted() gives a deterministic, duplicate-free list.
stop_words = sorted({clean_text(word) for word in stopwords.words('english')})

# max_features keeps only the 10,000 most frequent terms in the vocabulary.
vectorizer = TfidfVectorizer(stop_words=stop_words, max_features=10000)
X = vectorizer.fit_transform(df['cleaned_text'])
y = df['target']

# ✨ Step 5: Split the data and train the model

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# Split the cleaned *text* rather than the already-vectorised X: X was produced by
# a vectorizer fitted on every tweet, so the test tweets had already influenced the
# vocabulary and IDF weights (data leakage). The same random_state keeps the
# train/test row assignment identical to splitting X.
text_train, text_test, y_train, y_test = train_test_split(
    df['cleaned_text'], y, test_size=0.2, random_state=42
)

# Re-fit the vectorizer on the training tweets only, then apply that fitted
# transform to the held-out tweets. The vectorizer object saved later therefore
# matches exactly what the model was trained on.
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)

model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# ✨ Step 6: Evaluate the model
📌 Here we evaluate the model on the held-out test set with a classification report: precision, recall, and F1-score for each sentiment class, plus overall accuracy.

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Predict labels for the held-out set, then print the per-class metrics table.
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred)
print(report)

In [None]:
import joblib

# Save the trained classifier to disk so the app cell can reload it without retraining.
joblib.dump(model, 'sentiment_model.pkl')

# Save the fitted TF-IDF vectorizer as well: the app cell reloads it to transform
# new text before calling the model.
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')

In [None]:
# NOTE(review): this cell repeats the two joblib.dump calls of the previous cell and
# rewrites the same two files; it looks like an accidental duplicate — confirm and
# remove it.
import joblib

joblib.dump(model, 'sentiment_model.pkl')
joblib.dump(vectorizer, 'tfidf_vectorizer.pkl')


# ✨ Step 7: Create a Gradio App

In [None]:
#install gradio
!pip install gradio

In [None]:
import gradio as gr
import joblib
import re

# Load the model and TF-IDF vectorizer from the files written by the save cell.
# joblib.load unpickles its input, so only load artifact files you trust.
model = joblib.load("sentiment_model.pkl")
vectorizer = joblib.load("tfidf_vectorizer.pkl")

# Text cleaning function
def clean_text(text):
    """Normalise text for prediction exactly as the training data was normalised.

    The substitutions run one after another in the same order as the cleaning
    function used to build the training corpus: URLs, then @mentions, then '#',
    then any remaining non-word / non-space character, followed by lowercasing.
    A single combined alternation is NOT equivalent: for "a@bhttp://x" it
    consumes "@bhttp" as a mention and leaves "ax", whereas the training-time
    cleaning yields "a". Keeping the steps separate avoids that train/serve skew.
    """
    text = re.sub(r"http\S+", "", text)  # links first, so they cannot be eaten by a mention
    text = re.sub(r"@\w+", "", text)     # mentions
    text = re.sub(r"#", "", text)        # hashtag marker only; the tag word is kept
    text = re.sub(r"[^\w\s]", "", text)  # remaining symbols / punctuation
    return text.lower()

# Sentiment prediction function
def predict_sentiment(text):
    """Classify one piece of text and return a display string for the UI.

    The text is cleaned with clean_text, transformed by the loaded vectorizer and
    passed to the loaded model; the first predicted label selects the message.
    Any label other than "positive" or "negative" is reported as Neutral.
    """
    features = vectorizer.transform([clean_text(text)])
    label = model.predict(features)[0]

    known_labels = (
        ("positive", "🎉 Sentiment: Positive"),
        ("negative", "😠 Sentiment: Negative"),
    )
    for expected, message in known_labels:
        if label == expected:
            return message
    # Fallback for every other label.
    return "😐 Sentiment: Neutral"

# Build the input and output widgets first so the Interface call stays compact.
tweet_input = gr.Textbox(lines=3, label="📝 Enter Tweet Text")
prediction_output = gr.Textbox(label="Prediction")

# Wire predict_sentiment to a single text-in / text-out Gradio interface.
interface = gr.Interface(
    fn=predict_sentiment,
    inputs=tweet_input,
    outputs=prediction_output,
    title="🔍 Twitter Sentiment Analyzer",
    description="Analyze the sentiment of tweets: Positive, Negative, or Neutral",
)

# Start the app only when this code runs as the main module.
if __name__ == "__main__":
    interface.launch()

In [None]:
#the end