In [1]:
# 📌 STEP 1: Install required packages
!pip install gradio --quiet


[notice] A new release of pip is available: 24.0 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd
import numpy as np
import re
import string
import gradio as gr

from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

In [7]:
# 2. Load and preprocess dataset
columns = ['target', 'id', 'date', 'query', 'user', 'text']
df = pd.read_csv(r"training.1600000.processed.noemoticon.csv", sep=',', header=None, names=columns, encoding='latin-1')
df['target'] = df['target'].apply(lambda x: 1 if x == 4 else 0)  # assuming 4 = positive, 0 = negative

In [11]:
df.head()

Unnamed: 0,target,id,date,query,user,text,clean_text
0,0,1467810369,Mon Apr 06 22:19:45 PDT 2009,NO_QUERY,_TheSpecialOne_,"@switchfoot http://twitpic.com/2y1zl - Awww, t...",awww thats a bummer you shoulda got david carr...
1,0,1467810672,Mon Apr 06 22:19:49 PDT 2009,NO_QUERY,scotthamilton,is upset that he can't update his Facebook by ...,is upset that he cant update his facebook by t...
2,0,1467810917,Mon Apr 06 22:19:53 PDT 2009,NO_QUERY,mattycus,@Kenichan I dived many times for the ball. Man...,i dived many times for the ball managed to sav...
3,0,1467811184,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,ElleCTF,my whole body feels itchy and like its on fire,my whole body feels itchy and like its on fire
4,0,1467811193,Mon Apr 06 22:19:57 PDT 2009,NO_QUERY,Karoli,"@nationwideclass no, it's not behaving at all....",no its not behaving at all im mad why am i her...


In [8]:
# 3. Text preprocessing
def clean_text(text):
    text = text.lower()
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'@\w+', '', text)
    text = re.sub(r'#\w+', '', text)
    text = re.sub(f"[{re.escape(string.punctuation)}]", '', text)
    text = re.sub(r'\d+', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    return text

In [9]:
df['clean_text'] = df['text'].apply(clean_text)

In [10]:
# 4. EDA (Quick checks)
print("Positive:", sum(df['target'] == 1))
print("Negative:", sum(df['target'] == 0))

Positive: 800000
Negative: 800000


In [12]:
# 5. Feature extraction
vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['clean_text'])
y = df['target']

In [13]:
# 6. Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
# 7. Model training
model = LogisticRegression()
model.fit(X_train, y_train)

In [15]:
# 8. Evaluation
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.79019375
              precision    recall  f1-score   support

           0       0.80      0.78      0.79    159494
           1       0.78      0.80      0.79    160506

    accuracy                           0.79    320000
   macro avg       0.79      0.79      0.79    320000
weighted avg       0.79      0.79      0.79    320000



In [16]:
# 9. Gradio Interface
def predict_sentiment(text):
    cleaned = clean_text(text)
    vector = vectorizer.transform([cleaned])
    prediction = model.predict(vector)[0]
    return "Positive 😊" if prediction == 1 else "Negative 😞"

In [17]:
gr.Interface(
    fn=predict_sentiment,
    inputs=gr.Textbox(label="Enter a Tweet"),
    outputs=gr.Textbox(label="Predicted Sentiment"),
    title="Sentiment Analyzer",
    description="Analyze the sentiment of social media tweets (Positive or Negative)"
).launch(share=True)


* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://d8bdbbd9fbc976707b.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


