# 💬 Corrected Sentiment Analysis using TF-IDF + Logistic Regression
This version improves prediction accuracy for positive/negative sentiment by using better preprocessing and a better model.

In [None]:
# ✅ Step 1: Install Required Libraries
!pip install -q gradio nltk sklearn

In [None]:
# ✅ Step 2: Import Libraries
import pandas as pd
import numpy as np
import nltk
import gradio as gr
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import re
nltk.download('stopwords')
from nltk.corpus import stopwords

In [None]:
# ✅ Step 3: Load Dataset
# Download the labelled tweet CSV, keep only the tweet and label columns,
# and expose the tweet column under the name 'text' used by later cells.
url = "https://raw.githubusercontent.com/dD2405/Twitter_Sentiment_Analysis/master/train.csv"
df = (
    pd.read_csv(url)
    .loc[:, ['tweet', 'label']]
    .rename(columns={'tweet': 'text'})
)
df['label'].value_counts()  # Check class distribution

In [None]:
# ✅ Step 4: Preprocess the Text
_ENGLISH_STOPWORDS = None  # populated on first use by _english_stopwords()


def _english_stopwords():
    """Return NLTK's English stop words as a frozenset, loading them only once."""
    global _ENGLISH_STOPWORDS
    if _ENGLISH_STOPWORDS is None:
        # stopwords.words() builds a fresh list on every call; cache it as a
        # set so each membership test is O(1) instead of a list scan.
        _ENGLISH_STOPWORDS = frozenset(stopwords.words('english'))
    return _ENGLISH_STOPWORDS


def clean_text(text):
    """Normalise a tweet for TF-IDF vectorisation.

    Steps, in order: drop @mentions, drop '#' characters (the hashtag word is
    kept), drop the retweet marker "RT", drop http/https URLs, replace every
    non-word character with a space, lowercase, and remove English stop words.

    Args:
        text: The raw tweet or review. Non-string values (e.g. None or NaN)
            are treated as empty input.

    Returns:
        The cleaned text as space-separated lowercase tokens; may be an empty
        string.
    """
    if not isinstance(text, str):
        return ''
    text = re.sub(r'@[A-Za-z0-9_]+', '', text)
    text = re.sub(r'#', '', text)
    # \b anchors the marker to a word start so words that merely end in "RT"
    # (e.g. "SMART people") are not mangled.
    text = re.sub(r'\bRT\s+', '', text)
    text = re.sub(r'https?:\/\/\S+', '', text)
    text = re.sub(r'\W', ' ', text)
    stop_words = _english_stopwords()
    return ' '.join(word for word in text.lower().split() if word not in stop_words)

# Clean every tweet in place; the raw text is not needed afterwards.
df['text'] = df['text'].map(clean_text)

In [None]:
# ✅ Step 5: Feature Extraction and Model Training
X = df['text']
y = df['label']

# Split the raw text BEFORE vectorising so the TF-IDF vocabulary and IDF
# weights are learned from training tweets only; fitting on the whole corpus
# leaks test-set statistics into the features and inflates the scores below.
# stratify=y keeps the class ratio identical in both partitions.
text_train, text_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

vectorizer = TfidfVectorizer()
X_train = vectorizer.fit_transform(text_train)
X_test = vectorizer.transform(text_test)
# Kept so any later cell that expects the whole corpus in vector form still works.
X_vec = vectorizer.transform(X)

# class_weight='balanced' reweights classes inversely to their frequency so a
# skewed label distribution does not push the model towards always predicting
# the majority class (NOTE(review): confirm the skew via the value_counts()
# output in Step 3). max_iter is raised to give the solver room to converge.
model = LogisticRegression(class_weight='balanced', max_iter=1000)
model.fit(X_train, y_train)

pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, pred))
print(classification_report(y_test, pred))

In [None]:
# ✅ Step 6: Prediction Function
def predict_sentiment(text):
    """Classify a single piece of text with the trained model.

    The text is cleaned with clean_text, vectorised with the fitted TF-IDF
    vectorizer, and passed to the trained classifier.

    Args:
        text: Raw user input (a tweet or short review).

    Returns:
        "Negative 😠" when the model predicts label 1, otherwise "Positive 😊".
    """
    cleaned = clean_text(text)
    features = vectorizer.transform([cleaned])
    prediction = model.predict(features)[0]
    # NOTE(review): per the published description of this training set, label 1
    # marks racist/sexist (i.e. negative) tweets and label 0 everything else,
    # so 1 must map to "Negative". Confirm by inspecting a few label-1 rows.
    return "Negative 😠" if prediction == 1 else "Positive 😊"

In [None]:
# ✅ Step 7: Gradio Interface
# Two-line text box in which the user types the tweet or review to classify.
tweet_box = gr.Textbox(lines=2, placeholder="Enter a review or tweet here...")

# Wire the text box to predict_sentiment and show its string result as text.
interface = gr.Interface(
    fn=predict_sentiment,
    inputs=tweet_box,
    outputs="text",
    title="Sentiment Analysis (Improved Version)",
    description="This app uses Logistic Regression + TF-IDF for better accuracy.",
)

# Start the web UI.
interface.launch()