<a href="https://colab.research.google.com/github/Hemant-Mhalsekar/CODTECH/blob/main/CT_Task_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
# Task 2: Sentiment Analysis (TF-IDF + Logistic Regression)

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# 1. Preprocessing: Load & Clean Data
data = {
    'review': [
        'I love this product, it is amazing!',
        'Worst experience ever. Very disappointed.',
        'Absolutely fantastic service!',
        'I will never buy this again.',
        'Best purchase I have made.',
        'Terrible product. Waste of money.'
    ],
    'sentiment': [1, 0, 1, 0, 1, 0]  # 1: Positive, 0: Negative
}

df = pd.DataFrame(data)
print("Sample Data:\n", df)

# 2. TF-IDF Vectorization (Feature Extraction)
tfidf = TfidfVectorizer(stop_words='english')  # removing common stopwords
X = tfidf.fit_transform(df['review'])
y = df['sentiment']

print("\nFeature Names after TF-IDF:\n", tfidf.get_feature_names_out())

# 3. Split the data (Training and Testing)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42)

# 4. Modeling: Logistic Regression
model = LogisticRegression()
model.fit(X_train, y_train)

# 5. Prediction
y_pred = model.predict(X_test)

# 6. Evaluation
acc = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("\nAccuracy:", acc)
print("\nConfusion Matrix:\n", cm)
print("\nClassification Report:\n", report)


Sample Data:
                                       review  sentiment
0        I love this product, it is amazing!          1
1  Worst experience ever. Very disappointed.          0
2              Absolutely fantastic service!          1
3               I will never buy this again.          0
4                 Best purchase I have made.          1
5          Terrible product. Waste of money.          0

Feature Names after TF-IDF:
 ['absolutely' 'amazing' 'best' 'buy' 'disappointed' 'experience'
 'fantastic' 'love' 'money' 'product' 'purchase' 'service' 'terrible'
 'waste' 'worst']

Accuracy: 0.5

Confusion Matrix:
 [[1 0]
 [1 0]]

Classification Report:
               precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       0.00      0.00      0.00         1

    accuracy                           0.50         2
   macro avg       0.25      0.50      0.33         2
weighted avg       0.25      0.50      0.33         2



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
