<a href="https://colab.research.google.com/github/Chandhu1206/Micro-IT/blob/main/Sentiment_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [6]:
import pandas as pd
import re
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, accuracy_score

# Step 1: Sample Dataset
# Each review sits next to its label so the two columns cannot drift out of
# alignment when rows are added or removed.
labelled_reviews = [
    ('The movie was fantastic, I enjoyed every moment.', 'positive'),
    ('This is the worst service I have ever received.', 'negative'),
    ('The experience was fine, nothing too exciting.', 'neutral'),
    ('Absolutely love this place!', 'positive'),
    ('I would never recommend this to anyone.', 'negative'),
    ('It was an average performance.', 'neutral'),
    ('I am extremely satisfied with the results.', 'positive'),
    ('Very disappointed and frustrated.', 'negative'),
    ('It met my expectations but didn’t exceed them.', 'neutral'),
    ('Wonderful atmosphere and friendly staff!', 'positive'),
    ('The package arrived, but I haven’t opened it yet.', 'neutral'),
    ('Service was neither fast nor slow.', 'neutral'),
    ('The presentation was okay, not too impressive.', 'neutral'),
    ('I regret buying this product.', 'negative'),
    ('Best decision I’ve made this year!', 'positive'),
]

# Column-oriented view of the pairs above: one list of texts, one of labels.
data = {
    'text': [review for review, _ in labelled_reviews],
    'sentiment': [label for _, label in labelled_reviews],
}

# Step 2: Create DataFrame
df = pd.DataFrame(data)

# Step 3: Preprocessing
def clean_text(text):
    """Return *text* lowercased with all non-letter, non-whitespace characters removed.

    Digits, punctuation (including apostrophes) and any character outside
    ``a-z`` are deleted rather than replaced, so whitespace is left exactly
    as it was and is not collapsed.
    """
    lowered = text.lower()
    return re.sub(r'[^a-z\s]', '', lowered)

# Normalise every review in place; the raw text in df['text'] is overwritten.
df['text'] = df['text'].apply(clean_text)

# Step 4: Split data
# 30% of the rows are held out for evaluation; random_state pins the shuffle.
# NOTE(review): the split is not stratified, so on a dataset this small the
# class mix of the test set depends entirely on the shuffle. Consider passing
# stratify=df['sentiment'] if the evaluation numbers are meant to be relied on.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'], df['sentiment'], test_size=0.3, random_state=42
)

# Step 5: Vectorize text using TF-IDF
# The vocabulary is fitted on the training split only and then reused for the
# test split.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# Step 6: Train Logistic Regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train_vec, y_train)

# Step 7: Evaluate
y_pred = model.predict(X_test_vec)
print("Classification Report:\n")
# zero_division=0 reports 0.0 for a class that is never predicted instead of
# emitting UndefinedMetricWarning; the reported numbers are unchanged.
print(classification_report(y_test, y_pred, zero_division=0))
print("Accuracy Score:", accuracy_score(y_test, y_pred))

# Step 8: Custom Predictions
custom_inputs = [
    "It is ok, but not the best",
    "You are the worst person",
    "Well done!",
    "Keep it up!",
    "You are cunning and a bad person",
    "The product arrived on time.",
    "It’s neither good nor bad, just better",
    "I have not used it much yet"
]

# Apply the same cleaning used for the training data, then vectorize and
# classify every sentence in a single batch instead of one call per sentence.
cleaned_inputs = [clean_text(text) for text in custom_inputs]
custom_preds = model.predict(vectorizer.transform(cleaned_inputs))

print("\nCustom Predictions:")
# Show the original (uncleaned) sentence next to its predicted label.
for text, pred in zip(custom_inputs, custom_preds):
    print(f"Text: {text} --> Predicted Sentiment: {pred}")


Classification Report:

              precision    recall  f1-score   support

    negative       0.00      0.00      0.00         1
     neutral       0.67      1.00      0.80         2
    positive       0.00      0.00      0.00         2

    accuracy                           0.40         5
   macro avg       0.22      0.33      0.27         5
weighted avg       0.27      0.40      0.32         5

Accuracy Score: 0.4

Custom Predictions:
Text: It is ok, but not the best --> Predicted Sentiment: neutral
Text: You are the worst person --> Predicted Sentiment: neutral
Text: Well done! --> Predicted Sentiment: neutral
Text: Keep it up! --> Predicted Sentiment: neutral
Text: You are cunning and a bad person --> Predicted Sentiment: negative
Text: The product arrived on time. --> Predicted Sentiment: neutral
Text: It’s neither good nor bad, just better --> Predicted Sentiment: neutral
Text: I have not used it much yet --> Predicted Sentiment: neutral
