<a href="https://colab.research.google.com/github/Sashidhar-hub/Ai-smart-attendance/blob/main/ML_project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
data = pd.read_csv("/content/sentimentdataset.csv")
print(data.head())
print(data.shape)
print(data.columns)

   Unnamed: 0.1  Unnamed: 0  \
0             0           0   
1             1           1   
2             2           2   
3             3           3   
4             4           4   

                                                Text    Sentiment  \
0   Enjoying a beautiful day at the park!        ...   Positive     
1   Traffic was terrible this morning.           ...   Negative     
2   Just finished an amazing workout! 💪          ...   Positive     
3   Excited about the upcoming weekend getaway!  ...   Positive     
4   Trying out a new recipe for dinner tonight.  ...   Neutral      

             Timestamp            User     Platform  \
0  2023-01-15 12:30:00   User123          Twitter     
1  2023-01-15 08:45:00   CommuterX        Twitter     
2  2023-01-15 15:45:00   FitnessFan      Instagram    
3  2023-01-15 18:20:00   AdventureX       Facebook    
4  2023-01-15 19:55:00   ChefCook        Instagram    

                                     Hashtags  Retweets  Likes     

In [10]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Data Cleaning

In [12]:
import re
import nltk
from nltk.corpus import stopwords
# downloading stopwords
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

# cleaning text functions
def clean_text(text):
  text = str(text).lower()
  text = re.sub(r"http\s+", "",text)
  text = re.sub(r"@\w+", "", text)     # remove mentions
  text = re.sub(r"#\w+", "", text)     # remove hashtags
  text = re.sub(r"[^a-z\s]", "", text) # remove punctuation & numbers
  text = ' '.join([word for word in text.split() if word not in stop_words])
  return text

# apply cleaning
data["cleaned_text"]= data["Text"].apply(clean_text)
print(data[["Text","cleaned_text"]].head())

                                                Text  \
0   Enjoying a beautiful day at the park!        ...   
1   Traffic was terrible this morning.           ...   
2   Just finished an amazing workout! 💪          ...   
3   Excited about the upcoming weekend getaway!  ...   
4   Trying out a new recipe for dinner tonight.  ...   

                       cleaned_text  
0       enjoying beautiful day park  
1          traffic terrible morning  
2          finished amazing workout  
3  excited upcoming weekend getaway  
4  trying new recipe dinner tonight  


[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import LabelEncoder

tfidf = TfidfVectorizer(max_features=5000)
X = tfidf.fit_transform(data["cleaned_text"])

le = LabelEncoder()
y = le.fit_transform(data["Sentiment"])

In [15]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred))

# Get the unique labels present in y_test
unique_test_labels = np.unique(y_test)

# Filter target_names based on unique labels in y_test
filtered_target_names = [le.classes_[i] for i in unique_test_labels]

print(classification_report(y_test, y_pred, labels=unique_test_labels, target_names=filtered_target_names, zero_division=0))

Accuracy: 0.12244897959183673
                        precision    recall  f1-score   support

         Acceptance          0.00      0.00      0.00         2
           Admiration        0.00      0.00      0.00         1
        Admiration           0.00      0.00      0.00         1
         Affection           0.00      0.00      0.00         1
      Ambivalence            0.00      0.00      0.00         1
         Anger               0.00      0.00      0.00         1
        Anticipation         0.00      0.00      0.00         1
        Arousal              0.00      0.00      0.00         3
                  Awe        0.00      0.00      0.00         1
         Awe                 0.00      0.00      0.00         1
                  Bad        0.00      0.00      0.00         1
             Betrayal        0.00      0.00      0.00         2
        Betrayal             0.00      0.00      0.00         1
         Bitter              0.00      0.00      0.00         1
         

In [16]:
def predict_sentiment(text):
    # Preprocess the text
    cleaned_text = clean_text(text)

    # Vectorize the text
    text_vector = tfidf.transform([cleaned_text])

    # Predict sentiment
    prediction = model.predict(text_vector)[0]
    probability = model.predict_proba(text_vector)[0]

    # Convert predicted label back to sentiment string
    predicted_sentiment = le.classes_[prediction]


    return predicted_sentiment, max(probability)

# Test with sample texts
test_texts = [
    "I love this new feature on Facebook!",
    "This platform is getting worse day by day",
    "The new update is okay, not great but not bad either"
]

for text in test_texts:
    sentiment, confidence = predict_sentiment(text)
    print(f"Text: {text}")
    print(f"Predicted Sentiment: {sentiment} (Confidence: {confidence:.2f})\n")

Text: I love this new feature on Facebook!
Predicted Sentiment:  Positive   (Confidence: 0.14)

Text: This platform is getting worse day by day
Predicted Sentiment:  Positive   (Confidence: 0.13)

Text: The new update is okay, not great but not bad either
Predicted Sentiment:  Positive   (Confidence: 0.10)



In [17]:
from textblob import TextBlob

def textblob_sentiment(text):
    blob = TextBlob(str(text))  # Ensure text is string
    polarity = blob.sentiment.polarity

    if polarity > 0.1:
        return 'positive'
    elif polarity < -0.1:
        return 'negative'
    else:
        return 'neutral'

# Compare with TextBlob
data['textblob_sentiment'] = data['Text'].apply(textblob_sentiment)

print("Comparison between trained model and TextBlob:")
comparison = data[['Text', 'Sentiment', 'textblob_sentiment']].head(10)
display(comparison)

Comparison between trained model and TextBlob:


Unnamed: 0,Text,Sentiment,textblob_sentiment
0,Enjoying a beautiful day at the park! ...,Positive,positive
1,Traffic was terrible this morning. ...,Negative,negative
2,Just finished an amazing workout! 💪 ...,Positive,positive
3,Excited about the upcoming weekend getaway! ...,Positive,positive
4,Trying out a new recipe for dinner tonight. ...,Neutral,positive
5,Feeling grateful for the little things in lif...,Positive,negative
6,Rainy days call for cozy blankets and hot coc...,Positive,neutral
7,The new movie release is a must-watch! ...,Positive,positive
8,Political discussions heating up on the timel...,Negative,neutral
9,Missing summer vibes and beach days. ...,Neutral,negative
