In [1]:
# Sentiment Chatbot using NLTK + HuggingFace Transformers

# Import libraries
import nltk
import torch
from nltk.tokenize import word_tokenize
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Fetch the NLTK resources this notebook requests, in a fixed order.
for nltk_resource in ('vader_lexicon', 'stopwords', 'punkt'):
    nltk.download(nltk_resource)

# Checkpoint identifier of the pre-trained sentiment classifier; the model
# weights and the tokenizer are both loaded from this same name so that they
# stay matched to each other.
model_name = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(
    model_name,
)
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
)

# Define chatbot function
def chatbot(user_input: str, threshold: float = 0.6) -> str:
    """Return a canned reply chosen from the predicted sentiment of a message.

    The message is tokenized with the module-level ``tokenizer`` and scored
    by the module-level ``model``. The logits are turned into probabilities
    with a softmax; column 1 is treated as the positive score and column 0
    as the negative score.

    Args:
        user_input: The text typed by the user.
        threshold: Probability a class must exceed for its reply to be
            chosen. Defaults to 0.6.

    Returns:
        The "happy" reply when the positive score exceeds ``threshold``,
        the "sorry" reply when the negative score exceeds it, and the
        neutral reply otherwise (including for empty or whitespace-only
        input, which is never sent to the model).
    """
    neutral_reply = "That's interesting! Can you tell me more?"

    # There is nothing to classify in a blank message; scoring it would
    # produce an arbitrary sentiment, so answer neutrally instead.
    if not user_input or not user_input.strip():
        return neutral_reply

    # truncation=True caps the token sequence at the tokenizer's maximum
    # length so that a very long message does not fail in the forward pass.
    inputs = tokenizer(user_input, return_tensors='pt', truncation=True)

    # Inference only: gradients are not needed.
    with torch.no_grad():
        logits = model(**inputs).logits
        probs = torch.nn.functional.softmax(logits, dim=1)
        positive_prob = probs[:, 1].item()
        negative_prob = probs[:, 0].item()

    if positive_prob > threshold:
        return "You seem happy! What's making you smile?"
    if negative_prob > threshold:
        return "Sorry to hear that. Would you like to talk about it?"
    return neutral_reply

# Run chatbot
if __name__ == "__main__":
    # Hold a fixed-length conversation: read a line, print the bot's reply,
    # and stop after three exchanges.
    rounds_left = 3
    while rounds_left > 0:
        user_input = input("You: ")
        reply = chatbot(user_input)
        print("Bot:", reply)
        rounds_left -= 1


[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/629 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/268M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

You: hi!!! got an icecream
Bot: You seem happy! What's making you smile?
You: yeahh
Bot: You seem happy! What's making you smile?
You: dtgdc
Bot: Sorry to hear that. Would you like to talk about it?
