In [1]:
!pip install transformers
!pip install torch
!pip install pandas



In [2]:
ls

[0m[01;34mlost+found[0m/  messages_20240520.xlsx  practicum_cb.ipynb


In [3]:
import pandas as pd
from transformers import GPTNeoForCausalLM, GPT2Tokenizer
import torch
from sklearn.model_selection import train_test_split

# Load data
data = pd.read_excel('messages_20240520.xlsx')

# Preprocess data
data['Date'] = pd.to_datetime(data['Date'])
data = data.dropna(subset=['Text'])  # Drop rows with missing text
data['Text'] = data['Text'].astype(str)  # Ensure text column is string type

# Split data
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

In [4]:
train_data

Unnamed: 0,Date,Contact UUID,Contact Name,URN Scheme,URN Value,Flow,Direction,Text,Attachments,Status,Channel,Labels
756111,2024-01-17 10:31:51,35ca175a-e695-43ab-b09a-4a7142b9c7e4,friend,tel,27632508008,AFFIRM-2-part1,IN,👍🏽Yes,,handled,Moya,
349469,2023-12-31 15:07:37,124f3e91-1ec8-4a15-8e31-73a312fa89b8,friend,tel,27810071650,SCREENER-normal,IN,Tell me,,handled,Moya,
816143,2024-01-18 20:59:44,de4271c8-91ad-437d-ad5f-71bc6c747815,friend,tel,27612420701,Stories-surprise,IN,Next 👉🏽,,handled,Moya,
269993,2023-12-21 20:10:56,5b2d2fcf-5d4e-4fed-9883-9533a54a094f,friend,tel,27640365420,BETTER-menu,IN,👍🏽Yes,,handled,Moya,
53587,2023-12-04 05:29:22,71499b9d-e50d-459b-afbe-5066424b0284,friend,tel,27604023555,BETTER-breathing-menu,IN,🧠 Box,,handled,Moya,
...,...,...,...,...,...,...,...,...,...,...,...,...
259444,2023-12-20 17:32:16,61452fd2-ada6-4e24-9dcb-540c4218cb8f,friend,tel,27641791761,BETTER-menu,IN,🌈 FanaFana Fundi,,handled,Moya,
366139,2024-01-02 18:11:30,6ea2207e-ca08-4833-9442-f91d3509ab24,friend,tel,27648192647,FORGET,IN,Forget,,handled,Moya,
132179,2023-12-09 18:50:33,074b79ef-fc5c-40eb-87f8-3180f00770f0,friend,tel,27656610248,Main-Menu-04-ReducedMenu4Options,IN,Read stories 📘,,handled,Moya,
671513,2024-01-15 11:24:32,48bde2c6-d9a0-43ae-883c-ad1b0c592644,friend,tel,27676679745,Main-Menu-04-ReducedMenu4Options,IN,Health quizzes 🧠,,handled,Moya,


In [5]:
# Initialize the GPT-Neo model and tokenizer
model_name = "EleutherAI/gpt-neo-2.7B"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPTNeoForCausalLM.from_pretrained(model_name)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

GPTNeoForCausalLM(
  (transformer): GPTNeoModel(
    (wte): Embedding(50257, 2560)
    (wpe): Embedding(2048, 2560)
    (drop): Dropout(p=0.0, inplace=False)
    (h): ModuleList(
      (0-31): 32 x GPTNeoBlock(
        (ln_1): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (attn): GPTNeoAttention(
          (attention): GPTNeoSelfAttention(
            (attn_dropout): Dropout(p=0.0, inplace=False)
            (resid_dropout): Dropout(p=0.0, inplace=False)
            (k_proj): Linear(in_features=2560, out_features=2560, bias=False)
            (v_proj): Linear(in_features=2560, out_features=2560, bias=False)
            (q_proj): Linear(in_features=2560, out_features=2560, bias=False)
            (out_proj): Linear(in_features=2560, out_features=2560, bias=True)
          )
        )
        (ln_2): LayerNorm((2560,), eps=1e-05, elementwise_affine=True)
        (mlp): GPTNeoMLP(
          (c_fc): Linear(in_features=2560, out_features=10240, bias=True)
          (c_proj)

In [6]:
# Prompt Engineering
def create_prompt(text):
    return f"Classify the following message for mental health concerns: {text}"

def sentiment_prompt(text):
    return f"Analyze the sentiment of the following message: {text}"

# Real-time Classification and Automated Response
def classify_message(message):
    prompt = create_prompt(message)
    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    classification = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return classification

def analyze_sentiment(message):
    prompt = sentiment_prompt(message)
    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    sentiment = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return sentiment

def automated_response(classification, sentiment):
    if 'acute' in classification:
        return "Alerting moderator: Immediate attention needed!"
    elif 'positive' in sentiment:
        return "Thank you for your positive message!"
    else:
        return "We are here to support you. Please reach out if you need help."

def process_message(message):
    classification = classify_message(message)
    sentiment = analyze_sentiment(message)
    response = automated_response(classification, sentiment)
    return response

# Alerting Moderators and Personalization
def alert_moderator(message, classification):
    if 'acute' in classification:
        print(f"Moderator Alert: {message}")

def personalized_response(message, sentiment):
    if 'negative' in sentiment:
        return f"We noticed you're feeling down. We're here for you. {message}"
    else:
        return f"Thank you for your message. {message}"

In [7]:
# Example usage
message = "I feel really depressed and don't know what to do."
classification = classify_message(message)
sentiment = analyze_sentiment(message)
response = automated_response(classification, sentiment)
alert_moderator(message, classification)
print(personalized_response(response, sentiment))

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Thank you for your message. We are here to support you. Please reach out if you need help.


In [9]:
# Prompt Engineering
def create_prompt(text):
    return f"Classify the following message for mental health concerns: {text}"

def sentiment_prompt(text):
    return f"Analyze the sentiment of the following message: {text}"

# Real-time Classification and Automated Response
def classify_message(message):
    prompt = create_prompt(message)
    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    classification = tokenizer.decode(outputs[0].to('cpu'), skip_special_tokens=True)  # Move output to CPU
    return classification

def analyze_sentiment(message):
    prompt = sentiment_prompt(message)
    inputs = tokenizer(prompt, return_tensors='pt').to(device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    sentiment = tokenizer.decode(outputs[0].to('cpu'), skip_special_tokens=True)  # Move output to CPU
    return sentiment

def automated_response(classification, sentiment):
    if 'acute' in classification:
        return "Alerting moderator: Immediate attention needed!"
    elif 'positive' in sentiment:
        return "Thank you for your positive message!"
    else:
        return "We are here to support you. Please reach out if you need help."

def process_message(message):
    classification = classify_message(message)
    sentiment = analyze_sentiment(message)
    response = automated_response(classification, sentiment)
    return response

# Alerting Moderators and Personalization
def alert_moderator(message, classification):
    if 'acute' in classification:
        print(f"Moderator Alert: {message}")

def personalized_response(message, sentiment):
    if 'negative' in sentiment:
        return f"We noticed you're feeling down. We're here for you. {message}"
    else:
        return f"Thank you for your message. {message}"

# Example usage
message = "I feel really depressed and don't know what to do."
classification = classify_message(message)
sentiment = analyze_sentiment(message)
response = automated_response(classification, sentiment)
alert_moderator(message, classification)
print(personalized_response(response, sentiment))

# Testing and Optimization
def test_chatbot(test_data):
    # This function is for demonstration purposes
    for _, row in test_data.iterrows():
        message = row['Text']
        classification = classify_message(message)
        sentiment = analyze_sentiment(message)
        print(f"Message: {message}")
        print(f"Classification: {classification}")
        print(f"Sentiment: {sentiment}")
        print(f"Response: {automated_response(classification, sentiment)}")
        print()

# Test the chatbot with a subset of test data
test_chatbot(test_data.head(10))  # Test with the first 10 rows of the test set

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Thank you for your message. We are here to support you. Please reach out if you need help.


Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Message: 🧠 Tests
Classification: Classify the following message for mental health concerns: 🧠 Tests are normal. 🧠 I am fine. 🧠 I am not fine. 🧠 I am not sure. 🧠 I am not sure. 🧠 I am not sure. 🧠 I am not sure.
Sentiment: Analyze the sentiment of the following message: 🧠 Tests are running 🧠

The sentiment of the message is positive.

The sentiment of the message is negative.

The sentiment of the message is neutral.

The sentiment of the message is positive.

The sentiment of
Response: Thank you for your positive message!



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Message: More
Classification: Classify the following message for mental health concerns: More than half of the people who died in the U.S. in 2011 were not reported to the National Vital Statistics System, according to a new study.

The study, published in the journal Health Affairs, found that more than half of the
Sentiment: Analyze the sentiment of the following message: More than a year after the U.S. government shut down, the country is still in a state of uncertainty.

The U.S. government shutdown is the longest in U.S. history.

The government shutdown is the longest
Response: We are here to support you. Please reach out if you need help.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Message: RELAX
Classification: Classify the following message for mental health concerns: RELAX.

The following message is for mental health concerns: RELAX.

The following message is for mental health concerns: RELAX.

The following message is for mental health concerns: RELAX.

The following message is for
Sentiment: Analyze the sentiment of the following message: RELAXED

Relaxed

Relaxed

Relaxed

Relaxed

Relaxed

Relaxed

Relaxed

Relaxed

Relaxed

Relax
Response: We are here to support you. Please reach out if you need help.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Message: started 
Classification: Classify the following message for mental health concerns: started __________, stopped __________, and changed __________.

**A.**

**B.**

**C.**

**D.**

**.** Which of the following is the best
Sentiment: Analyze the sentiment of the following message: started 

Analyze the sentiment of the following message: started

Analyze the sentiment of the following message: started

Analyze the sentiment of the following message: started

Analyze the sentiment of the following message: started


Response: We are here to support you. Please reach out if you need help.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Message: Hey FanaFana
Classification: Classify the following message for mental health concerns: Hey FanaFana, I'm worried about you. I'm worried about you because you're not taking your medication. I'm worried about you because you're not taking your medication. I'm worried about you because you're not taking your medication. I'm worried about
Sentiment: Analyze the sentiment of the following message: Hey FanaFana, I'm a new user of your site and I'm really enjoying it. I'm looking forward to reading more of your stories. I hope you don't mind if I send you a message. I'm a new user of your site and I
Response: We are here to support you. Please reach out if you need help.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Message: More
Classification: Classify the following message for mental health concerns: More than half of the people who died in the U.S. in 2011 were not reported to the National Vital Statistics System, according to a new study.

The study, published in the journal Health Affairs, found that more than half of the
Sentiment: Analyze the sentiment of the following message: More than a year after the U.S. government shut down, the country is still in a state of uncertainty.

The U.S. government shutdown is the longest in U.S. history.

The government shutdown is the longest
Response: We are here to support you. Please reach out if you need help.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Message: fsg I don't understand what I'm saying 
Classification: Classify the following message for mental health concerns: fsg I don't understand what I'm saying                                                   
Sentiment: Analyze the sentiment of the following message: fsg I don't understand what I'm saying 

The sentiment of the message is: negative

The sentiment of the message is: negative

The sentiment of the message is: negative

The sentiment of the message is: negative

The sentiment of the message is: negative
Response: We are here to support you. Please reach out if you need help.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Message: 👍🏽Yes
Classification: Classify the following message for mental health concerns: 👍🏽Yes, I am worried about my mental health. 👍🏽No, I am not worried about my mental health. 👍🏽I am worried about my mental health. 👍🏽I am not worried about my mental
Sentiment: Analyze the sentiment of the following message: 👍🏽Yes, I agree. 👍🏽No, I disagree. 👍🏽I don't know. 👍🏽I don't care. 👍🏽I don't know. 👍🏽I
Response: We are here to support you. Please reach out if you need help.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Message: 👎🏽No
Classification: Classify the following message for mental health concerns: 👎🏽No, I don't have any mental health concerns. 👎🏽No, I don't have any mental health concerns. 👎🏽No, I don't have any mental health concerns. 👎🏽No, I
Sentiment: Analyze the sentiment of the following message: 👎🏽No, I don't want to be a part of your team. 👎🏽No, I don't want to be a part of your team. 👎🏽No, I don't want to be a part of your team
Response: We are here to support you. Please reach out if you need help.



Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Message: 🔴 Session 1
Classification: Classify the following message for mental health concerns: 🔴 Session 1: I am feeling anxious. 🔴 Session 2: I am feeling anxious. 🔴 Session 3: I am feeling anxious. 🔴 Session 4: I am feeling anxious. 🔴 Session 5: I am feeling anxious.
Sentiment: Analyze the sentiment of the following message: 🔴 Session 1: 🔴 Session 2: 🔴 Session 3: 🔴 Session 4: 🔴 Session 5: 🔴 Session 6: 🔴 Session 7: 🔴 Session 8: 🔴 Session 9: �
Response: We are here to support you. Please reach out if you need help.



In [None]:
import openai
import pandas as pd
from sklearn.model_selection import train_test_split

# Load your OpenAI API key
openai.api_key = 'YOUR_OPENAI_API_KEY'

# Load data
data = pd.read_excel('/path_to_your_file.xlsx')

# Preprocess data
data['Date'] = pd.to_datetime(data['Date'])
data = data.dropna(subset=['Text'])  # Drop rows with missing text
data['Text'] = data['Text'].astype(str)  # Ensure text column is string type

# Split data
train_data, test_data = train_test_split(data, test_size=0.2, random_state=42)

# Define functions to interact with GPT-3.5
def create_classification_prompt(text):
    return f"Classify the mental health concern in the following message: {text}"

def create_sentiment_prompt(text):
    return f"Analyze the sentiment of the following message: {text}"

def gpt3_completion(prompt):
    response = openai.Completion.create(
        model="text-davinci-003",  # Use GPT-3.5 model
        prompt=prompt,
        max_tokens=50,
        n=1,
        stop=None,
        temperature=0.7
    )
    return response.choices[0].text.strip()

def classify_message(message):
    prompt = create_classification_prompt(message)
    classification = gpt3_completion(prompt)
    return classification

def analyze_sentiment(message):
    prompt = create_sentiment_prompt(message)
    sentiment = gpt3_completion(prompt)
    return sentiment

def automated_response(classification, sentiment):
    if 'acute' in classification.lower():
        return "Alerting moderator: Immediate attention needed!"
    elif 'positive' in sentiment.lower():
        return "Thank you for your positive message!"
    else:
        return "We are here to support you. Please reach out if you need help."

def process_message(message):
    classification = classify_message(message)
    sentiment = analyze_sentiment(message)
    response = automated_response(classification, sentiment)
    return response

# Alerting Moderators and Personalization
def alert_moderator(message, classification):
    if 'acute' in classification.lower():
        print(f"Moderator Alert: {message}")

def personalized_response(message, sentiment):
    if 'negative' in sentiment.lower():
        return f"We noticed you're feeling down. We're here for you. {message}"
    else:
        return f"Thank you for your message. {message}"

# Testing and Optimization
def test_chatbot(test_data, batch_size=10):
    # Process data in batches
    for i in range(0, len(test_data), batch_size):
        batch = test_data.iloc[i:i+batch_size]
        for _, row in batch.iterrows():
            message = row['Text']
            classification = classify_message(message)
            sentiment = analyze_sentiment(message)
            response = automated_response(classification, sentiment)
            alert_moderator(message, classification)
            print(f"Message: {message}")
            print(f"Classification: {classification}")
            print(f"Sentiment: {sentiment}")
            print(f"Response: {response}")
            print()

# Use a smaller subset for initial testing
test_chatbot(test_data.head(100), batch_size=10)  # Test with the first 100 rows of the test set in batches of 10
