In [52]:
import pandas as pd
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer

In [54]:
import nltk
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/arshpurohit/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/arshpurohit/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/arshpurohit/nltk_data...


True

In [56]:
# Replace 'file_path' with the actual path to your CSV file
conversations = pd.read_csv(r"/Users/arshpurohit/Downloads/reddit_text-davinci-002.csv")

# Check if the data loaded properly
print(conversations.head())


                                              prompt  \
0  Me and the father of my child have been dating...   
1  I can't seem to feel any emotion except anxiet...   
2  why do we allow one or few bad experiences whe...   
3  I have major depression, severe, PTSD, anxiety...   
4                              do i need a therapist   

                                          completion  
0  It sounds like you are feeling very alone and ...  
1  It is possible that you are experiencing sympt...  
2  There are a few possible explanations for why ...  
3  It is understandable that you are feeling upse...  
4  There is no one-size-fits-all answer to this q...  


In [62]:
# Preprocess function to clean and preprocess the text
def preprocess_text(text):
    # Lowercase the text
    text = text.lower()
    
    # Remove punctuation and numbers
    text = re.sub(r'[^\w\s]', '', text)
    
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    word_tokens = word_tokenize(text)
    filtered_words = [word for word in word_tokens if word not in stop_words]
    
    # Lemmatize words (optional)
    lemmatizer = WordNetLemmatizer()
    lemmatized_words = [lemmatizer.lemmatize(word) for word in filtered_words]
    
    # Join the cleaned words back into a single string
    return ' '.join(lemmatized_words)

# Test the preprocess function on a sample string
sample_text = "This is a sample text to test the preprocess function."
print(preprocess_text(sample_text))

sample text test preprocess function


In [64]:
# Make sure that 'prompt' exists in the DataFrame before proceeding
if 'prompt' in conversations.columns:
    conversations['cleaned_prompt'] = conversations['prompt'].fillna('').apply(preprocess_text)
    print(conversations[['prompt', 'cleaned_prompt']].head())  # Show original and cleaned data
else:
    print("Column 'prompt' not found in DataFrame")

                                              prompt  \
0  Me and the father of my child have been dating...   
1  I can't seem to feel any emotion except anxiet...   
2  why do we allow one or few bad experiences whe...   
3  I have major depression, severe, PTSD, anxiety...   
4                              do i need a therapist   

                                      cleaned_prompt  
0  father child dating year big argument first wa...  
1         cant seem feel emotion except anxiety even  
2  allow one bad experience felt worthless dictat...  
3  major depression severe ptsd anxiety disorder ...  
4                                     need therapist  


In [66]:
# Initialize vectorizer and fit on the preprocessed prompts
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(conversations["cleaned_prompt"])

# Check the shape of the tfidf_matrix to confirm successful transformation
print(tfidf_matrix.shape)

(1522, 3671)


In [68]:
# Function to get the best response based on user input
def get_best_response(user_input, df, tfidf_matrix, vectorizer):
    # Preprocess user input
    cleaned_input = preprocess_text(user_input)
    
    # Transform user input into vector
    user_input_vector = vectorizer.transform([cleaned_input])
    
    # Calculate cosine similarity
    cosine_similarities = cosine_similarity(user_input_vector, tfidf_matrix)
    
    # Get index of the best match
    best_match_index = cosine_similarities.argmax()
    
    # Check if the best match is actually similar (optional threshold)
    if cosine_similarities[0][best_match_index] < 0.1:  # Adjust threshold as needed
        return "I'm sorry, I don't understand your question."
    
    # Return the best matched response
    return df.iloc[best_match_index]["completion"]

# Test the function with a sample user input
sample_input = "I'm feeling sad today"
response = get_best_response(sample_input, conversations, tfidf_matrix, vectorizer)
print(response)

There could be many reasons why you are feeling sad. It could be due to a recent loss, or something that is happening in your life that is causing you stress. It is also possible that you are simply feeling down for no specific reason. If you are feeling persistently sad, it might be a good idea to talk to a doctor or therapist, who can help you figure out what might be causing your sadness and how to deal with it.


In [None]:
# Run the chatbot
while True:
    user_input = input("Ask me something: ")
    if user_input.lower() == 'exit':
        print("Exiting chatbot...")
        break
    response = get_best_response(user_input, conversations, tfidf_matrix, vectorizer)
    print(response)

Ask me something:  I got fired from my facebook app developing job and I was making good money


There are a number of ways to get help without spending any money. One option is to look for free or low-cost services in your community, such as support groups or counseling services. You can also search online for self-help resources, such as articles or blog posts about coping with difficult situations. Finally, you can talk to friends or family members for support and advice.


Ask me something:  I don't like how my day is going


There is no one definitive answer to this question. It could be a variety of things, ranging from a mental health issue to a physical health issue. If you're concerned about what is going on with you, it's best to consult with a doctor or mental health professional to get a more specific answer.


Ask me something:  I got late to work and my boss was mad


Yes, this is an example of co-dependency. Co-dependency is when someone is excessively reliant on another person for support and validation. In this case, you are excessively reliant on your boss for feedback and approval. This can be harmful to both you and your boss, as it can create a power imbalance and prevent you from developing as an independent thinker and worker. If you think you may be co-dependent, it is important to seek professional help to learn how to develop healthier coping mechanisms and relationships.
