In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from datasets import Dataset

data = pd.read_csv("C:\\Users\\Prathima\\Desktop\\5th_sem_project\\final_dataset.csv")
print("Original class distribution:")
print(data["class_value"].value_counts())

class_0 = data[data["class_value"] == 0]
class_1 = data[data["class_value"] == 1]

# Downsample class 0 
class_0_downsampled = class_0.sample(n=74, random_state=42)

# Concatenate 
balanced_data = pd.concat([class_0_downsampled, class_1], axis=0)
balanced_data = balanced_data.rename(columns={"class_value": "label", "concatenated_text": "text"})

balanced_data = balanced_data.sample(frac=1, random_state=42).reset_index(drop=True)
print("\nBalanced class distribution:")
print(balanced_data["label"].value_counts())

train_data, eval_data  = train_test_split(balanced_data, stratify=balanced_data["label"],test_size=0.2, random_state=42)
train_dataset = Dataset.from_pandas(train_data)
eval_dataset = Dataset.from_pandas(eval_data)

print(f"Training samples: {train_data.shape}, Test samples: {eval_data.shape}")

Original class distribution:
class_value
0    114
1     74
Name: count, dtype: int64

Balanced class distribution:
label
1    74
0    74
Name: count, dtype: int64
Training samples: (118, 2), Test samples: (30, 2)


In [2]:
train_dataset = train_dataset.remove_columns("__index_level_0__")
eval_dataset = eval_dataset.remove_columns("__index_level_0__")


In [None]:
from setfit import SetFitModel, SetFitTrainer,TrainingArguments
from sentence_transformers.losses import CosineSimilarityLoss

# Load a SetFit model from Hugging Face
model = SetFitModel.from_pretrained("sentence-transformers/all-mpnet-base-v2")

# Check if the model is loaded correctly
print(f"Loaded model: {model}")
trainer = SetFitTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    loss_class=CosineSimilarityLoss,
    batch_size=16,
    num_iterations=20, # Number of text pairs to generate for contrastive learning
    num_epochs=1 # Number of epochs to use for contrastive learning
)
trainer.train()


In [None]:
from setfit import SetFitModel
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
import numpy as np
# Assuming the text column is named 'text' and the target label column is named 'label'
texts = eval_data['concatenated_text'].values
true_labels = eval_data['class_value'].values

model = SetFitModel.from_pretrained('/content/setfit-finetuned-model')
# Make predictions using your model
predicted_labels = model.predict(texts)

# Evaluate the model performance
accuracy = accuracy_score(true_labels, predicted_labels)
f1 = f1_score(true_labels, predicted_labels, average='weighted')  # Use 'weighted' if multi-class
precision = precision_score(true_labels, predicted_labels, average='weighted')
recall = recall_score(true_labels, predicted_labels, average='weighted')

# Print the evaluation metrics
print(f'Accuracy: {accuracy:.4f}')
print(f'F1 Score: {f1:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')

In [None]:
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from kneed import KneeLocator
import matplotlib.pyplot as plt
from setfit import SetFitModel
import os

def find_optimal_k(embeddings, max_k=10):
    wcss = []
    for k in range(1, max_k + 1):
        kmeans = KMeans(n_clusters=k, random_state=42)
        kmeans.fit(embeddings)
        wcss.append(kmeans.inertia_)
    
    # Use kneed to find the elbow point
    kn = KneeLocator(range(1, max_k + 1), wcss, curve='convex', direction='decreasing')
    optimal_k = kn.knee
    
    if optimal_k is None:
        optimal_k = max_k  # Default to max_k if elbow not found
    
    # Ensure optimal_k is at least 1 and does not exceed the number of samples
    optimal_k = max(1, min(optimal_k, len(embeddings)))
    
    return optimal_k, wcss

def generate_text_prototypes(embeddings, labels, texts, output_dir="./prototypes", max_k=10):
    """Function to generate the prototypes"""
    os.makedirs(output_dir, exist_ok=True)
    
    for label in np.unique(labels):
        class_embeddings = embeddings[np.array(labels) == label]
        class_texts = np.array(texts)[np.array(labels) == label]
        
        # Find optimal K
        optimal_k, wcss = find_optimal_k(class_embeddings, max_k)
        
        # Plot elbow curve
        plt.figure()
        plt.plot(range(1, max_k + 1), wcss, marker='o')
        plt.xlabel('Number of clusters (K)')
        plt.ylabel('Within-Cluster Sum of Squares (WCSS)')
        plt.title(f'Elbow Method for Class {label}')
        plt.savefig(f"{output_dir}/class_{label}_elbow_plot.png")
        plt.close()
        
        # Perform K-means with optimal K (setting to 5 to ensure uniformity between both classes)
        kmeans = KMeans(n_clusters=5, random_state=42)
        kmeans.fit(class_embeddings)
        
        # Find the closest text to each centroid
        closest_texts = []
        for centroid in kmeans.cluster_centers_:
            distances = np.linalg.norm(class_embeddings - centroid, axis=1)
            closest_idx = np.argmin(distances)
            closest_texts.append(class_texts[closest_idx])
            
        # Save prototypes
        with open(f"{output_dir}/class_{label}_prototypes.txt", "w") as f:
            for idx, text in enumerate(closest_texts):
                f.write(f"Prototype {idx + 1}:\n{text}\n\n")

file_path = "C:\\Users\\Prathima\\Desktop\\5th_sem_project\\final_dataset.csv"
data = pd.read_csv(file_path)

texts = data["concatenated_text"].tolist()
labels = data["class_value"].tolist()

model = SetFitModel.from_pretrained("C:\\Users\\Prathima\\Desktop\\5th_sem_project\\finetuned_model")
embeddings = model.encode(texts)  # Generate embeddings for all text in the dataset

generate_text_prototypes(embeddings, labels, texts, output_dir="./prototypes", max_k=10)

In [16]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from setfit import SetFitModel
label_to_class = {0: "not depressed", 1: "depressed"}

model = SetFitModel.from_pretrained("C:\\Users\\Prathima\\Desktop\\5th_sem_project\\finetuned_model")  # Replace with your model path

def load_prototypes_from_file(label):
    """Load prototypes for the given label from the corresponding text file."""
    file_path = f"C:\\Users\\Prathima\\Desktop\\5th_sem_project\\class_{label}_prototypes.txt"
    with open(file_path, "r") as f:
        prototypes = [line.strip() for line in f if line.strip() and not line.startswith("Prototype")]
    return prototypes

def classify_and_explain(text):
    """Classify input text and explain the prediction using the closest prototype."""
    predicted_label = model.predict([text])[0].item()
    class_name = label_to_class[predicted_label]

    prototypes = load_prototypes_from_file(predicted_label)
    text_embedding = model.encode([text]).reshape(1, -1)

    similarities = [
        cosine_similarity(text_embedding, model.encode([proto]).reshape(1, -1))[0][0]
        for proto in prototypes
    ]

    closest_prototype_idx = np.argmax(similarities)
    closest_prototype = prototypes[closest_prototype_idx]
    closest_similarity = similarities[closest_prototype_idx]

    explanation = (
        f"The input text was classified as {class_name}. "
        f"It is most similar to the following prototype:\n\n"
        f"\"{closest_prototype}\"\n\n"
        f"Similarity Score: {closest_similarity:.2f}"
    )

    return predicted_label, closest_prototype, closest_similarity, explanation

input_text = "Yes, I’m doing fine. I’m originally from Indiana, but I moved to LA about ten years ago because I don’t like the cold weather—LA has great weather, the ocean, and lots to do. One of my favorite memories was spending a day on the Catalina Islands for my birthday. I’ve had a lot of changes in my life, like leaving Indiana for good, which felt huge at the time. I’ve been pursuing a career in filmmaking, something I haven’t done yet but really want to, although I can be shy and have trouble trusting people because of my past. When I’m stressed, I watch TV or go to the movies, and I don’t really argue or lose my temper. I try to avoid stress and push people away, which I regret because it makes it hard to form connections. I don’t have a close relationship with my family; we don’t talk much, and I often feel isolated. Sometimes I struggle to sleep, and racing thoughts or stress keep me up, but I try to manage it with music and relaxation. I’ve been feeling down lately, wishing my life had turned out differently, especially when it comes to having friends or a real relationship. I find comfort in playing online games with avatars where people don’t know who I am. My ideal weekend is playing games, going to the movies, or driving somewhere new to explore. If I could go back in time, I’d tell myself to keep a journal and make different choices. I regret how my life turned out and wish I had more stability, but I’m proud of being self-taught and consider myself a genius at figuring things out. Thanks for listening, and have a good day."
predicted_label, closest_prototype, similarity_score, explanation = classify_and_explain(input_text)

print(explanation)


The input text was classified as depressed. It is most similar to the following prototype:

"Hi. Yes, today is a wonderful day, and I am doing absolutely marvelous. I was born in Cleveland, Ohio, and raised in Tucson, Arizona. I moved to Los Angeles when I was sixteen years old. Iâ€™ve only been back to Cleveland once since leaving. Itâ€™s quite different now, a whole new community. Growing up in Cleveland gave me a great sense of community, which I feel is missing in Los Angeles. My grandmother passed away and left property for my father to manage, so we relocated here. Since I was young and naturally good at meeting new people, it wasnâ€™t hard for me to adjust. Moving has never been difficult for me because I love traveling and experiencing new places. Meeting new people always felt like an adventure. One of my fondest memories comes from 1964 when I went to visit my grandmother in Cleveland. While I was there, I had the chance to visit an Indian reservation. Later, I wrote to them 

In [17]:
import torch
import nltk
from setfit import SetFitModel
from nltk.sentiment import SentimentIntensityAnalyzer

# Download NLTK dependencies
nltk.download("vader_lexicon")
nltk.download("punkt_tab")

# Initialize the sentiment analyzer
sia = SentimentIntensityAnalyzer()

# Load the fine-tuned SetFit model
model = SetFitModel.from_pretrained("C:\\Users\\Prathima\\Desktop\\5th_sem_project\\finetuned_model")  # Replace with your model path

# Tokenize text into sentences
def split_into_sentences(text):
    return nltk.sent_tokenize(text)

def classify_and_explain(text):
    predicted_label = model.predict([text])[0].item()
    label_to_class = {0: "not depressed", 1: "depressed"}
    class_name = label_to_class[predicted_label]
    sentences = split_into_sentences(text)
    sentence_embeddings = torch.tensor(model.encode(sentences), dtype=torch.float32)
    raw_scores = torch.matmul(sentence_embeddings, torch.ones(sentence_embeddings.size(-1), 1)).squeeze(-1)
    print(len(sentences))
    print(len(raw_scores))
    print(sentences)
    print(raw_scores)
    # Get sentiment scores (neg, neu, pos, compound)
    sentiment_scores = [sia.polarity_scores(sentence) for sentence in sentences]
    
    adjusted_scores = []
    
    for i, (raw_score, sentiment_score) in enumerate(zip(raw_scores, sentiment_scores)):
        if predicted_label == 1:  # Depressed classification
            # Use 'neg' score if it's significant
            sentiment_value = sentiment_score['neg']
            
            # If 'neg' is not significant, use 'compound' for negative sentiment
            if sentiment_value > 0:
                adjusted_scores.append(raw_score + sentiment_value)
            elif sentiment_score['compound'] < 0:  # Compound negative score
                adjusted_scores.append(raw_score + sentiment_score['compound'])
            else:
                adjusted_scores.append(0)  # Ignore if neither 'neg' nor 'compound' is negative
        elif predicted_label == 0:  # Not depressed classification
            # Use 'pos' score for not depressed classification
            sentiment_value = sentiment_score['pos']
            if sentiment_value > 0:  # Only adjust if 'pos' is significant
                adjusted_scores.append(raw_score + sentiment_value)
            else:
                adjusted_scores.append(0)  # Ignore sentences with low 'pos' score

    # Filter sentences with non-zero adjusted scores for the explanation
    filtered_sentences = [
        (sentence, score) for sentence, score in zip(sentences, adjusted_scores) if score > 0
    ]

    # Generate explanation text with only relevant sentences
    explanation = f"The input text was classified as **{class_name}**.\n"
    explanation += "Here are the sentences with their contribution scores:\n\n"

    for sentence, score in filtered_sentences:
        explanation += f"• \"{sentence}\" (Adjusted Contribution: {score:.2f})\n"

    # Fallback message if no sentences matched the criteria
    if not filtered_sentences:
        explanation += "No sentences matched the contribution criteria for this classification."

    return class_name, explanation

#text = "Thank you. I'm doing good. I'm from Los Angeles. Oh, great, I live in West Los Angeles, the west side. It's alright. No, I live alone, and I love it. I grew up here, so it's natural. The weather is always good; it's never bad. There's always something to do, rarely a dull moment. The traffic is horrible, but probably that's the case in almost any major city. I hate the traffic.I have enough going on here, so if I travel, it's usually within driving distance. I studied business. I've been done for a few years, so I haven't gone to school for a while. One of these days, I’ll go back to graduate school. My dream job would be working for myself and making lots of money. I don’t really have a specific dream job, just something where I can work on my terms, get paid decently, and be in a creative environment. It’s just a matter of finding the right situation.Right now, people are a little conservative about what they want, so it's tougher than it seems. But when the situation is right, I don’t think it’s too difficult. I work as an administrative assistant through a temp agency, doing desk jobs. I feel like I could do more, but for now, it works.I’m pretty close with my family. They’re around. What gets me really mad is stupid people who do things just to annoy me. They provoke me for no reason. They think it’s funny to push my buttons, like poking a stick at an angry dog. I try not to remember those situations; I just let it go and move on.To relax, I like to run, go to the gym, and listen to music. I have musician friends, so I’m usually around them. I enjoy anything related to art and creativity. Friends in general have been a positive influence. In a city like Los Angeles, where everyone is busy chasing their own goals, it’s hard to find someone uplifting. But I’ve had people point out things I could do differently, and I appreciate that.I think I can do better at handling stress and not letting people provoke me. Lately, I’ve been better at it, probably because my situation has improved. Looking back, I don’t have any major regrets. Maybe I could’ve traveled more or worked towards a better-paying job, but everyone has those thoughts at some point. I could’ve become a banker or lawyer and made more money, but I don’t like law or medicine, so it wasn’t meant to be.In life, I realized there are more things I dislike than like. I narrowed it down to what I’m good at, who I work well with, and what makes sense for me. It was a reality check, but it was necessary. Trying to conform to things that don’t suit you doesn’t make sense. It’s like trying to fit a round peg into a square hole.Memorable experiences? I don’t have any specific ones. Every day feels like a memorable experience, whether positive or negative. The fact that I have a job and can take care of myself is an accomplishment. To some, it might sound small, but for me, it’s something. I’ve gotten this far without self-destructing, and I see that as an achievement.When was the last time I felt really happy? I don’t have real highs or lows. I’m usually level. If I accomplish one thing each day, I feel happy. Yesterday, I felt happy because I accomplished something. I don’t have a best friend, but friends would probably describe me as outgoing, determined, and always networking. That’s what they would say about me."
text ="Yes, I’m doing fine. I’m originally from Indiana, but I moved to LA about ten years ago because I don’t like the cold weather—LA has great weather, the ocean, and lots to do. One of my favorite memories was spending a day on the Catalina Islands for my birthday. I’ve had a lot of changes in my life, like leaving Indiana for good, which felt huge at the time. I’ve been pursuing a career in filmmaking, something I haven’t done yet but really want to, although I can be shy and have trouble trusting people because of my past. When I’m stressed, I watch TV or go to the movies, and I don’t really argue or lose my temper. I try to avoid stress and push people away, which I regret because it makes it hard to form connections. I don’t have a close relationship with my family; we don’t talk much, and I often feel isolated. Sometimes I struggle to sleep, and racing thoughts or stress keep me up, but I try to manage it with music and relaxation. I’ve been feeling down lately, wishing my life had turned out differently, especially when it comes to having friends or a real relationship. I find comfort in playing online games with avatars where people don’t know who I am. My ideal weekend is playing games, going to the movies, or driving somewhere new to explore. If I could go back in time, I’d tell myself to keep a journal and make different choices. I regret how my life turned out and wish I had more stability, but I’m proud of being self-taught and consider myself a genius at figuring things out. Thanks for listening, and have a good day."
class_name, explanation = classify_and_explain(text)
print(explanation)

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\Prathima\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package punkt_tab to
[nltk_data]     C:\Users\Prathima\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


15
15
['Yes, I’m doing fine.', 'I’m originally from Indiana, but I moved to LA about ten years ago because I don’t like the cold weather—LA has great weather, the ocean, and lots to do.', 'One of my favorite memories was spending a day on the Catalina Islands for my birthday.', 'I’ve had a lot of changes in my life, like leaving Indiana for good, which felt huge at the time.', 'I’ve been pursuing a career in filmmaking, something I haven’t done yet but really want to, although I can be shy and have trouble trusting people because of my past.', 'When I’m stressed, I watch TV or go to the movies, and I don’t really argue or lose my temper.', 'I try to avoid stress and push people away, which I regret because it makes it hard to form connections.', 'I don’t have a close relationship with my family; we don’t talk much, and I often feel isolated.', 'Sometimes I struggle to sleep, and racing thoughts or stress keep me up, but I try to manage it with music and relaxation.', 'I’ve been feeling