In [2]:
import torch # for tensor computations and model inference
from transformers import AutoTokenizer, AutoModel # for loading pretrained models and tokenizers
import numpy as np # for numerical operations
from sklearn.metrics.pairwise import cosine_similarity # to calculate similarity between text embeddings

In [3]:
# Name of the pretrained Hugging Face checkpoint used to embed text.
# NOTE(review): "bert-base-uncased" is the general-purpose BERT base checkpoint,
# not a model fine-tuned for personality prediction — confirm this is intended.
prsn = "bert-base-uncased"

In [4]:
# Load the tokenizer and model for the checkpoint named by `prsn`.
# The tokenizer converts text into model-readable tokens.
# The model is loaded through AutoModel; in the cells visible here it is used
# only to produce hidden states for text embeddings, not to classify directly.
tokenizer = AutoTokenizer.from_pretrained(prsn)
model = AutoModel.from_pretrained(prsn)

In [5]:
# Trait anchors: each trait name maps to a space-separated string of
# descriptive keywords. The first five entries are the Big Five traits; the
# entries after the inner comment are additional, finer-grained traits.
trait_keywords = {
    "Openness": "curiosity creativity imagination open-mindedness",
    "Conscientiousness": "organized responsible disciplined dependable",
    "Extraversion": "social outgoing energetic enthusiastic",
    "Agreeableness": "kind trusting helpful empathetic",
    "Neuroticism": "anxious moody stressed insecure",
    # Additional nuanced traits
    "Assertiveness": "confident outspoken decisive bold",
    "Altruism": "selfless giving generous caring",
    "Impulsivity": "spontaneous unplanned reactive",
    "Cautiousness": "careful vigilant risk-averse",
    "Self-consciousness": "shy embarrassed reserved nervous",
    "Adventurousness": "risk-taking bold explorer traveler",
    "Emotional Awareness": "self-aware intuitive sensitive mindful",
    "Industriousness": "hardworking persistent productive goal-oriented"
}

In [6]:
def get_embedding(text):
    """Embed text with the module-level BERT `tokenizer` and `model`.

    The embedding is the mean of the last-layer token vectors, weighted by the
    tokenizer's attention mask so that padding positions do not contribute.
    For a single string nothing is padded, so this equals the plain mean over
    all tokens; for a list of strings each row is averaged over its own real
    tokens only.

    Args:
        text: A string, or a list of strings to embed as one batch.

    Returns:
        numpy.ndarray of shape (hidden_size,) for a single string (or a
        one-element list), or (batch_size, hidden_size) for a larger batch.
    """
    # Inference only: disable gradient tracking to save memory.
    with torch.no_grad():
        # Tokenize; ask explicitly for the attention mask used for pooling below.
        inputs = tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            padding=True,
            return_attention_mask=True,
        )
        # First element of the model output is the last hidden layer,
        # indexed as (batch, token, hidden).
        outputs = model(**inputs)
        last_hidden = outputs[0]

        # Masked mean: zero out padded positions, then divide by the number of
        # real tokens per row (clamped so an all-zero mask cannot divide by 0).
        mask = inputs["attention_mask"].unsqueeze(-1).to(last_hidden.dtype)
        token_sum = (last_hidden * mask).sum(dim=1)
        token_count = mask.sum(dim=1).clamp(min=1)
        pooled = token_sum / token_count

        # Drop only the batch axis when it has size 1.
        return pooled.squeeze(0).numpy()

In [7]:
# Build one anchor embedding per trait from its keyword description.
# These vectors are the reference points that input text is compared against.
trait_embeddings = {
    trait_name: get_embedding(keywords)
    for trait_name, keywords in trait_keywords.items()
}

In [8]:
def analyze_personality(user_input):
    """Score `user_input` against every trait anchor embedding.

    The input is embedded with `get_embedding`, then compared to each vector in
    `trait_embeddings` using cosine similarity.

    Args:
        user_input: Text to analyse.

    Returns:
        dict mapping each trait name to its cosine similarity with the input,
        as a Python float rounded to 2 decimal places.
    """
    with torch.no_grad():
        query_vec = get_embedding(user_input)
        # One similarity per trait; cosine_similarity returns a 1x1 matrix here.
        return {
            trait_name: round(
                float(cosine_similarity([query_vec], [anchor_vec])[0][0]), 2
            )
            for trait_name, anchor_vec in trait_embeddings.items()
        }

In [9]:
# In-memory store for personality analysis sessions.
# It lives only in the running kernel, and re-running this cell resets it to empty.
memory_store = [] # holds one record per analysis: input text, traits, optional feedback

In [10]:
def store_result(text, traits):
    """Append one analysis record to `memory_store` and return its index.

    Args:
        text: The analysed input text.
        traits: The trait scores predicted for `text`.

    Returns:
        int: Position of the new record in `memory_store`. Pass it to
        `update_feedback` to attach feedback to this record later.
    """
    memory_store.append({
        "text": text,      # input text
        "traits": traits,  # predicted traits
        "feedback": None,  # placeholder for user feedback
    })
    # The record was just appended, so it is the last element.
    return len(memory_store) - 1

In [15]:
def update_feedback(index, updated_traits):
    """Attach user feedback to the stored analysis at position `index`.

    Args:
        index: Position of the record in `memory_store`; must satisfy
            0 <= index < len(memory_store).
        updated_traits: Feedback value to save on the record.

    Prints a confirmation on success, or "invalid index" when `index` is out
    of range (nothing is modified in that case).
    """
    # Guard clause: reject anything outside the valid range up front.
    if not (0 <= index < len(memory_store)):
        print("invalid index")
        return

    entry = memory_store[index]
    entry["feedback"] = updated_traits
    print(f"feedback stored for entry {index}")

In [16]:
# if __name__ == "__main__":
#     print("\ntype 'exit' to stop\n")
#     while True:
#         user_input = input("you: ")
#         if user_input.lower() == "exit":
#             print("exiting...goodbye!")
#             break
#         traits = analyze_personality(user_input)
#         store_result(user_input, traits)
#         print("predicted personality traits:")
#         for trait, score in traits.items():
#             print(f" {trait}: {score}")
#         print()