In [1]:
# Dependencies (uncomment to install them into the current environment):
# !pip install pandas scikit-learn ipywidgets
# Download the MBTI dataset CSV from the Hugging Face Hub and save it as
# mbti_1.csv in the working directory (-q: quiet, -O: output file name).
!wget -q -O mbti_1.csv https://huggingface.co/datasets/aayus017/mbti-personality-dataset/resolve/main/mbti_1.csv


In [2]:
import pandas as pd
import re

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

import ipywidgets as widgets
from IPython.display import display, HTML

# Load the dataset. mbti_1.csv must be in the working directory; the cells
# below read its "type" and "posts" columns.
df = pd.read_csv("mbti_1.csv")
# Handy while developing (left disabled to keep the cell output empty):
#print(df.columns)
#df.head()


In [3]:
def clean_text(text):
    """Normalise raw text into lower-case, space-separated alphabetic tokens.

    Steps, in order:
      1. Cast to ``str`` and lower-case.
      2. Delete every four-letter MBTI code so the classifier cannot read
         the label straight out of the text.
      3. Delete URLs (``http...`` / ``www....``).
      4. Replace every character that is not ``a-z`` or whitespace by a space.
      5. Collapse runs of whitespace and trim the ends.

    Parameters
    ----------
    text : object
        Anything convertible with ``str()``.

    Returns
    -------
    str
        The cleaned text (possibly empty).
    """
    text = str(text).lower()
    # The character classes cover exactly the 16 codes [e|i][n|s][f|t][j|p].
    # The previous hand-written alternation listed only 15 of them and
    # missed "infp", which leaked that label into the features.
    text = re.sub(r"[ei][ns][ft][jp]", "", text)
    # remove URLs
    text = re.sub(r"http\S+|www\.\S+", "", text)
    # keep only letters and spaces
    text = re.sub(r"[^a-z\s]", " ", text)
    # collapse multiple spaces
    text = re.sub(r"\s+", " ", text).strip()
    return text

# Model input: every row's posts run through clean_text.
df["clean_posts"] = df["posts"].apply(clean_text)

# One target column per MBTI axis: the axis at position k takes the k-th
# character of the "type" string.
for position, axis in enumerate(("EI", "SN", "TF", "JP")):
    df[axis] = df["type"].str[position]


In [4]:
# Train one binary classifier per MBTI axis on TF-IDF features.
X_text = df["clean_posts"]
dimensions = ["EI", "SN", "TF", "JP"]

# Hold out the test rows BEFORE fitting TF-IDF so that the vocabulary and the
# IDF weights are learned from training text only. Fitting the vectorizer on
# the full corpus first lets test rows influence the features and makes the
# reported accuracies optimistic.
# The split is made once and shared by all four axes; stratifying on the full
# type keeps the class balance of every axis in both parts.
# NOTE(review): stratifying on "type" assumes every type occurs at least twice
# in the data - train_test_split raises ValueError otherwise.
text_train, text_test, labels_train, labels_test = train_test_split(
    X_text,
    df[dimensions],
    test_size=0.2,
    random_state=42,
    stratify=df["type"],
)

vectorizer_all = TfidfVectorizer(max_features=10000, stop_words="english")
X_train = vectorizer_all.fit_transform(text_train)
X_test = vectorizer_all.transform(text_test)

# Full-corpus matrix in the fitted feature space; retained under its original
# name for any code that still refers to it.
X_all = vectorizer_all.transform(X_text)

models = {}   # axis name -> fitted LogisticRegression
scores = {}   # axis name -> accuracy on the held-out rows

for dim in dimensions:
    y_train = labels_train[dim]
    y_test = labels_test[dim]

    # class_weight="balanced" re-weights the two letters of the axis by
    # inverse frequency in y_train.
    clf = LogisticRegression(max_iter=2000, class_weight="balanced")
    clf.fit(X_train, y_train)
    acc = clf.score(X_test, y_test)

    models[dim] = clf
    scores[dim] = acc

# Nothing is printed on purpose; inspect `scores` for per-axis accuracy.


In [5]:
def predict_mbti_from_text(text):
    """Predict a four-letter MBTI code for a piece of free text.

    The text is cleaned with ``clean_text`` and vectorised with the fitted
    ``vectorizer_all``. Each classifier in ``models`` then contributes one
    letter, in the order given by ``dimensions``.

    Returns
    -------
    str
        The predicted letters joined into one string.
    """
    features = vectorizer_all.transform([clean_text(text)])
    return "".join(models[axis].predict(features)[0] for axis in dimensions)


In [6]:
# Human-readable explanation for each of the eight MBTI letters,
# grouped by axis: I/E, N/S, T/F, P/J.
mbti_details = dict(
    I="Introverted – Gains energy from alone time and deep thought rather than social activity.",
    E="Extraverted – Gains energy from interacting, socializing, and being around people.",
    N="Intuitive – Focuses on big-picture ideas, theories, and possibilities.",
    S="Sensing – Prefers concrete details, facts, and real-world information.",
    T="Thinking – Makes decisions using logic and objective analysis.",
    F="Feeling – Makes decisions based on empathy, values, and emotional awareness.",
    P="Perceiving – Prefers spontaneity, flexibility, and open-ended situations.",
    J="Judging – Likes structure, plans, and clear decision-making.",
)

# One-sentence portrait for each of the 16 four-letter MBTI types,
# keyed by the upper-case type code.
mbti_summary = dict(
    INTP="INTPs are analytical, curious thinkers who love exploring ideas and understanding how things work.",
    INTJ="INTJs are strategic, independent planners who think long-term and solve complex problems.",
    INFP="INFPs are idealistic, creative individuals guided by values, emotion, and imagination.",
    INFJ="INFJs are insightful, compassionate visionaries who value meaning and helping others.",
    ISTP="ISTPs are practical, hands-on problem solvers who stay calm and adaptable under pressure.",
    ISTJ="ISTJs are grounded, responsible organizers who value consistency and reliability.",
    ISFP="ISFPs are gentle, artistic creators who appreciate beauty, freedom, and personal expression.",
    ISFJ="ISFJs are caring, loyal supporters who prioritize harmony and protecting loved ones.",
    ENTP="ENTPs are energetic, inventive debaters who enjoy challenges and creative ideas.",
    ENTJ="ENTJs are bold, organized leaders who value efficiency, strategy, and clear direction.",
    ENFP="ENFPs are enthusiastic, imaginative explorers driven by passion and human connection.",
    ENFJ="ENFJs are warm, charismatic motivators who inspire others toward growth and cooperation.",
    ESTP="ESTPs are adventurous, action-oriented doers who love excitement and hands-on challenges.",
    ESTJ="ESTJs are structured, logical administrators who like order, leadership, and organization.",
    ESFP="ESFPs are spontaneous, fun-loving performers who enjoy living in the moment.",
    ESFJ="ESFJs are warm, supportive caregivers who value community, cooperation, and harmony.",
)


In [7]:
# ===== TAB 1: MBTI From Text =====

# Multi-line input box for the text to analyse.
social_text_input = widgets.Textarea(
    placeholder="Paste any text here (quote, post, article, message)...",
    description="Text:",
    layout=widgets.Layout(height="150px", width="100%"),
)

# Button that starts the prediction.
social_button = widgets.Button(
    layout=widgets.Layout(width="200px"),
    button_style="primary",
    description="Predict MBTI",
)

# Output area; its HTML value is filled in by the click handler.
social_result = widgets.HTML()

def on_social_clicked(b):
    """Click handler for the text-prediction button.

    Reads the text box, rejects empty or very short input (fewer than 15
    whitespace-separated words) with a message, and otherwise writes the
    predicted type, the four letter explanations and the type summary into
    ``social_result``.

    Parameters
    ----------
    b : object
        The argument ipywidgets passes to the callback; not used.
    """
    entered = social_text_input.value.strip()

    if not entered:
        social_result.value = "<b>Please enter some text.</b>"
        return

    word_count = len(entered.split())
    if word_count < 15:
        social_result.value = "<b>Please enter at least 15 words for a more reliable prediction.</b>"
        return

    mbti = predict_mbti_from_text(entered)
    first, second, third, fourth = (mbti[i] for i in range(4))
    summary = mbti_summary.get(mbti, "No summary available for this type.")

    social_result.value = f"""
    <h3>Predicted MBTI Type: {mbti}</h3>

    <b>{first}</b> – {mbti_details[first]}<br>
    <b>{second}</b> – {mbti_details[second]}<br>
    <b>{third}</b> – {mbti_details[third]}<br>
    <b>{fourth}</b> – {mbti_details[fourth]}<br><br>

    <b>Overall Summary:</b><br>
    {summary}
    """

# Run the prediction whenever the button is pressed.
social_button.on_click(on_social_clicked)

# Tab layout, top to bottom: intro text, input box, button, result area.
social_tab = widgets.VBox(
    children=[
        widgets.HTML(
            value=(
                "<h3>MBTI From Text</h3>"
                "<p>Paste any text (quote, caption, post, message, or paragraph). "
                "The model will predict the <b>writer's</b> MBTI personality type based on their writing style.</p>"
            )
        ),
        social_text_input,
        social_button,
        social_result,
    ]
)


In [8]:
# ===== TAB 2: MBTI Quiz =====

# Each entry: (question text, (first option, second option), axis name).
quiz_questions = [
    ("When recharging, you prefer:", ("Being with people", "Being alone"), "EI"),
    ("You trust more:", ("Facts and concrete info", "Ideas and possibilities"), "SN"),
    ("You decide based on:", ("Logic and analysis", "Feelings and values"), "TF"),
    ("You prefer life to be:", ("Planned and structured", "Flexible and open"), "JP"),
]

# One radio-button group per question, kept with its question text and axis.
quiz_widgets = [
    (
        question,
        widgets.RadioButtons(options=choices, layout=widgets.Layout(width="100%")),
        axis,
    )
    for question, choices, axis in quiz_questions
]

# Button that scores the quiz.
quiz_button = widgets.Button(
    layout=widgets.Layout(width="200px"),
    button_style="success",
    description="Get My MBTI",
)

# Output area; its HTML value is filled in by the click handler.
quiz_result = widgets.HTML()

def on_quiz_clicked(b):
    """Click handler for the quiz button: score the answers and show the type.

    Every entry of ``quiz_widgets`` is ``(question, radio_buttons, dim)``,
    where ``dim`` is a two-letter axis ("EI", "SN", "TF" or "JP"). Selecting
    the first option votes for ``dim[0]`` and the second for ``dim[1]``, so
    options must be listed in the same order as the axis letters. For each
    axis the letter with more votes wins; ties go to the first letter.

    Decoding by option position rather than by matching the option text
    means that questions added to ``quiz_questions`` are scored correctly
    without touching this function.

    Parameters
    ----------
    b : object
        The argument ipywidgets passes to the callback; not used.
    """
    axes = ("EI", "SN", "TF", "JP")
    counts = {letter: 0 for axis in axes for letter in axis}

    for _question, rb, dim in quiz_widgets:
        # rb.index is the position of the selected option (0 or 1 here).
        counts[dim[rb.index]] += 1

    mbti = "".join(
        first if counts[first] >= counts[second] else second
        for first, second in axes
    )

    first, second, third, fourth = (mbti[i] for i in range(4))
    summary = mbti_summary.get(mbti, "No summary available for this type.")

    quiz_result.value = f"""
    <h3>Your Quiz MBTI Result: {mbti}</h3>

    <b>{first}</b> – {mbti_details[first]}<br>
    <b>{second}</b> – {mbti_details[second]}<br>
    <b>{third}</b> – {mbti_details[third]}<br>
    <b>{fourth}</b> – {mbti_details[fourth]}<br><br>

    <b>Overall Summary:</b><br>
    {summary}<br><br>

    <i>Note: This is a very short quiz and is meant for fun and demonstration only, not for official personality assessment.</i>
    """

# Score the quiz whenever the button is pressed.
quiz_button.on_click(on_quiz_clicked)

# Flatten the questions into [label, radio group, label, radio group, ...].
quiz_items = [
    item
    for question, radio, _axis in quiz_widgets
    for item in (widgets.HTML(f"<b>{question}</b>"), radio)
]

# Tab layout, top to bottom: intro text, the questions, button, result area.
quiz_tab = widgets.VBox(
    children=[
        widgets.HTML(
            value=(
                "<h3>MBTI Quiz</h3>"
                "<p>Answer a few simple questions to get an approximate MBTI type. "
                "This is a lightweight demo, not a full psychological test.</p>"
            )
        ),
        *quiz_items,
        quiz_button,
        quiz_result,
    ]
)


In [9]:
# Assemble both tabs and label them in display order.
tabs = widgets.Tab(children=[social_tab, quiz_tab])
for tab_index, tab_title in enumerate(("MBTI From Text", "MBTI Quiz")):
    tabs.set_title(tab_index, tab_title)

# App header, followed by the tabbed interface itself.
display(
    HTML(
        "<h2>MBTI Personality Analyzer</h2>"
        "<p>This app predicts MBTI personality types from writing and a short quiz. "
        "It is for educational and demonstration purposes only.</p>"
    )
)

display(tabs)


Tab(children=(VBox(children=(HTML(value="<h3>MBTI From Text</h3><p>Paste any text (quote, caption, post, messa…