In [None]:
!pip install nltk scikit-learn pandas numpy joblib ipywidgets

Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets)
  Downloading jedi-0.19.2-py2.py3-none-any.whl.metadata (22 kB)
Downloading jedi-0.19.2-py2.py3-none-any.whl (1.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: jedi
Successfully installed jedi-0.19.2


In [None]:
import pandas as pd
import numpy as np
import re
import nltk
import joblib
import ipywidgets as widgets
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from IPython.display import display

# Download necessary NLTK resources
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
import pandas as pd
import io

# Upload file to Colab environment
from google.colab import files
uploaded = files.upload()

# Get the actual filename from the uploaded dictionary
# FinalData.csv= list(uploaded.keys())[0]  # Original line causing the error
FinalData_csv = list(uploaded.keys())[0]  # Get the first (and likely only) key - Fixed line

# Read the uploaded file using the correct filename and assign it to 'df'
# data_set = pd.read_csv(io.BytesIO(uploaded[FinalData.csv])) # Original line causing the error
df = pd.read_csv(io.BytesIO(uploaded[FinalData_csv])) # Fixed line: Assigned the dataframe to 'df'

Saving FinalData.csv to FinalData (4).csv


In [None]:
# Define text cleaning function
def clean_text(text):
    text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)  # Remove URLs
    text = re.sub(r'\W', ' ', text)  # Remove special characters
    text = re.sub(r'\s+[a-zA-Z]\s+', ' ', text)  # Remove single characters
    text = re.sub(r'\s+', ' ', text, flags=re.I)  # Remove multiple spaces
    return text.lower().strip()

# Apply text cleaning
df['cleaned_text'] = df['posts'].apply(clean_text)

# Map MBTI types to numerical labels
label_mapping = {ptype: idx for idx, ptype in enumerate(df['type'].unique())}
df['label'] = df['type'].map(label_mapping)

# Check data after preprocessing
df.head()

Unnamed: 0,type,posts,cleaned_text,label
0,INFJ,'http://www.youtube.com/watch?v=qsXHcwe3krw|||...,and intj moments sportscenter not top ten play...,0
1,ENTP,'I'm finding the lack of me in these posts ver...,m finding the lack of me in these posts very a...,1
2,INTP,'Good one _____ https://www.youtube.com/wat...,good one _____ course to which say know that m...,2
3,INTJ,"'Dear INTP, I enjoyed our conversation the o...",dear intp enjoyed our conversation the other d...,3
4,ENTJ,'You're fired.|||That's another silly misconce...,you re fired that another silly misconception ...,4


In [None]:
# Split dataset into train and test sets
X_train, X_test, y_train, y_test = train_test_split(df['cleaned_text'], df['label'], test_size=0.2, random_state=42)

# Create a text processing and classification pipeline
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words=stopwords.words('english'), max_features=5000)),
    ('classifier', LogisticRegression(max_iter=500))
])

# Train the model
pipeline.fit(X_train, y_train)

# Save trained model
joblib.dump(pipeline, 'personality_model.pkl')

# Print Model Accuracy
accuracy = pipeline.score(X_test, y_test)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

Model Accuracy: 45.00%


In [None]:
# Load trained model
model = joblib.load('personality_model.pkl')

# Reverse label mapping for prediction output
reverse_label_mapping = {v: k for k, v in label_mapping.items()}

# Define personality prediction function
def predict_personality(text):
    cleaned_text = clean_text(text)
    label_pred = model.predict([cleaned_text])[0]
    return reverse_label_mapping[label_pred]

In [None]:
# Create widgets for input and prediction
text_input = widgets.Textarea(
    placeholder='Enter your text here...',
    description='Input:',
    layout=widgets.Layout(width='auto', height='100px')
)

predict_button = widgets.Button(description='Predict Personality')

output = widgets.Output()

# Define function to handle button click
def on_predict_button_clicked(b):
    with output:
        output.clear_output()
        user_input = text_input.value
        prediction = predict_personality(user_input)
        print(f'Predicted Personality Type: {prediction}')

predict_button.on_click(on_predict_button_clicked)

# Display UI
display(text_input, predict_button, output)

Textarea(value='', description='Input:', layout=Layout(height='100px', width='auto'), placeholder='Enter your …

Button(description='Predict Personality', style=ButtonStyle())

Output()

In [None]:
# Install gradio (if not already)
!pip install gradio


Collecting gradio
  Downloading gradio-5.23.3-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.4-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [None]:
# Step 2: Import necessary packages
import gradio as gr

# Step 3: Replace this with your real model logic
def predict_personality(text):
    # Dummy response (Replace with your model's prediction)
    return "Predicted Personality Type: INTJ"

# Step 4: Create interface
iface = gr.Interface(
    fn=predict_personality,
    inputs=gr.Textbox(lines=5, placeholder="Enter your text here..."),
    outputs="text",
    title="Personality Predictor",
    description="Enter a paragraph or sentence to predict your personality."
)

# Step 5: Launch app
iface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e841f8a3e5a0fe0816.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [23]:
import gradio as gr
import random

# Sample dictionary with descriptions and GIF URLs for each personality type
personality_data = {
    "INTJ": {
        "description": "Strategic and analytical, known as the Mastermind.",
        "gif_url": "https://media.giphy.com/media/xUOwGpdeKfcz6tNQK0/giphy.gif"
    },
    "INFP": {
        "description": "Idealistic and empathetic, known as the Mediator.",
        "gif_url": "https://media.giphy.com/media/26tn33aiTi1jkl6H6/giphy.gif"
    },
    "ENTP": {
        "description": "Inventive and enthusiastic, known as the Debater.",
        "gif_url": "https://media.giphy.com/media/YTbZzCkRQCEJa/giphy.gif"
    },
    "ESFP": {
        "description": "Playful and energetic, known as the Entertainer.",
        "gif_url": "https://media.giphy.com/media/d31w24psGYeekCZy/giphy.gif"
    },
    # Add more personality types as needed
}

# Dummy prediction function (replace with your ML model)
def predict_personality(text):
    predicted_type = random.choice(list(personality_data.keys()))
    data = personality_data[predicted_type]
    result_text = f"Predicted Personality Type: {predicted_type}\n\n{data['description']}"
    return result_text, data['gif_url']

# Gradio Interface
with gr.Blocks() as iface:
    gr.Markdown("## 🌟 Personality Predictor")
    gr.Markdown("Enter a paragraph or sentence to discover your MBTI personality type with an animated result!")

    with gr.Row():
        with gr.Column():
            user_input = gr.Textbox(label="Your text", lines=5, placeholder="Type your personality-revealing paragraph here...")
            submit_btn = gr.Button("🔮 Predict")
        with gr.Column():
            output_text = gr.Textbox(label="Personality Output")
            gif_display = gr.Image(label="Personality Animation", type="filepath")

    submit_btn.click(fn=predict_personality, inputs=user_input, outputs=[output_text, gif_display])

# Launch with public link
iface.launch(share=True)


Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://e9f832fb4c7070593b.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Sample training data
data = {
    'text': [
        "I enjoy being with people and going on adventures.",
        "I love solving logical problems and organizing my schedule.",
        "I prefer quiet places and deep conversations.",
        "I make decisions based on feelings, not logic."
    ],
    'label': ['ENFP', 'INTJ', 'INFJ', 'ISFP']
}

df = pd.DataFrame(data)

# Create and train model
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(df['text'], df['label'])


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import make_pipeline

# Sample training data
data = {
    'text': [
        "I enjoy being with people and going on adventures.",
        "I love solving logical problems and organizing my schedule.",
        "I prefer quiet places and deep conversations.",
        "I make decisions based on feelings, not logic."
    ],
    'label': ['ENFP', 'INTJ', 'INFJ', 'ISFP']
}

df = pd.DataFrame(data)

# Create and train model
model = make_pipeline(TfidfVectorizer(), MultinomialNB())
model.fit(df['text'], df['label'])


In [None]:
from google.colab import files
uploaded = files.upload()


Saving personality_images.zip to personality_images (4).zip


In [None]:
import zipfile
import os

# Extract the images
zip_path = "personality_images.zip"
output_folder = "personality_images"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(output_folder)

# Optional: check if images extracted correctly
for img in os.listdir(output_folder):
    print(img)


ESFP.png
INTJ.png
ISFP.png
ENTJ.png
ENFJ.png
ISTP.png
ENTP.png
ENFP.png
INFJ.png
INTP.png
ESTP.png
ESTJ.png
ISFJ.png
ESFJ.png
INFP.png
ISTJ.png


In [None]:
def predict_with_image(text):
    personality = model.predict([text])[0].upper()
    image_path = f"{output_folder}/{personality}.png"

    if not os.path.exists(image_path):
        image_path = None  # fallback

    return personality, image_path


In [None]:
import gradio as gr

iface = gr.Interface(
    fn=predict_with_image,
    inputs=gr.Textbox(lines=5, placeholder="Enter text...", label="Your Thoughts"),
    outputs=[
        gr.Textbox(label="Predicted Personality"),
        gr.Image(label="Personality Image")
    ],
    title="Personality Type Predictor",
    description="Enter some text to get your MBTI type and a fun cartoon image!"
)

iface.launch()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a70d2e3569d73e24c0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


