# 🎯 Streamlit XGBoost Personality Prediction App

This notebook creates an interactive Streamlit web application that uses our trained XGBoost model to predict personality types (Introvert/Extrovert) based on user input features.

## Features:
- Interactive sliders for all input variables
- Real-time personality prediction
- Confidence score display
- Feature descriptions and explanations
- Model performance metrics
- Beautiful UI with custom styling

## 1. Install Required Packages

In [1]:
# Install required packages if not already installed
import subprocess
import sys

def install_package(package, import_name=None):
    """Ensure a package is importable, installing it with pip when it is not.

    Args:
        package: Distribution name handed to ``pip install``.
        import_name: Module name used for the availability check. Defaults to
            ``package``; pass it when the two differ (for example the pip name
            ``scikit-learn`` is imported as ``sklearn``).

    Raises:
        subprocess.CalledProcessError: If ``pip install`` exits with a
            non-zero status.
    """
    import importlib
    import importlib.util

    module_name = import_name or package
    try:
        # find_spec only locates the module; unlike __import__ it does not
        # execute the package just to find out whether it exists.
        available = importlib.util.find_spec(module_name) is not None
    except (ImportError, ValueError):
        available = False

    if available:
        print(f"✅ {package} is already installed")
        return

    print(f"📦 Installing {package}...")
    subprocess.check_call([sys.executable, "-m", "pip", "install", package])
    # Let the running interpreter notice the freshly installed files.
    importlib.invalidate_caches()
    print(f"✅ {package} installed successfully")

# Third-party packages this notebook needs that may be missing from a fresh
# environment. xgboost is listed because the import cell that follows
# imports it and would otherwise fail.
required_packages = [
    'streamlit',
    'plotly',
    'seaborn',
    'xgboost',
]

print("Checking and installing required packages...")
for package in required_packages:
    install_package(package)

print("\n🎉 All packages are ready!")

Checking and installing required packages...
📦 Installing streamlit...
✅ streamlit installed successfully
✅ plotly is already installed
✅ seaborn is already installed

🎉 All packages are ready!


## 2. Import Libraries and Load Model

In [2]:
import streamlit as st
import pandas as pd
import numpy as np
import xgboost as xgb
import pickle
import plotly.graph_objects as go
import plotly.express as px
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

# Confirm the imports worked and record the versions of the key libraries.
print("✅ All libraries imported successfully!")
for library_label, library_module in (("Streamlit", st), ("XGBoost", xgb), ("Pandas", pd)):
    print(f"{library_label} version: {library_module.__version__}")

✅ All libraries imported successfully!
Streamlit version: 1.46.1
XGBoost version: 1.7.5
Pandas version: 2.2.3


## 3. Load and Prepare Model Data

In [3]:
# Load the training data to get feature information and ranges
print("📂 Loading training data and model components...")

try:
    # Load training data
    train_df = pd.read_csv('train.csv')
    print(f"✅ Training data loaded: {train_df.shape}")

    # Load test data to understand feature ranges
    test_df = pd.read_csv('test.csv')
    print(f"✅ Test data loaded: {test_df.shape}")

except FileNotFoundError as e:
    print(f"❌ Error loading data files: {e}")
    print("Please ensure train.csv and test.csv are in the current directory")
    # Everything below needs train_df and test_df. Stop here instead of
    # carrying on into a confusing NameError a few lines further down.
    raise

# Get feature columns (excluding id and target)
feature_columns = [col for col in train_df.columns if col not in ['id', 'Personality']]
categorical_columns = train_df[feature_columns].select_dtypes(include=['object']).columns.tolist()
numerical_columns = train_df[feature_columns].select_dtypes(include=[np.number]).columns.tolist()

print(f"\n📊 Dataset Information:")
print(f"Total features: {len(feature_columns)}")
print(f"Numerical features: {len(numerical_columns)}")
print(f"Categorical features: {len(categorical_columns)}")
print(f"Feature columns: {feature_columns[:5]}... (showing first 5)")

# Get feature statistics for slider ranges. Train and test values are pooled
# so the ranges cover everything present in either file.
feature_stats = {}
for col in numerical_columns:
    combined_data = pd.concat([train_df[col], test_df[col]])
    feature_stats[col] = {
        'min': combined_data.min(),
        'max': combined_data.max(),
        'mean': combined_data.mean(),
        'std': combined_data.std(),
        'median': combined_data.median()
    }

print(f"✅ Feature statistics calculated for {len(feature_stats)} numerical features")

📂 Loading training data and model components...
✅ Training data loaded: (18524, 9)
✅ Test data loaded: (6175, 8)

📊 Dataset Information:
Total features: 7
Numerical features: 5
Categorical features: 2
Feature columns: ['Time_spent_Alone', 'Stage_fear', 'Social_event_attendance', 'Going_outside', 'Drained_after_socializing']... (showing first 5)
✅ Feature statistics calculated for 5 numerical features


## 4. Train a Simple Model for the App

Since we need to ensure the model is available, let's train a lightweight version for the app:

In [4]:
# Fit a compact XGBoost classifier on the training data, then persist it
# together with everything the Streamlit app needs to rebuild its inputs.
print("🚀 Training XGBoost model for the Streamlit app...")

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report

# Feature matrix and raw target labels (copies, so train_df stays untouched)
X = train_df[feature_columns].copy()
y = train_df['Personality'].copy()

# Integer-encode every categorical column. Each encoder is fitted on the
# train and test values together so both share a single mapping.
label_encoders = {}
for col in categorical_columns:
    if col not in X.columns:
        continue
    le = LabelEncoder()
    combined_values = pd.concat([X[col], test_df[col]]).astype(str)
    le.fit(combined_values)
    X[col] = le.transform(X[col].astype(str))
    label_encoders[col] = le

# Turn the string target into integer class ids
target_encoder = LabelEncoder()
y_encoded = target_encoder.fit_transform(y)

# Stratified 80/20 hold-out split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

# Model hyper-parameters, gathered in one place
xgb_params = {
    'n_estimators': 100,
    'max_depth': 6,
    'learning_rate': 0.1,
    'random_state': 42,
    'eval_metric': 'logloss',
}
xgb_model = xgb.XGBClassifier(**xgb_params)

print("Training model...")
xgb_model.fit(X_train, y_train)

# Score the model on the held-out part
y_pred = xgb_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("✅ Model training completed!")
print(f"Model accuracy: {accuracy:.4f}")
print(f"Target classes: {target_encoder.classes_}")

# Bundle everything the app reads back from the pickle file
model_components = dict(
    model=xgb_model,
    target_encoder=target_encoder,
    label_encoders=label_encoders,
    feature_columns=feature_columns,
    categorical_columns=categorical_columns,
    numerical_columns=numerical_columns,
    feature_stats=feature_stats,
    accuracy=accuracy,
)

# Write the bundle to disk
with open('xgboost_personality_model.pkl', 'wb') as f:
    pickle.dump(model_components, f)

print("✅ Model components saved to 'xgboost_personality_model.pkl'")

🚀 Training XGBoost model for the Streamlit app...
Training model...
✅ Model training completed!
Model accuracy: 0.9714
Target classes: ['Extrovert' 'Introvert']
✅ Model components saved to 'xgboost_personality_model.pkl'


## 5. Create Feature Descriptions

Let's create meaningful descriptions for each feature to help users understand what they're inputting:

In [5]:
# Human-readable help text for each feature, shown next to the input widgets
feature_descriptions = {
    # Descriptions for the variables present in the dataset
    'Time_spent_Alone': 'Average time (hours or scale) a person spends alone everyday. Higher values often indicate introversion.',
    'Stage_fear': 'Do you feel nervous or scared speaking or performing in front of a group? (Yes/No). More common among introverts.',
    'Social_event_attendance': 'Frequency of attending social events (0–10 scale). 0 = never attend avoidable social events, 10 = always attend when possible.',
    'Going_outside': 'How many days in a week do you go outside? (0–7 scale). Not for work, but for rides, shopping, events, socializing etc.',
    'Drained_after_socializing': 'Do you start overthinking about your conversations after socializing? (Yes/No). Indicates feeling drained after social interactions.',
    'Friends_circle_size': 'Number of close friends you can call almost anytime and talk without having major news to share. Larger for extroverts.',
    'Post_frequency': 'How often do you post on social media? (0–10 scale). 0 = never post, 10 = always share your P.O.V., news, achievements, experiences etc.',

    # Generic descriptions for other common personality features
    'Age': 'Your current age in years',
    'Gender': 'Your gender identity',
    'Education_Level': 'Your highest level of education completed',
    'Introversion_Score': 'Score measuring introverted tendencies (higher = more introverted)',
    'Sensing_Score': 'Score measuring preference for concrete information vs abstract concepts',
    'Thinking_Score': 'Score measuring preference for logical vs emotional decision making',
    'Judging_Score': 'Score measuring preference for structure vs flexibility',
    'Depression_Score': 'Score measuring depressive symptoms or tendencies',
    'Anxiety_Score': 'Score measuring anxiety levels or tendencies',
    'Stress_Score': 'Score measuring stress levels in daily life',
    'Mental_Health_History': 'History of mental health issues or treatment',
    'Personality_Disorder': 'Presence of diagnosed personality disorders',
}

# Ordered (keywords, text) rules for deriving a description from a column
# name. The first rule with a keyword found in the lower-cased name wins.
fallback_rules = [
    (('score',), 'A numerical measurement or rating'),
    (('time',), 'Time-related measurement'),
    (('count', 'number'), 'Count or frequency measurement'),
    (('frequency',), 'Frequency or rate measurement'),
    (('size',), 'Size or quantity measurement'),
    (('fear',), 'Fear or anxiety related characteristic'),
    (('social',), 'Social behavior or preference'),
]
default_fallback_text = 'Personal characteristic or preference'

# Fill in a generic description for every feature not covered above
for col in feature_columns:
    if col in feature_descriptions:
        continue
    clean_name = col.replace('_', ' ').title()
    lowered_name = col.lower()
    fallback_text = next(
        (text for keywords, text in fallback_rules
         if any(keyword in lowered_name for keyword in keywords)),
        default_fallback_text,
    )
    feature_descriptions[col] = f'{clean_name} - {fallback_text}'

print(f"✅ Created descriptions for {len(feature_descriptions)} features")
print("\nSample descriptions:")
for feature, desc in list(feature_descriptions.items())[:5]:
    print(f"  {feature}: {desc}")
print("  ...")

✅ Created descriptions for 19 features

Sample descriptions:
  Age: Your current age in years
  Gender: Your gender identity
  Education_Level: Your highest level of education completed
  Introversion_Score: Score measuring introverted tendencies (higher = more introverted)
  Sensing_Score: Score measuring preference for concrete information vs abstract concepts
  ...


## 6. Create the Streamlit Application

In [6]:
%%writefile streamlit_personality_app.py
import streamlit as st
import pandas as pd
import numpy as np
import pickle
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import warnings
warnings.filterwarnings('ignore')

# Browser-tab title and icon, plus the overall page layout
page_settings = {
    "page_title": "🧠 Personality Predictor",
    "page_icon": "🧠",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**page_settings)

# Stylesheet for the CSS classes referenced by the HTML snippets below
custom_css = """
<style>
    .main-header {
        font-size: 3rem;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
    }
    .prediction-box {
        background-color: #f0f2f6;
        border-radius: 10px;
        padding: 20px;
        margin: 10px 0;
        border-left: 5px solid #1f77b4;
    }
    .feature-description {
        font-size: 0.8rem;
        color: #666;
        font-style: italic;
        margin-top: -10px;
        margin-bottom: 10px;
    }
    .metric-container {
        background-color: #ffffff;
        border-radius: 8px;
        padding: 15px;
        box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        margin: 10px 0;
    }
</style>
"""
st.markdown(custom_css, unsafe_allow_html=True)

@st.cache_resource
def load_model_components():
    """Load the trained model and its companion objects from disk.

    Cached with ``st.cache_resource`` so the unpickled bundle (which holds the
    fitted model) is created once and shared across reruns, rather than being
    serialized and copied on every call as ``st.cache_data`` would do.
    Callers should treat the returned dict as read-only because it is shared.

    Returns:
        dict: The object stored in 'xgboost_personality_model.pkl'.

    If the file is missing, an error is shown in the app and the script run
    is halted via ``st.stop()``.
    """
    try:
        # NOTE(security): pickle.load can execute arbitrary code. Only load a
        # model file that you generated yourself.
        with open('xgboost_personality_model.pkl', 'rb') as f:
            return pickle.load(f)
    except FileNotFoundError:
        st.error("❌ Model file not found! Please run the training notebook first.")
        st.stop()

def create_feature_descriptions():
    """Return a dict mapping feature names to their help text.

    The dataset's own variables come first, followed by generic entries for
    other common personality features.
    """
    dataset_features = {
        'Time_spent_Alone': 'Average time (hours or scale) a person spends alone everyday. Higher values often indicate introversion.',
        'Stage_fear': 'Do you feel nervous or scared speaking or performing in front of a group? (Yes/No). More common among introverts.',
        'Social_event_attendance': 'Frequency of attending social events (0–10 scale). 0 = never attend avoidable social events, 10 = always attend when possible.',
        'Going_outside': 'How many days in a week do you go outside? (0–7 scale). Not for work, but for rides, shopping, events, socializing etc.',
        'Drained_after_socializing': 'Do you start overthinking about your conversations after socializing? (Yes/No). Indicates feeling drained after social interactions.',
        'Friends_circle_size': 'Number of close friends you can call almost anytime and talk without having major news to share. Larger for extroverts.',
        'Post_frequency': 'How often do you post on social media? (0–10 scale). 0 = never post, 10 = always share your P.O.V., news, achievements, experiences etc.',
    }

    generic_features = {
        'Age': 'Your current age in years',
        'Gender': 'Your gender identity',
        'Education_Level': 'Your highest level of education completed',
        'Introversion_Score': 'Score measuring introverted tendencies (higher = more introverted)',
        'Sensing_Score': 'Score measuring preference for concrete information vs abstract concepts',
        'Thinking_Score': 'Score measuring preference for logical vs emotional decision making',
        'Judging_Score': 'Score measuring preference for structure vs flexibility',
        'Depression_Score': 'Score measuring depressive symptoms or tendencies',
        'Anxiety_Score': 'Score measuring anxiety levels or tendencies',
        'Stress_Score': 'Score measuring stress levels in daily life',
        'Mental_Health_History': 'History of mental health issues or treatment',
        'Personality_Disorder': 'Presence of diagnosed personality disorders',
    }

    # Merge while keeping the dataset entries ahead of the generic ones
    return {**dataset_features, **generic_features}

def get_feature_description(feature_name, descriptions):
    """Return the help text for a feature, deriving a generic one if needed.

    Args:
        feature_name: Column name of the feature.
        descriptions: Mapping of feature name to description text.

    Returns:
        The entry from ``descriptions`` when the feature is listed there.
        Otherwise '<Title Cased Name> - <text>', where the text is chosen by
        the first keyword found in the lower-cased feature name.
    """
    if feature_name in descriptions:
        return descriptions[feature_name]

    # Ordered rules: earlier keywords take precedence over later ones
    keyword_rules = (
        (('score',), 'A numerical measurement or rating'),
        (('time',), 'Time-related measurement'),
        (('count', 'number'), 'Count or frequency measurement'),
        (('frequency',), 'Frequency or rate measurement'),
        (('size',), 'Size or quantity measurement'),
        (('fear',), 'Fear or anxiety related characteristic'),
        (('social',), 'Social behavior or preference'),
    )

    clean_name = feature_name.replace('_', ' ').title()
    lowered = feature_name.lower()
    for keywords, text in keyword_rules:
        if any(keyword in lowered for keyword in keywords):
            return f'{clean_name} - {text}'
    return f'{clean_name} - Personal characteristic or preference'

def create_gauge_chart(value, title, max_val=1.0):
    """Create a gauge chart for a confidence score.

    Args:
        value: Number shown on the gauge.
        title: Text displayed with the gauge.
        max_val: Upper end of the gauge axis and of the top colour band
            (default 1.0).

    Returns:
        A Plotly figure holding a single Indicator trace.
    """
    fig = go.Figure(go.Indicator(
        # No "delta" in the mode: no reference value is supplied, so a delta
        # readout would carry no information.
        mode="gauge+number",
        value=value,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': title},
        gauge={
            # Explicit lower bound so the axis lines up with the colour
            # bands, which start at 0.
            'axis': {'range': [0, max_val]},
            'bar': {'color': "darkblue"},
            'steps': [
                {'range': [0, 0.5], 'color': "lightgray"},
                {'range': [0.5, 0.8], 'color': "yellow"},
                {'range': [0.8, max_val], 'color': "green"}
            ],
            # Red marker line drawn at 0.9
            'threshold': {
                'line': {'color': "red", 'width': 4},
                'thickness': 0.75,
                'value': 0.9
            }
        }
    ))

    fig.update_layout(height=300, margin={'l': 20, 'r': 20, 't': 40, 'b': 20})
    return fig

def create_probability_chart(proba_scores, class_names):
    """Create a horizontal bar chart of the per-class probabilities.

    Args:
        proba_scores: Probabilities, one per class, in the same order as
            ``class_names``.
        class_names: Class labels used as bar names.

    Returns:
        A Plotly figure with one horizontal bar per class and an x-axis fixed
        to the 0-100% range.
    """
    fig = go.Figure()

    # Orange for Introvert, blue for Extrovert. Colours are keyed by name
    # rather than by position so they stay correct whatever the class order.
    class_colors = {'Introvert': '#ff7f0e', 'Extrovert': '#1f77b4'}
    # Used for any other label; cycled so extra classes cannot raise IndexError
    fallback_colors = ['#2ca02c', '#d62728', '#9467bd', '#8c564b']

    for i, (prob, name) in enumerate(zip(proba_scores, class_names)):
        bar_color = class_colors.get(str(name), fallback_colors[i % len(fallback_colors)])
        fig.add_trace(go.Bar(
            y=[name],
            x=[prob],
            orientation='h',
            marker_color=bar_color,
            text=f'{prob:.1%}',
            textposition='inside'
        ))

    fig.update_layout(
        title="Prediction Probabilities",
        xaxis_title="Probability",
        yaxis_title="Personality Type",
        height=200,
        showlegend=False,
        margin={'l': 20, 'r': 20, 't': 40, 'b': 20}
    )

    # The Figure method is update_xaxes (plural); update_xaxis does not exist
    # and raised AttributeError.
    fig.update_xaxes(range=[0, 1], tickformat='.0%')

    return fig

def _render_feature_heading(feature, descriptions):
    """Show a feature's bold title followed by its italic help text."""
    description = get_feature_description(feature, descriptions)
    st.markdown(f"**{feature.replace('_', ' ').title()}**")
    st.markdown(f'<p class="feature-description">{description}</p>', unsafe_allow_html=True)


def _numeric_slider(feature, stats):
    """Render a slider over the feature's recorded range, plus a stats caption, and return its value."""
    low = float(stats['min'])
    high = float(stats['max'])
    selected = st.slider(
        f"Select {feature.replace('_', ' ').lower()}",
        min_value=low,
        max_value=high,
        value=float(stats['median']),
        # 100 steps across the range; cast so every slider argument is a plain float
        step=float((high - low) / 100),
        key=feature
    )
    st.caption(f"Range: {stats['min']:.1f} - {stats['max']:.1f} | Average: {stats['mean']:.1f}")
    return selected


def _category_selectbox(feature, encoder):
    """Render a dropdown listing the encoder's classes and return the chosen one."""
    return st.selectbox(
        f"Select {feature.replace('_', ' ').lower()}",
        options=encoder.classes_,
        key=feature
    )


def _show_prediction(model, target_encoder, label_encoders, feature_columns,
                     categorical_columns, all_inputs):
    """Run the model on the collected inputs and render the result widgets."""
    # Prepare input data
    input_data = pd.DataFrame([all_inputs])

    # Encode categorical variables with the encoders loaded from the model file
    for feature in categorical_columns:
        if feature in input_data.columns and feature in label_encoders:
            le = label_encoders[feature]
            input_data[feature] = le.transform([all_inputs[feature]])

    # Ensure all features are present and in correct order
    input_data = input_data.reindex(columns=feature_columns, fill_value=0)

    # Make prediction
    prediction = model.predict(input_data)[0]
    prediction_proba = model.predict_proba(input_data)[0]

    # Convert prediction back to label
    predicted_personality = target_encoder.inverse_transform([prediction])[0]
    confidence = max(prediction_proba)

    # Main prediction
    st.markdown('<div class="prediction-box">', unsafe_allow_html=True)
    if predicted_personality == 'Extrovert':
        st.success(f"🎉 **You are predicted to be an {predicted_personality}!**")
    else:
        st.info(f"🤔 **You are predicted to be an {predicted_personality}!**")
    st.markdown('</div>', unsafe_allow_html=True)

    # Confidence gauge
    st.subheader("📊 Confidence Score")
    gauge_fig = create_gauge_chart(confidence, "Model Confidence")
    st.plotly_chart(gauge_fig, use_container_width=True)

    # Probability breakdown
    st.subheader("📈 Probability Breakdown")
    prob_fig = create_probability_chart(prediction_proba, target_encoder.classes_)
    st.plotly_chart(prob_fig, use_container_width=True)

    # Additional insights
    st.subheader("💡 Insights")

    if confidence > 0.8:
        # Show the actual confidence, as the other two branches do
        st.success(f"🎯 **High Confidence Prediction** ({confidence:.1%})")
        st.write("The model is very confident in this prediction.")
    elif confidence > 0.6:
        st.warning(f"⚖️ **Moderate Confidence** ({confidence:.1%})")
        st.write("The model has moderate confidence. You might have characteristics of both personality types.")
    else:
        st.error(f"🤷 **Low Confidence** ({confidence:.1%})")
        st.write("The model has low confidence. Your characteristics show a mix of both personality types.")

    # Personality type explanation
    st.subheader(f"📖 About {predicted_personality}s")

    if predicted_personality == 'Extrovert':
        st.markdown("""
        **Extroverts** typically:
        - Gain energy from social interactions
        - Enjoy being around people
        - Are often outgoing and talkative
        - Prefer group activities
        - Think out loud and process externally
        """)
    else:
        st.markdown("""
        **Introverts** typically:
        - Gain energy from solitude
        - Prefer smaller social groups
        - Are often thoughtful and reflective
        - Enjoy quiet activities
        - Think internally before speaking
        """)


def _show_model_info(model_accuracy, feature_columns, target_encoder):
    """Render the prompt and model summary shown before a prediction is requested."""
    st.info("👆 Adjust the sliders and click 'Predict My Personality' to see your results!")

    # Show model information
    st.subheader("🤖 Model Information")
    st.markdown(f"""
    <div class="metric-container">
        <h4>📈 Model Performance</h4>
        <p><strong>Accuracy:</strong> {model_accuracy:.1%}</p>
        <p><strong>Algorithm:</strong> XGBoost Classifier</p>
        <p><strong>Features:</strong> {len(feature_columns)} characteristics</p>
        <p><strong>Classes:</strong> {', '.join(target_encoder.classes_)}</p>
    </div>
    """, unsafe_allow_html=True)


def main():
    """Render the app: input widgets in the left column, results in the right.

    Loads the model bundle, builds one widget per feature (sliders for
    numerical features, dropdowns for categorical ones) and, when the predict
    button is pressed, shows the predicted class with its probabilities.
    """
    # App title and description
    st.markdown('<h1 class="main-header">🧠 AI Personality Predictor</h1>', unsafe_allow_html=True)

    st.markdown("""
    <div style="text-align: center; margin-bottom: 2rem;">
        <p style="font-size: 1.2rem; color: #666;">
            Discover your personality type using advanced Machine Learning! 
            Adjust the sliders below to input your characteristics and get an instant prediction.
        </p>
    </div>
    """, unsafe_allow_html=True)

    # Load model components
    components = load_model_components()
    model = components['model']
    target_encoder = components['target_encoder']
    label_encoders = components['label_encoders']
    feature_columns = components['feature_columns']
    categorical_columns = components['categorical_columns']
    numerical_columns = components['numerical_columns']
    feature_stats = components['feature_stats']
    model_accuracy = components['accuracy']

    # Feature descriptions
    descriptions = create_feature_descriptions()

    # Every widget value is collected here, keyed by feature name. Using one
    # explicit dict replaces probing locals() for per-branch dictionaries.
    all_inputs = {}

    # Create two columns for layout
    col1, col2 = st.columns([2, 1])

    with col1:
        st.header("📊 Input Your Characteristics")

        if len(categorical_columns) > 0 and len(numerical_columns) > 0:
            # Both kinds of feature exist: give each kind its own tab
            tab1, tab2 = st.tabs(["📈 Numerical Features", "📋 Categorical Features"])

            with tab1:
                st.subheader("Numerical Characteristics")
                for feature in numerical_columns:
                    if feature in feature_stats:
                        _render_feature_heading(feature, descriptions)
                        all_inputs[feature] = _numeric_slider(feature, feature_stats[feature])
                        st.divider()

            with tab2:
                st.subheader("Categorical Characteristics")
                for feature in categorical_columns:
                    if feature in label_encoders:
                        _render_feature_heading(feature, descriptions)
                        all_inputs[feature] = _category_selectbox(feature, label_encoders[feature])
                        st.divider()
        else:
            # If only one type of feature, show them all in one section
            for feature in feature_columns:
                _render_feature_heading(feature, descriptions)

                if feature in numerical_columns and feature in feature_stats:
                    all_inputs[feature] = _numeric_slider(feature, feature_stats[feature])
                elif feature in categorical_columns and feature in label_encoders:
                    all_inputs[feature] = _category_selectbox(feature, label_encoders[feature])

                st.divider()

    with col2:
        st.header("🎯 Prediction Results")

        # Create prediction button
        if st.button("🔮 Predict My Personality", type="primary", use_container_width=True):
            _show_prediction(
                model,
                target_encoder,
                label_encoders,
                feature_columns,
                categorical_columns,
                all_inputs,
            )
        else:
            _show_model_info(model_accuracy, feature_columns, target_encoder)

    # Footer
    st.markdown("---")
    st.markdown("""
    <div style="text-align: center; color: #666; margin-top: 2rem;">
        <p>🧠 <strong>AI Personality Predictor</strong> | Built with Streamlit & XGBoost</p>
        <p><em>This is for educational purposes. Results should not be considered as professional psychological assessment.</em></p>
    </div>
    """, unsafe_allow_html=True)

if __name__ == "__main__":
    main()

Overwriting streamlit_personality_app.py


## 7. Launch Instructions and App Information

In [7]:
# Print a post-build summary: generated files, launch steps, feature list,
# model facts and runtime requirements.
banner_line = "=" * 60

print("🚀 STREAMLIT APP CREATED SUCCESSFULLY!")
print(banner_line)

# Fixed text, grouped as (heading, lines)
static_sections = (
    ("\n📁 Files created:", (
        "  ✅ streamlit_personality_app.py - Main Streamlit application",
        "  ✅ xgboost_personality_model.pkl - Trained model and components",
    )),
    ("\n🚀 To run the Streamlit app:", (
        "  1. Open a terminal/command prompt",
        "  2. Navigate to this directory",
        "  3. Run: streamlit run streamlit_personality_app.py",
        "  4. The app will open in your browser automatically",
    )),
    ("\n🎯 App Features:", (
        "  📊 Interactive sliders for all numerical features",
        "  📋 Dropdown menus for categorical features",
        "  🔮 Real-time personality prediction",
        "  📈 Confidence score with gauge visualization",
        "  📊 Probability breakdown chart",
        "  💡 Personalized insights based on confidence",
        "  📖 Educational information about personality types",
        "  🎨 Beautiful, responsive UI design",
    )),
)
for section_heading, section_lines in static_sections:
    print(section_heading)
    for section_line in section_lines:
        print(section_line)

# Facts read back from the saved model bundle
print("\n🛠️ Technical Details:")
technical_lines = (
    f"  🤖 Model: XGBoost Classifier (Accuracy: {model_components['accuracy']:.1%})",
    f"  📊 Features: {len(model_components['feature_columns'])} total",
    f"  🔢 Numerical: {len(model_components['numerical_columns'])}",
    f"  📋 Categorical: {len(model_components['categorical_columns'])}",
    f"  🎯 Classes: {', '.join(model_components['target_encoder'].classes_)}",
)
for technical_line in technical_lines:
    print(technical_line)

print("\n⚠️  Requirements:")
for requirement in ("streamlit", "pandas", "numpy", "xgboost", "plotly", "scikit-learn"):
    print(f"  - {requirement}")

print("\n🎉 Ready to launch! Run the command above to start the app.")
print(banner_line)

🚀 STREAMLIT APP CREATED SUCCESSFULLY!

📁 Files created:
  ✅ streamlit_personality_app.py - Main Streamlit application
  ✅ xgboost_personality_model.pkl - Trained model and components

🚀 To run the Streamlit app:
  1. Open a terminal/command prompt
  2. Navigate to this directory
  3. Run: streamlit run streamlit_personality_app.py
  4. The app will open in your browser automatically

🎯 App Features:
  📊 Interactive sliders for all numerical features
  📋 Dropdown menus for categorical features
  🔮 Real-time personality prediction
  📈 Confidence score with gauge visualization
  📊 Probability breakdown chart
  💡 Personalized insights based on confidence
  📖 Educational information about personality types
  🎨 Beautiful, responsive UI design

🛠️ Technical Details:
  🤖 Model: XGBoost Classifier (Accuracy: 97.1%)
  📊 Features: 7 total
  🔢 Numerical: 5
  📋 Categorical: 2
  🎯 Classes: Extrovert, Introvert

⚠️  Requirements:
  - streamlit
  - pandas
  - numpy
  - xgboost
  - plotly
  - scikit-le

## 8. Quick Launch Command

Run the code cell at the end of this section to launch the Streamlit app automatically, or follow the terminal instructions below:

## 🚀 How to Run the Streamlit App from Terminal

**Instead of running the cell below, follow these steps to launch the app from your terminal:**

### Windows PowerShell Instructions:

1. **Open PowerShell** (Press `Win + X` and select "Windows PowerShell" or "Terminal")

2. **Navigate to your project directory:**
   ```powershell
   cd "path\to\playground-series-s5e7"
   ```

3. **Run the Streamlit app:**
   ```powershell
   streamlit run streamlit_personality_app.py
   ```

4. **The app will automatically open in your browser** at `http://localhost:8501`

### Troubleshooting:

- **If you get "streamlit is not recognized":**
  ```powershell
  pip install streamlit
  ```

- **If you need to stop the app:** Press `Ctrl + C` in the terminal

- **If the browser doesn't open automatically:** Go to `http://localhost:8501` manually

### Alternative Method (if in VS Code):
- Open the **Terminal** in VS Code (`` Ctrl + ` ``)
- Make sure you're in the correct directory
- Run: `streamlit run streamlit_personality_app.py`

---

**✅ The Plotly error has been fixed in the updated streamlit_personality_app.py file!**

In [None]:
# Quick launch command (run this to start the app)
import subprocess
import os
import sys
import time

print("🚀 Launching Streamlit app...")
print("\n⏳ This will open a new browser tab with your personality prediction app!")
print("\n🛑 To stop the app, interrupt this cell or close the terminal.")
print("\n" + "="*50)

try:
    # Launch through the kernel's own interpreter so the Streamlit that runs
    # is the one installed in this environment, even when the `streamlit`
    # executable is not on PATH. This blocks until the app is stopped.
    subprocess.run(
        [sys.executable, "-m", "streamlit", "run", "streamlit_personality_app.py"],
        check=True,
    )
except KeyboardInterrupt:
    print("\n\n🛑 App stopped by user")
except subprocess.CalledProcessError as e:
    # Also reached when the streamlit module is missing from this environment
    print(f"\n❌ Error launching app: {e}")
    print("\n💡 Try running manually: streamlit run streamlit_personality_app.py")
    print("   If Streamlit is not installed, run: pip install streamlit")
except FileNotFoundError:
    print("\n❌ Streamlit not found! Please install it first:")
    print("   pip install streamlit")
    print("\n   Then run: streamlit run streamlit_personality_app.py")

🚀 Launching Streamlit app...

⏳ This will open a new browser tab with your personality prediction app!

🛑 To stop the app, interrupt this cell or close the terminal.



## 🚀 How to Run the Streamlit App from Terminal

**Instead of running the cell above, follow these steps to launch the app from your terminal:**

### Windows PowerShell Instructions:

1. **Open PowerShell** (Press `Win + X` and select "Windows PowerShell" or "Terminal")

2. **Navigate to your project directory:**
   ```powershell
   cd "<path-to-your-project>\playground-series-s5e7"
   ```

3. **Run the Streamlit app:**
   ```powershell
   streamlit run streamlit_personality_app.py
   ```

4. **The app will automatically open in your browser** at `http://localhost:8501`

### Troubleshooting:

- **If you get "streamlit is not recognized":**
  ```powershell
  pip install streamlit
  ```

- **If you need to stop the app:** Press `Ctrl + C` in the terminal

- **If the browser doesn't open automatically:** Go to `http://localhost:8501` manually

### Alternative Method (if in VS Code):
- Open the **Terminal** in VS Code (`` Ctrl + ` ``)
- Make sure you're in the correct directory
- Run: `streamlit run streamlit_personality_app.py`

## 9. App Screenshots and Demo

The cell below prints a few sample predictions to preview the kind of results your Streamlit app will show:

In [None]:
# Create a simple demo of what the app predictions look like
#
# Builds three synthetic profiles from the per-feature statistics, runs them
# through the trained model and prints the predicted label, the confidence
# and the per-class probabilities.
print("🎭 DEMO: What the app predictions look like")
print("=" * 50)

# Human-readable description of each synthetic profile
sample_inputs = [
    "High social activity, outgoing personality",
    "Prefers quiet environments, reflective nature",
    "Balanced social preferences"
]

for i, description in enumerate(sample_inputs):
    print(f"\n🧪 Sample {i+1}: {description}")

    # Re-seed per sample so the random categorical picks are reproducible
    np.random.seed(i + 42)
    sample_data = {}

    # Numerical features: shift every feature half a standard deviation above
    # the mean (sample 0), below it (sample 1), or leave it at the mean.
    for feature in numerical_columns:
        if feature in feature_stats:
            stats = feature_stats[feature]
            if i == 0:  # Extrovert-like
                value = stats['mean'] + 0.5 * stats['std']
            elif i == 1:  # Introvert-like
                value = stats['mean'] - 0.5 * stats['std']
            else:  # Balanced
                value = stats['mean']

            # Keep the synthetic value inside the observed feature range
            sample_data[feature] = np.clip(value, stats['min'], stats['max'])

    # Categorical features: pick one of the classes known to the encoder
    for feature in categorical_columns:
        if feature in label_encoders:
            sample_data[feature] = np.random.choice(label_encoders[feature].classes_)

    sample_df = pd.DataFrame([sample_data])

    # Encode categorical values with the same encoders stored in label_encoders
    for feature in categorical_columns:
        if feature in sample_df.columns and feature in label_encoders:
            le = label_encoders[feature]
            sample_df[feature] = le.transform([sample_data[feature]])

    # Put columns in the expected order; features missing from the sample become 0
    sample_df = sample_df.reindex(columns=feature_columns, fill_value=0)

    # Predict
    pred = xgb_model.predict(sample_df)[0]
    pred_proba = xgb_model.predict_proba(sample_df)[0]
    predicted_label = target_encoder.inverse_transform([pred])[0]
    confidence = max(pred_proba)

    # Pair every probability with its class label taken from the encoder
    # instead of hard-coding indices: the encoded class order is
    # Extrovert (0), Introvert (1), so hard-coded [1]/[0] labels were swapped.
    # NOTE(review): assumes predict_proba columns follow target_encoder.classes_ order.
    proba_text = ", ".join(
        f"{label} {proba:.1%}"
        for label, proba in zip(target_encoder.classes_, pred_proba)
    )

    print(f"   🎯 Prediction: {predicted_label}")
    print(f"   📊 Confidence: {confidence:.1%}")
    print(f"   📈 Probabilities: {proba_text}")

print("\n" + "=" * 50)
print("🎉 Your Streamlit app is ready to make predictions like these!")
print("Launch the app to start exploring personality predictions interactively.")