# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
import mlflow
import mlflow.sklearn
import pickle
import os

# --- Data loading and cleaning ---------------------------------------------
# Read the personality dataset from its CSV file into a DataFrame.
df = pd.read_csv('C:\\ml_app_assignment\\personality_dataset.csv')

# Impute missing values in 'Going_outside' with that column's median.
# No other column is imputed in this section.
going_outside_median = df['Going_outside'].median()
df['Going_outside'] = df['Going_outside'].fillna(going_outside_median)

# Convert the Yes/No columns to 1/0 flags. Series.map yields NaN for any
# value that is not exactly 'Yes' or 'No'.
binary_columns = ['Stage_fear', 'Drained_after_socializing']
yes_no_codes = {'Yes': 1, 'No': 0}
for col in binary_columns:
    df[col] = df[col].map(yes_no_codes)

# Encode the target label: Introvert -> 0, Extrovert -> 1 (anything else -> NaN).
personality_codes = {'Introvert': 0, 'Extrovert': 1}
df['Personality'] = df['Personality'].map(personality_codes)

# Drop, in place, every row that still has a missing value in any column.
# This also removes rows whose labels were not covered by the mappings above.
df.dropna(inplace=True)

# --- Features / target and hold-out split ----------------------------------
# 'Personality' is the label; every remaining column is used as a feature.
target_column = 'Personality'
X = df.drop(columns=[target_column])
y = df[target_column]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Model training --------------------------------------------------------
# Random forest of 100 trees, each limited to depth 5, with a fixed seed.
rf_settings = {"n_estimators": 100, "max_depth": 5, "random_state": 42}

# fit() returns the estimator itself, so construction and training can be chained.
model = RandomForestClassifier(**rf_settings).fit(X_train, y_train)

# Predicted labels for the held-out rows.
y_pred = model.predict(X_test)

# --- Evaluation on the held-out set -----------------------------------------
accuracy = accuracy_score(y_test, y_pred)

# Precision, recall and F1 are computed in that order with the same arguments;
# zero_division=0 makes an undefined score evaluate to 0.
precision, recall, f1 = (
    scorer(y_test, y_pred, zero_division=0)
    for scorer in (precision_score, recall_score, f1_score)
)

# Per-class summary printed for quick inspection.
report_text = classification_report(y_test, y_pred)
print("Classification Report:\n", report_text)

# MLflow logging
# Record the trained model, its hyperparameters and its evaluation metrics
# as a single run in the "personality_prediction" experiment.
mlflow.set_experiment("personality_prediction")

with mlflow.start_run() as run:
    run_id = run.info.run_id

    # Params: read the hyperparameters back from the estimator instead of
    # repeating literals, so the logged values cannot drift from what the
    # model was actually built with. random_state is logged as well because
    # it is needed to reproduce the run.
    hyperparams = model.get_params()
    mlflow.log_params({
        "model_type": "RandomForest",
        "n_estimators": hyperparams["n_estimators"],
        "max_depth": hyperparams["max_depth"],
        "random_state": hyperparams["random_state"],
    })

    # Metrics: logged together in one call, under the same keys as before.
    mlflow.log_metrics({
        "accuracy": accuracy,
        "precision": precision,
        "recall": recall,
        "f1_score": f1,
    })

    # Log model as MLflow artifact (native format)
    mlflow.sklearn.log_model(model, artifact_path="mlflow_model")

    # Save and log model.pkl: a plain pickle is written locally and then
    # attached to the run under "artifacts". The output directory is derived
    # from model_path so the two cannot get out of sync.
    model_path = "models/model.pkl"
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    with open(model_path, 'wb') as f:
        pickle.dump(model, f)
    mlflow.log_artifact(model_path, artifact_path="artifacts")

    print("✅ MLflow run completed.")
    print("🆔 Run ID:", run_id)
    print("📁 Model and pickle logged under MLflow artifacts.")
