# 🧠 Multiverse Victory Predictor - Model Training
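This notebook trains a Logistic Regression model and a Random Forest model on a simulated multiverse dataset, reports each model's performance on a held-out test split, and saves both fitted pipelines with joblib.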

In [None]:

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler


In [None]:

# Load the simulation data. The path is relative, so the CSV must sit in the
# directory the notebook is run from.
dataset_path = "simulated_multiverse_dataset.csv"
df = pd.read_csv(dataset_path)

# Last expression of the cell: shows the first rows as a quick sanity check.
df.head()


In [None]:

# num_heroes: anything that cannot be parsed as a number is turned into NaN
# (errors="coerce") instead of raising, so it can be imputed later.
heroes_raw = df["num_heroes"]
df["num_heroes"] = pd.to_numeric(heroes_raw, errors="coerce")

# enemy_mind_state: the literal '???' is treated as a missing-value marker
# and converted to NaN.
mind_state_raw = df["enemy_mind_state"]
df["enemy_mind_state"] = mind_state_raw.replace('???', np.nan)


In [None]:

# Columns routed through the numeric preprocessing branch.
numerical_features = [
    'team_strength',
    'enemy_strength',
    'num_heroes',
    'num_enemies',
    'team_coordination',
    'intel_accuracy',
    'previous_failures',
    'diversion_success_rate',
    'strategic_plan_complexity',
    'universe_variability',
    'enemy_stone_count',
]

# Columns routed through the categorical preprocessing branch.
categorical_features = [
    'has_time_stone',
    'has_surprise_element',
    'terrain_advantage',
    'enemy_mind_state',
    'has_ironman',
    'sacrifice_possible',
]


In [None]:

# Target: the 'outcome' column.
y = df['outcome']

# Inputs: every remaining column except 'simulation_number', which is
# dropped together with the target.
non_feature_columns = ['outcome', 'simulation_number']
X = df.drop(columns=non_feature_columns)


In [None]:

# Numeric branch: fill missing values with the column median, then
# standardize each column.
numeric_steps = [
    ('imputer', SimpleImputer(strategy='median')),
    ('scaler', StandardScaler()),
]
num_pipeline = Pipeline(steps=numeric_steps)

# Categorical branch: fill missing values with the most frequent category,
# then one-hot encode. handle_unknown='ignore' keeps transform from raising
# on categories that were not present when the encoder was fitted.
categorical_steps = [
    ('imputer', SimpleImputer(strategy='most_frequent')),
    ('encoder', OneHotEncoder(handle_unknown='ignore')),
]
cat_pipeline = Pipeline(steps=categorical_steps)

# Apply each branch to its own list of columns and concatenate the results.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', num_pipeline, numerical_features),
        ('cat', cat_pipeline, categorical_features),
    ]
)


In [None]:

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:

# Logistic regression: preprocessing + classifier in a single pipeline.
#
# The pipeline receives its own unfitted copy of `preprocessor` via clone().
# Pipeline.fit fits its steps in place, so handing the same `preprocessor`
# object to several pipelines would make them share one fitted transformer,
# and fitting any of them would silently refit it for all the others.
log_pipeline = Pipeline([
    ('preprocessor', clone(preprocessor)),
    ('classifier', LogisticRegression(max_iter=1000))
])

# Fit on the training split, then evaluate on the held-out test split.
log_pipeline.fit(X_train, y_train)
y_pred_log = log_pipeline.predict(X_test)
print("🔍 Logistic Regression Performance:\n")
print(classification_report(y_test, y_pred_log))

# Persist the whole fitted pipeline (preprocessing included) so it can be
# applied directly to raw feature columns after loading.
joblib.dump(log_pipeline, "multiverse_logistic.pkl")


In [None]:

# Random forest: preprocessing + classifier in a single pipeline.
#
# The pipeline receives its own unfitted copy of `preprocessor` via clone().
# Pipeline.fit fits its steps in place, so reusing the same `preprocessor`
# object that another pipeline already holds would refit that pipeline's
# transformer as a side effect of fitting this one.
rf_pipeline = Pipeline([
    ('preprocessor', clone(preprocessor)),
    ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
])

# Fit on the training split, then evaluate on the held-out test split.
rf_pipeline.fit(X_train, y_train)
y_pred_rf = rf_pipeline.predict(X_test)
print("🌲 Random Forest Performance:\n")
print(classification_report(y_test, y_pred_rf))

# Persist the whole fitted pipeline (preprocessing included) so it can be
# applied directly to raw feature columns after loading.
joblib.dump(rf_pipeline, "multiverse_random_forest.pkl")
