In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [None]:
# --- 1. The Mock Artifacts: Creating our data with more realistic overlap ---

# Seed NumPy's global generator so every run draws the same samples.
np.random.seed(42)

# All four draws below come from that one seeded stream, so their order
# (planet sizes, comet sizes, planet speeds, comet speeds) fixes the values.

# Sizes: planets are drawn from 400-900 and comets from 50-500,
# so the two classes overlap between 400 and 500.
size_planets = np.random.uniform(low=400, high=900, size=50)
size_comets = np.random.uniform(low=50, high=500, size=50)
size = np.concatenate((size_planets, size_comets))  # planets first, then comets

# Orbital speeds: planets are drawn from 1-40 and comets from 20-80,
# so the two classes overlap between 20 and 40.
orbital_speed_planets = np.random.uniform(low=1, high=40, size=50)
orbital_speed_comets = np.random.uniform(low=20, high=80, size=50)
orbital_speed = np.concatenate((orbital_speed_planets, orbital_speed_comets))

In [5]:
# Class labels, in the same planets-then-comets order as the feature arrays:
# fifty 1.0 entries (planet) followed by fifty 0.0 entries (comet).
body_type = np.repeat([1.0, 0.0], 50)

In [8]:
# Gather the two feature arrays and the label array into a single table,
# one column per array.
celestial_columns = {
    'size': size,
    'orbital_speed': orbital_speed,
    'type': body_type,
}
celestial_df = pd.DataFrame(celestial_columns)

In [13]:
# --- 2. Preparing the Ritual: Splitting our data ---
# Features are the two measurement columns; the target is the 'type' column.
feature_columns = ['size', 'orbital_speed']
X = celestial_df[feature_columns]
y = celestial_df['type']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# --- 3. The Grand Rituals: Training three different prophets ---

# The Obsessed Prophet (Overfit Model)
# A tree with no depth limit (max_depth=None is the scikit-learn default,
# written out here to make the "no limit" choice explicit).

print("Training the Obsessed Prophet (Overfit Model)...")
overfit_model = DecisionTreeClassifier(max_depth=None, random_state=42)
overfit_model.fit(X_train, y_train)

# Score the fitted tree on the rows it was trained on and on the held-out rows.
overfit_train_predictions = overfit_model.predict(X_train)
overfit_test_predictions = overfit_model.predict(X_test)
overfit_train_accuracy = accuracy_score(y_train, overfit_train_predictions)
overfit_test_accuracy = accuracy_score(y_test, overfit_test_predictions)


Training the Obsessed Prophet (Overfit Model)...


In [None]:
# The Lazy Prophet (Underfit Model)
# A deliberately simple tree: max_depth=1 caps it at a single level of splitting.

print("Training the Lazy Prophet (Underfit Model)...")
underfit_model = DecisionTreeClassifier(max_depth=1, random_state=42)
underfit_model.fit(X_train, y_train)

# Score the fitted tree on the rows it was trained on and on the held-out rows.
underfit_train_predictions = underfit_model.predict(X_train)
underfit_test_predictions = underfit_model.predict(X_test)
underfit_train_accuracy = accuracy_score(y_train, underfit_train_predictions)
underfit_test_accuracy = accuracy_score(y_test, underfit_test_predictions)

Training the Lazy Prophet (Underfit Model)...


In [None]:
# The Balanced Prophet (Just right)
# A middle-ground tree: max_depth=3 sits between the depth-1 tree and the
# unlimited-depth tree trained in the other cells.

print("Training the Balanced Prophet...")
balanced_model = DecisionTreeClassifier(max_depth=3, random_state=42)
balanced_model.fit(X_train, y_train)

# Score the fitted tree on the rows it was trained on and on the held-out rows.
balanced_train_predictions = balanced_model.predict(X_train)
balanced_test_predictions = balanced_model.predict(X_test)
balanced_train_accuracy = accuracy_score(y_train, balanced_train_predictions)
balanced_test_accuracy = accuracy_score(y_test, balanced_test_predictions)

Training the Balanced Prophet...


In [19]:
# --- 4. The Grand Revelation: Comparing the flawed prophecies ---
# Banner: a 40-character rule with a blank line above and below it.
print("\n" + "=" * 40 + "\n")
print("--- Overfitting vs. Underfitting vs. Balanced ---")

# One (label, training accuracy, testing accuracy) row per model, in print order.
accuracy_report = (
    ("Overfit Model", overfit_train_accuracy, overfit_test_accuracy),
    ("Underfit Model", underfit_train_accuracy, underfit_test_accuracy),
    ("Balanced Model", balanced_train_accuracy, balanced_test_accuracy),
)
for label, train_acc, test_acc in accuracy_report:
    # Accuracies are shown rounded to two decimal places.
    print(f"{label}: Training Acc={train_acc:.2f}, Testing Acc={test_acc:.2f}")



--- Overfitting vs. Underfitting vs. Balanced ---
Overfit Model: Training Acc=1.00, Testing Acc=0.85
Underfit Model: Training Acc=0.93, Testing Acc=0.80
Balanced Model: Training Acc=0.97, Testing Acc=0.85
