# Stellar Forge Simulator: Planet Property Model Training

## 1. Imports

In [None]:
import numpy as np
import random
import matplotlib.pyplot as plt
import joblib
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score

## 2. Data Generation/Loading

In [None]:
# Simulate loading or generating data
# In a real scenario, this might load data from a file or database.
# Here, we generate synthetic data representing plausible relationships
# between orbital distance and planet properties.

def generate_synthetic_planet_data(num_samples=1000, seed=42):
    """Generate synthetic planet properties as noisy functions of orbital distance.

    All random draws come from a private ``np.random.RandomState(seed)``, so a
    call is reproducible for a given ``seed`` without reseeding NumPy's or the
    standard library's global random generators as a side effect. The draw
    order and distributions are the same as seeding the legacy global NumPy
    stream with ``seed``, so the generated values are unchanged.

    Args:
        num_samples: Number of planets to generate.
        seed: Seed for the private random number generator.

    Returns:
        Tuple ``(X, y_mass, y_radius, y_temp)``. ``X`` holds orbital distance
        (AU) with shape ``(num_samples, 1)``; the three targets are 1-D arrays
        of length ``num_samples`` (Earth masses, Earth radii, Kelvin).
    """
    # Private generator: same legacy stream as np.random.seed(seed), but the
    # caller's global random state is left untouched.
    rng = np.random.RandomState(seed)

    # Orbital distance (AU) - independent variable.
    # Gamma-distributed so close orbits are more common, with a long tail;
    # the 0.2 offset and the clip keep values inside [0.2, 50].
    orbital_distance = rng.gamma(2, 3, num_samples) + 0.2
    orbital_distance = np.clip(orbital_distance, 0.2, 50)

    # Planet mass (Earth masses) - dependent variable.
    # Sum of a Gaussian bump centred at 5 AU (gas-giant peak) and an
    # exponentially decaying term that dominates at small distances.
    mass_base = 5 * np.exp(-(orbital_distance - 5)**2 / 15) + 0.5
    mass_rocky = 1.5 * np.exp(-orbital_distance / 1.0) + 0.1
    mass = mass_base + mass_rocky
    # Noise standard deviation is 30% of the noiseless mass.
    mass_noise = rng.normal(0, mass * 0.3, num_samples)
    mass = np.abs(mass + mass_noise)  # Reflect negative draws to stay positive
    mass = np.clip(mass, 0.01, 1000)

    # Planet radius (Earth radii) - dependent variable.
    # radius ~ (mass / density)^(1/3), with density higher at small distances.
    density_factor = 1.0 + 1.5 * np.exp(-orbital_distance / 2.0)
    radius = (mass / density_factor)**(1/3)
    # Noise standard deviation is 15% of the noiseless radius.
    radius_noise = rng.normal(0, radius * 0.15, num_samples)
    radius = np.abs(radius + radius_noise)
    radius = np.clip(radius, 0.1, 25)  # Earth = 1, Jupiter ~ 11

    # Planet temperature (K) - dependent variable (simplified).
    # T ~ 1 / sqrt(d), anchored at 280 K for d = 1 AU.
    temperature = 280 / np.sqrt(orbital_distance)
    temp_noise = rng.normal(0, 20, num_samples)  # Constant noise level
    temperature = np.abs(temperature + temp_noise)
    temperature = np.clip(temperature, 10, 2000)

    # scikit-learn expects a 2-D feature matrix: one column, one row per sample.
    X = orbital_distance.reshape(-1, 1)

    return X, mass, radius, temperature

# Generate the data
X, y_mass, y_radius, y_temp = generate_synthetic_planet_data(num_samples=1500)

# Split once, passing every target to the same call. train_test_split applies a
# single shuffle to all arrays it receives, so each target stays row-aligned
# with X by construction instead of by repeating the same random_state.
(
    X_train, X_test,
    y_mass_train, y_mass_test,
    y_radius_train, y_radius_test,
    y_temp_train, y_temp_test,
) = train_test_split(X, y_mass, y_radius, y_temp, test_size=0.2, random_state=42)

print(f"Generated {len(X)} samples.")
print(f"Training set size: {len(X_train)}")
print(f"Test set size: {len(X_test)}")

## 3. Data Visualization (Pre-Training)

In [None]:
# One stacked panel per target, all sharing the orbital-distance axis.
fig, axs = plt.subplots(3, 1, figsize=(10, 15), sharex=True)

# (train targets, test targets, y-axis label, panel title) for each panel.
raw_panels = [
    (y_mass_train, y_mass_test,
     'Mass (Earth Masses)', 'Orbital Distance vs. Planet Mass'),
    (y_radius_train, y_radius_test,
     'Radius (Earth Radii)', 'Orbital Distance vs. Planet Radius'),
    (y_temp_train, y_temp_test,
     'Temperature (K)', 'Orbital Distance vs. Planet Temperature'),
]

for ax, (y_tr, y_te, y_label, panel_title) in zip(axs, raw_panels):
    ax.scatter(X_train, y_tr, alpha=0.5, label='Train Data', s=10)
    ax.scatter(X_test, y_te, alpha=0.5, label='Test Data', s=10, c='red')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(True)

# Only the bottom panel carries the shared x-axis label.
axs[-1].set_xlabel('Orbital Distance (AU)')

plt.tight_layout()
plt.show()

## 4. Model Definition and Hyperparameter Tuning

In [None]:
# We will use Polynomial Regression to capture non-linear relationships.
# We need to tune the degree of the polynomial.
# We will train separate models for mass, radius, and temperature.

def tune_polynomial_regression(X_train, y_train):
    """Grid-search the polynomial degree of a polynomial regression pipeline.

    The pipeline is ``PolynomialFeatures`` followed by ``LinearRegression``.
    Degrees 1 through 7 are compared with 5-fold cross-validation, scored by
    negative mean squared error, and the winning parameters and score are
    printed.

    Returns:
        A ``(best_estimator, best_degree)`` tuple taken from the fitted search.
    """
    steps = [
        ('poly', PolynomialFeatures()),
        ('linear', LinearRegression()),
    ]
    # np.arange(1, 8) yields degrees 1 (plain linear fit) through 7.
    degree_grid = {'poly__degree': np.arange(1, 8)}

    grid_search = GridSearchCV(
        Pipeline(steps),
        degree_grid,
        cv=5,
        scoring='neg_mean_squared_error',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    winning_params = grid_search.best_params_
    print(f"Best parameters found: {winning_params}")
    print(f"Best cross-validation score (Negative MSE): {grid_search.best_score_:.4f}")

    return grid_search.best_estimator_, winning_params['poly__degree']

# Tune one model per target: mass, radius, then temperature.
# Each entry pairs the banner printed before tuning with that target's training values.
_tuning_jobs = [
    ("--- Tuning for Mass ---", y_mass_train),
    ("\n--- Tuning for Radius ---", y_radius_train),
    ("\n--- Tuning for Temperature ---", y_temp_train),
]

_tuned = []
for _banner, _y_target in _tuning_jobs:
    print(_banner)
    _tuned.append(tune_polynomial_regression(X_train, _y_target))

(
    (best_mass_model, best_mass_degree),
    (best_radius_model, best_radius_degree),
    (best_temp_model, best_temp_degree),
) = _tuned

## 5. Model Training (Using Best Hyperparameters)

In [None]:
# No separate training step is needed: the estimators returned by the tuning
# step (best_mass_model, best_radius_model, best_temp_model) are used as-is.
_summary_lines = [
    "Models trained using the best hyperparameters found during tuning.",
    f"Mass Model: Polynomial Degree {best_mass_degree}",
    f"Radius Model: Polynomial Degree {best_radius_degree}",
    f"Temperature Model: Polynomial Degree {best_temp_degree}",
]
print("\n".join(_summary_lines))

## 6. Model Evaluation

In [None]:
def evaluate_model(model, X_test, y_test, target_name):
    """Score ``model`` on held-out data and report the results.

    Prints the mean squared error and R-squared of the model's predictions
    for ``X_test`` against ``y_test`` under a header naming ``target_name``.

    Returns:
        The predictions produced by ``model.predict(X_test)``.
    """
    predictions = model.predict(X_test)
    test_mse = mean_squared_error(y_test, predictions)
    test_r2 = r2_score(y_test, predictions)

    report_lines = (
        f"--- Evaluation for {target_name} ---",
        f"Mean Squared Error (MSE): {test_mse:.4f}",
        f"R-squared (R2) Score:   {test_r2:.4f}",
    )
    for line in report_lines:
        print(line)

    return predictions

# Evaluate the mass, radius and temperature models, in that order, on the shared test split.
_evaluation_jobs = [
    (best_mass_model, y_mass_test, "Mass"),
    (best_radius_model, y_radius_test, "Radius"),
    (best_temp_model, y_temp_test, "Temperature"),
]

y_mass_pred, y_radius_pred, y_temp_pred = [
    evaluate_model(_model, X_test, _y_true, _label)
    for _model, _y_true, _label in _evaluation_jobs
]

## 7. Results Visualization

In [None]:
# Dense, evenly spaced distances spanning the observed range, used to draw each learned curve
X_range = np.linspace(X.min(), X.max(), 500).reshape(-1, 1)

mass_curve = best_mass_model.predict(X_range)
radius_curve = best_radius_model.predict(X_range)
temp_curve = best_temp_model.predict(X_range)

# One stacked panel per target, all sharing the orbital-distance axis.
fig, axs = plt.subplots(3, 1, figsize=(12, 18), sharex=True)

# (train targets, test targets, fitted curve, chosen degree, y-axis label, title) per panel.
fit_panels = [
    (y_mass_train, y_mass_test, mass_curve, best_mass_degree,
     'Mass (Earth Masses)', 'Model Fit: Orbital Distance vs. Planet Mass'),
    (y_radius_train, y_radius_test, radius_curve, best_radius_degree,
     'Radius (Earth Radii)', 'Model Fit: Orbital Distance vs. Planet Radius'),
    (y_temp_train, y_temp_test, temp_curve, best_temp_degree,
     'Temperature (K)', 'Model Fit: Orbital Distance vs. Planet Temperature'),
]

for ax, (y_tr, y_te, fitted_curve, degree, y_label, panel_title) in zip(axs, fit_panels):
    ax.scatter(X_train, y_tr, alpha=0.3, label='Train Data', s=10)
    ax.scatter(X_test, y_te, alpha=0.5, label='Test Data', s=15, c='orange')
    ax.plot(X_range, fitted_curve, color='red', linewidth=2, label=f'Poly Degree {degree} Fit')
    ax.set_ylabel(y_label)
    ax.set_title(panel_title)
    ax.legend()
    ax.grid(True)
    ax.set_ylim(bottom=0)

# Only the bottom panel carries the shared x-axis label.
axs[-1].set_xlabel('Orbital Distance (AU)')

plt.tight_layout()
plt.show()

## 8. Model Saving

In [None]:
# Persist each tuned model to its own joblib file so the game can load it later
model_filename_mass = 'planet_mass_model.joblib'
model_filename_radius = 'planet_radius_model.joblib'
model_filename_temp = 'planet_temp_model.joblib'

# Write all three files first, then report, so no success message is printed
# unless every dump completed.
for _model, _path in (
    (best_mass_model, model_filename_mass),
    (best_radius_model, model_filename_radius),
    (best_temp_model, model_filename_temp),
):
    joblib.dump(_model, _path)

for _message in (
    f"Mass model saved to {model_filename_mass}",
    f"Radius model saved to {model_filename_radius}",
    f"Temperature model saved to {model_filename_temp}",
):
    print(_message)

## 9. Example Usage (Loading and Predicting)

In [None]:
# Reload the persisted models from disk (as the game would)
loaded_mass_model, loaded_radius_model, loaded_temp_model = (
    joblib.load(_path)
    for _path in (model_filename_mass, model_filename_radius, model_filename_temp)
)

# Example: a handful of orbital distances (AU), shaped as a single-column feature matrix
example_distances = np.array([0.5, 1.0, 5.0, 10.0, 30.0]).reshape(-1, 1)

# Predict, then floor each output at a small positive minimum (no upper
# limit) so a fitted curve can never report a non-positive quantity.
predicted_mass = np.clip(loaded_mass_model.predict(example_distances), 0.01, None)
predicted_radius = np.clip(loaded_radius_model.predict(example_distances), 0.1, None)
predicted_temp = np.clip(loaded_temp_model.predict(example_distances), 10, None)

print("\n--- Example Predictions ---")
_rows = zip(example_distances.flatten(), predicted_mass, predicted_radius, predicted_temp)
for dist, _mass, _radius, _temp in _rows:
    print(f"Distance: {dist:.2f} AU -> Mass: {_mass:.2f} EM, Radius: {_radius:.2f} ER, Temp: {_temp:.1f} K")