In [None]:
#1. Install required packages (only if not already available)
!pip install xgboost --quiet

#2. Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
#3. Synthetic Dataset Generator (Replace this when real data is ready)

def load_data(dummy=True, path=None):
    """Return the dataset consumed by the rest of the pipeline.

    Parameters
    ----------
    dummy : bool, default True
        If truthy, build a 500-row synthetic dataset. NumPy's global random
        generator is seeded with 42 first, so repeated calls return identical
        frames. If falsy, read a CSV from `path` instead.
    path : str or path-like, optional
        CSV location. Required when `dummy` is falsy; ignored otherwise.

    Returns
    -------
    pandas.DataFrame
        Synthetic mode: the columns 'monthly_revenue', 'monthly_expenses',
        'cash_reserve', 'runway_months', 'debt', 'funding_received',
        'net_profit_margin' and the target 'financial_score'.
        CSV mode: whatever `pd.read_csv(path)` returns, unmodified.

    Raises
    ------
    ValueError
        If `dummy` is falsy and `path` is None.
    """
    if not dummy:
        if path is None:
            # Fail early with a clear message instead of handing None to pandas.
            raise ValueError("path must be provided when dummy=False")
        return pd.read_csv(path)

    # Seeds the *global* NumPy generator so the draws below are reproducible.
    np.random.seed(42)
    n = 500

    # Draw order matters: changing it changes every generated value.
    # randint's upper bound is exclusive.
    revenue = np.random.randint(10000, 500000, size=n)
    expenses = np.random.randint(8000, 450000, size=n)
    cash = np.random.randint(10000, 1000000, size=n)
    runway = np.random.randint(3, 24, size=n)
    debt = np.random.randint(0, 500000, size=n)
    funding = np.random.randint(50000, 2000000, size=n)

    df = pd.DataFrame({
        'monthly_revenue': revenue,
        'monthly_expenses': expenses,
        'cash_reserve': cash,
        'runway_months': runway,
        'debt': debt,
        'funding_received': funding,
        # Margin is bounded to [-1.0, 0.5] so extreme loss ratios do not dominate.
        'net_profit_margin': ((revenue - expenses) / revenue).clip(-1.0, 0.5),
    })

    # Financial score (custom formula), clipped to [0, 100] and rounded to
    # two decimals.
    # NOTE(review): 'funding_received' does not contribute to the score —
    # confirm whether a funding term is intended here.
    df['financial_score'] = (
        (revenue / expenses) * 15 +
        (cash / 10000) * 0.5 +
        runway * 1.5 -
        (debt / 50000) * 1.2 +
        df['net_profit_margin'] * 20
    ).clip(0, 100).round(2)

    return df

In [None]:
#4. Preprocessing

def preprocess_data(df):
    """Split `df` into train/test sets and standardize the feature columns.

    Every column except 'financial_score' is treated as a feature;
    'financial_score' is the regression target. 20% of the rows are held out
    for testing (random_state=42).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'financial_score' column.

    Returns
    -------
    tuple
        (X_train, X_test, y_train, y_test). The X values are the outputs of
        StandardScaler; the y values are the corresponding slices of
        df['financial_score'].
    """
    X = df.drop(columns=['financial_score'])
    y = df['financial_score']

    # Split BEFORE scaling: fitting the scaler on all rows would leak the test
    # set's mean/variance into the training features.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)  # statistics learned from train only
    X_test = scaler.transform(X_test)        # reuse the train statistics
    return X_train, X_test, y_train, y_test

In [None]:
#5. Train XGBoost Regressor

def train_model(X_train, y_train):
    """Fit an XGBoost regressor on the training split and return the fitted model."""
    # Fixed hyperparameters for the regressor.
    hyperparams = {
        "n_estimators": 100,
        "learning_rate": 0.1,
        "max_depth": 4,
    }
    regressor = XGBRegressor(**hyperparams)
    regressor.fit(X_train, y_train)
    return regressor

In [None]:
#6. Evaluate Performance

def evaluate_model(model, X_test, y_test):
    """Print MAE and R² for `model` on the test split and plot predicted vs. actual.

    Both metrics are printed with two decimals, then a scatter plot of the
    predictions against the true scores is shown. Nothing is returned.
    """
    predictions = model.predict(X_test)
    mae, r2 = mean_absolute_error(y_test, predictions), r2_score(y_test, predictions)

    print(f"📉 MAE: {mae:.2f}")
    print(f"📊 R² Score: {r2:.2f}")

    # Predicted-vs-actual scatter, drawn on an explicit Axes.
    _, ax = plt.subplots(figsize=(8, 5))
    sns.scatterplot(x=y_test, y=predictions, ax=ax)
    ax.set_xlabel("Actual Score")
    ax.set_ylabel("Predicted Score")
    ax.set_title("Actual vs Predicted Financial Score")
    ax.grid(True)
    plt.show()

In [None]:
#7. Run Entire Pipeline

def run_pipeline(dummy=True, path=None):
    """Run load -> preprocess -> train -> evaluate in one call.

    `dummy` and `path` are forwarded unchanged to `load_data`. The dataset's
    shape is printed and its first rows are displayed before training.
    Relies on `display` being available in the notebook namespace.
    Returns nothing.
    """
    dataset = load_data(dummy=dummy, path=path)
    print(f"✅ Dataset shape: {dataset.shape}")
    display(dataset.head())

    features_train, features_test, target_train, target_test = preprocess_data(dataset)
    fitted = train_model(features_train, target_train)
    evaluate_model(fitted, features_test, target_test)

In [None]:
#8. Execute

# Runs the whole pipeline on the built-in synthetic dataset.
# For real data, call run_pipeline(dummy=False, path='your_file.csv') instead;
# the CSV must contain a 'financial_score' column, which preprocess_data uses
# as the target.
run_pipeline(dummy=True)