<a href="https://colab.research.google.com/github/SAIKUMAR500/poly-model-/blob/main/Untitled27.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# 1. Generate Synthetic Dataset with Mixed Patterns
np.random.seed(42)
X = np.random.rand(1000, 1) * 4 - 2  # X between -2 and 2
y = np.where(X < 0,
             2 * X + 1,              # Linear pattern for X < 0
             0.5 * X**2 + X - 1      # Quadratic pattern for X >= 0
            ).squeeze() + np.random.normal(0, 0.2, 1000)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Initialize and Train Decision Tree
partition_tree = DecisionTreeRegressor(
    max_leaf_nodes=8,    # Controls model complexity
    min_samples_leaf=20, # Prevent overfitting
    random_state=42
)
partition_tree.fit(X_train, y_train)

# 3. Train Leaf Models and Calculate Optimal Blending
leaf_models = {}
leaf_ids = partition_tree.apply(X_train)

for leaf in np.unique(leaf_ids):
    # Get samples in current leaf
    mask = (leaf_ids == leaf)
    X_leaf = X_train[mask]
    y_leaf = y_train[mask]

    # Skip small leaves (safety check)
    if len(y_leaf) < 5:
        continue

    # A. Fit Linear Model
    lin_reg = LinearRegression()
    lin_reg.fit(X_leaf, y_leaf)
    y_lin = lin_reg.predict(X_leaf)

    # B. Fit Quadratic Model
    poly = PolynomialFeatures(degree=2)
    X_poly = poly.fit_transform(X_leaf)
    poly_reg = LinearRegression()
    poly_reg.fit(X_poly, y_leaf)
    y_poly = poly_reg.predict(X_poly)

    # C. Calculate Optimal Blend Weight (alpha)
    # Solve: argminₐ Σ(y - (a*y_lin + (1-a)*y_poly))²
    numerator = np.sum((y_lin - y_poly) * (y_leaf - y_poly))
    denominator = np.sum((y_lin - y_poly)**2)
    alpha = numerator / denominator if denominator != 0 else 0.5
    alpha = np.clip(alpha, 0, 1)  # Constrain to [0,1]

    # Store components
    leaf_models[leaf] = {
        'linear': lin_reg,
        'poly': poly_reg,
        'poly_transformer': poly,
        'alpha': alpha
    }

# 4. Prediction Function
def hybrid_predict(X):
    leaves = partition_tree.apply(X)
    return np.array([(
        leaf_models[leaf]['alpha'] *
        leaf_models[leaf]['linear'].predict([x])[0] +
        (1 - leaf_models[leaf]['alpha']) *
        leaf_models[leaf]['poly'].predict(
            leaf_models[leaf]['poly_transformer'].transform([x])
        )[0]
    ) for x, leaf in zip(X, leaves)])

# 5. Evaluate Model
print("Hybrid Tree Performance:")
print(f"Train MSE: {mean_squared_error(y_train, hybrid_predict(X_train)):.4f}")
print(f"Test MSE:  {mean_squared_error(y_test, hybrid_predict(X_test)):.4f}")

# Compare with baseline models
print("\nBaseline Comparisons:")
# Regular Decision Tree
print(f"Pure Tree Test MSE: {mean_squared_error(y_test, partition_tree.predict(X_test)):.4f}")

# Single Linear Model
lin_base = LinearRegression().fit(X_train, y_train)
print(f"Linear Model Test MSE: {mean_squared_error(y_test, lin_base.predict(X_test)):.4f}")

# Single Polynomial Model
poly = PolynomialFeatures(degree=2)
X_poly_train = poly.fit_transform(X_train)
poly_base = LinearRegression().fit(X_poly_train, y_train)
X_poly_test = poly.transform(X_test)
print(f"Poly Model Test MSE: {mean_squared_error(y_test, poly_base.predict(X_poly_test)):.4f}")

Hybrid Tree Performance:
Train MSE: 0.0386
Test MSE:  0.0362

Baseline Comparisons:
Pure Tree Test MSE: 0.1291
Linear Model Test MSE: 0.3752
Poly Model Test MSE: 0.3769
