# Solution: Cascading Models

In [None]:
import seaborn as sns
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import accuracy_score

# 1. Load Data
# Work on a reproducible 1,000-row sample of the seaborn diamonds dataset.
df = sns.load_dataset('diamonds').dropna().sample(1000, random_state=42)

# Features for Price Model (Carat, Depth, Table)
X_reg = df[['carat', 'depth', 'table']]
y_reg = df['price']

# Split. The row labels of df are carried through the split, so they can be
# reused below to pick the matching 'cut' values for each partition.
X_train, X_test, y_train, y_test = train_test_split(
    X_reg, y_reg, test_size=0.2, random_state=42
)

# 2. Train Regression (Price Predictor)
reg = LinearRegression()
reg.fit(X_train, y_train)
# NOTE: the training-set predictions are in-sample (reg was fit on X_train).
price_preds_train = reg.predict(X_train)
price_preds_test = reg.predict(X_test)

# 3. Classification Task (Is the cut Premium or Ideal?)
# Build the 0/1 label once with a vectorized membership test, then select the
# rows belonging to each split by index.
PREMIUM_CUTS = ['Premium', 'Ideal']
is_premium = df['cut'].isin(PREMIUM_CUTS).astype(int)
y_class_train = is_premium.loc[X_train.index]
y_class_test = is_premium.loc[X_test.index]

# ADD Predicted Price as a Feature!
# NOTE(review): Predicted_Price comes from a linear model over these same three
# columns, so it is a linear function of features the classifier already sees.
X_class_train = X_train.assign(Predicted_Price=price_preds_train)
X_class_test = X_test.assign(Predicted_Price=price_preds_test)

# 4. Train Classifier
# The features are unscaled and differ widely in magnitude (carat vs. a
# predicted price), so the solver can stop at the default 100-iteration cap
# before converging; give it more room.
clf = LogisticRegression(max_iter=1000)
clf.fit(X_class_train, y_class_train)

# 5. Evaluate
class_preds_test = clf.predict(X_class_test)
test_accuracy = accuracy_score(y_class_test, class_preds_test)
print(f"Classification Accuracy (with Price feature): {test_accuracy:.4f}")