# In [None]:
# Title: Feature Engineering for Classification
# Description: Create a new feature that could help distinguish between species based on
# logical assumptions and verify its utility.
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# Fetch the iris table through seaborn's dataset loader
df = sns.load_dataset('iris')

# Engineered feature: petal length divided by petal width. The hypothesis
# under test is that this ratio helps tell the species apart.
df['petal_ratio'] = df['petal_length'].div(df['petal_width'])

# The label is the species column; the predictors are every other column
# (including the engineered ratio).
y = df['species']
X = df.drop(columns='species')

# One estimator configuration with a fixed seed is scored on both feature
# sets, so the only thing that differs between the two runs is the columns.
model = RandomForestClassifier(random_state=42)


def _mean_cv_score(features):
    """Return the mean 5-fold cross-validation score of ``model`` on *features*."""
    return cross_val_score(model, features, y, cv=5).mean()


# Score with the engineered column included ...
score_with_new_feature = _mean_cv_score(X)

# ... then score the baseline made of the original measurements only
X_baseline = X.drop(columns='petal_ratio')
score_without_new_feature = _mean_cv_score(X_baseline)

print(
    f"Accuracy with new feature: {score_with_new_feature:.4f}",
    f"Accuracy without new feature: {score_without_new_feature:.4f}",
    sep="\n",
)