# MODULE 2: Feature Engineering
## Creating 35+ Engineered Features for ML Models

**Objective**: Transform raw data into meaningful features for machine learning models.

**Output**: Processed dataset with engineered features ready for modeling.

In [None]:
import pandas as pd
import numpy as np
import sys
sys.path.append('..')

from src.data.loader import DataLoader
from src.data.cleaner import DataCleaner
from src.data.feature_engineer import FeatureEngineer

## 1. Load and Clean Data

In [None]:
# Load the raw dataset through the project's DataLoader, pointed at '../data'.
# NOTE(review): the relative path presumably resolves against the notebook's
# working directory — confirm when running from elsewhere.
loader = DataLoader(data_dir='../data')
df = loader.load_raw_data()

# Clean the raw frame; `df_clean` is the input to the feature-engineering cell.
cleaner = DataCleaner()
df_clean = cleaner.clean_dataset(df)

# Quick sanity check on the size of the cleaned data.
print(f"Cleaned dataset shape: {df_clean.shape}")

## 2. Create Engineered Features

In [None]:
# Build the engineered feature set from the cleaned data.
engineer = FeatureEngineer()
df_features = engineer.create_features(df_clean)

# Fetch the feature names once and reuse them for both the count and the listing.
feature_names = engineer.get_feature_list()

print(f"Features created: {len(feature_names)}")
print("\nNew features:")
for feature in feature_names:
    print(f"  - {feature}")

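## 3. Feature Categories

The subsections below list the planned feature groups; the code cell under each one is still a TODO placeholder.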

### 3.1 Deal Features
- Deal success indicator
- Implied valuation
- Deal complexity score

In [None]:
# TODO: Implement deal feature creation

### 3.2 Valuation Features
- Asked valuation
- Valuation gap
- Revenue multiples
- Profit multiples

In [None]:
# TODO: Implement valuation feature creation

### 3.3 Shark Features
- Number of sharks involved
- Shark combination patterns
- Shark preference scores

In [None]:
# TODO: Implement shark feature creation

## 4. Save Processed Data

In [None]:
# Persist the engineered dataset through the project's loader.
output_filename = 'processed_data.csv'
loader.save_processed_data(df_features, output_filename)
print("Processed data saved successfully!")

## 5. Feature Summary

In [None]:
# Show summary statistics for the engineered dataset as this cell's output
# (the call must stay the last expression in the cell to be rendered).
# NOTE(review): assumes df_features is a pandas DataFrame; describe() would then
# cover only numeric columns by default — confirm whether include='all' is wanted.
df_features.describe()