In [None]:
import os
import sys

from IPython.display import Markdown

sys.path.append('agents')
sys.path.append('utils')

from agents.cleaner import DataCleanerAgent
from agents.engineer import FeatureEngineerAgent
from agents.trainer import ModelTrainerAgent

from utils.logger import AgentLogger
from utils.data_utils import generate_final_report, save_report, print_summary

## Configuration

In [None]:
# Input dataset and log file locations, relative to the notebook's working directory.
INPUT_DATA_PATH = 'data/raw_data.csv'
LOGS_DATA_PATH = 'data/logs.txt'

# The API key is read from the environment so it never has to be typed into the notebook.
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

# Stop here if the variable is unset or empty (both are falsy).
# RuntimeError is raised instead of a bare Exception so the failure is
# specific; it is still an Exception subclass, so broad handlers keep working.
if not OPENAI_API_KEY:
    raise RuntimeError('OPENAI_API_KEY not set.')

In [None]:
# Single logger instance, constructed with LOGS_DATA_PATH and handed to every agent below.
logger = AgentLogger(LOGS_DATA_PATH)
# Presumably discards log content left over from a previous run — confirm in utils/logger.py.
logger.clear()

## Agent 1: Data Cleaner

In [None]:
# Build the data-cleaning agent with the API key and the shared logger.
cleaner = DataCleanerAgent(api_key=OPENAI_API_KEY, logger=logger)

# Run it on INPUT_DATA_PATH, passing 'data/clean_data.csv' as the output path.
# The returned report is kept because later cells pass it on.
cleaning_report = cleaner.clean_data(input_path=INPUT_DATA_PATH, output_path='data/clean_data.csv')

# Short console summary read from the report.
print('\nData cleaning completed')
print(f"Original shape: {cleaning_report['original_shape']}")
print(f"Cleaned shape: {cleaning_report['cleaned_shape']}")

## Agent 2: Feature Engineer

In [None]:
# Build the feature-engineering agent with the API key and the shared logger.
engineer = FeatureEngineerAgent(api_key=OPENAI_API_KEY, logger=logger)

# Run it on the cleaned CSV, forwarding the cleaning report and passing
# 'data/engineered_data.csv' as the output path.
engineering_report = engineer.engineer_features(
    input_path='data/clean_data.csv',
    cleaning_report=cleaning_report,
    output_path='data/engineered_data.csv',
)

# Short console summary read from the report.
print('\nFeature engineering completed')
print(f"Target column: {engineering_report['target_column']}")
print(f"Task type: {engineering_report['task_type']}")
print(f"Features created: {engineering_report['features_created']}")
print(f"Final shape: {engineering_report['output_shape']}")

## Agent 3: Model Trainer

In [None]:
# Build the model-training agent with the API key and the shared logger.
trainer = ModelTrainerAgent(api_key=OPENAI_API_KEY, logger=logger)

# Training receives only the engineering report as input.
training_report = trainer.train_model(engineering_report=engineering_report)

# 'total_iterations' is indexed directly; 'best_metrics' falls back to an
# empty dict when the key is absent from the report.
print('\nModel training completed')
print(f"Total iterations: {training_report['total_iterations']}")
print(f"Best metrics: {training_report.get('best_metrics', {})}")

## Final Report

In [None]:
# Combine the three stage reports into one final report object.
final_report = generate_final_report(
    cleaning_report=cleaning_report,
    engineering_report=engineering_report,
    training_report=training_report,
)

# Save it under data/, then print a summary built from the same three stage reports.
save_report(final_report, 'data/final_report.md')
print_summary(cleaning_report, engineering_report, training_report)

## View Final Report

In [None]:
# Read the saved report back from disk and render it inline.
# `Markdown` (IPython.display) is imported in the notebook's top import cell,
# so every dependency is visible in one place.
# NOTE(review): the file is opened with the platform default encoding; confirm
# this matches the encoding used by whatever wrote data/final_report.md.
with open('data/final_report.md', 'r') as f:
    report_content = f.read()

# Bare last expression so Jupyter shows the rendered Markdown instead of plain text.
Markdown(report_content)

## Explore Results

All outputs are saved in the `data/` directory:
- `clean_data.csv` - Cleaned dataset
- `engineered_data.csv` - Feature-engineered dataset
- `cleaning_report.json` - Data cleaning details
- `engineering_report.json` - Feature engineering details
- `training_report.json` - Model training details
- `final_report.md` - Comprehensive pipeline report
- `logs.txt` - Log file shared by all three agents