# Feature Engineering

In this notebook, we will focus on creating new features from the raw dataset to improve the performance of our machine learning models.

In [None]:
# Import necessary libraries
from pathlib import Path

import numpy as np
import pandas as pd

# Read the raw train and test CSV files into DataFrames (train first, then test)
train_data, test_data = [
    pd.read_csv(csv_path)
    for csv_path in ('../data/raw/train.csv', '../data/raw/test.csv')
]

# Preview the top rows of the training frame as the cell output
train_data.head()

In [None]:
# Feature Engineering Function
def create_features(df):
    """Return a copy of ``df`` with engineered feature columns added.

    The input frame is not modified, so the raw data stays available to
    other cells and re-running the calling cell always starts from the
    same input.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``amount``, ``fee`` and
        ``transaction_time``. ``transaction_time`` must be parseable by
        ``pd.to_datetime``.

    Returns
    -------
    pandas.DataFrame
        A new frame with all original columns plus:

        * ``total_amount`` - ``amount + fee``
        * ``transaction_time`` - converted with ``pd.to_datetime``
        * ``hour`` - ``transaction_time.dt.hour``
        * ``day_of_week`` - ``transaction_time.dt.dayofweek``

    Raises
    ------
    KeyError
        If one of the required columns is missing.
    """
    # Work on a copy: assigning columns directly on `df` would silently
    # alter the caller's raw DataFrame.
    features = df.copy()

    # Example feature: Total amount
    features['total_amount'] = features['amount'] + features['fee']

    # Example feature: Time-based features
    features['transaction_time'] = pd.to_datetime(features['transaction_time'])
    features['hour'] = features['transaction_time'].dt.hour
    features['day_of_week'] = features['transaction_time'].dt.dayofweek

    return features

# Create features for training and test data
train_data_engineered = create_features(train_data)
test_data_engineered = create_features(test_data)

# Save the engineered datasets.
# to_csv does not create missing parent directories, so make sure the output
# folder exists first; otherwise a fresh checkout fails on this cell.
processed_dir = Path('../data/processed')
processed_dir.mkdir(parents=True, exist_ok=True)

train_data_engineered.to_csv(processed_dir / 'train_engineered.csv', index=False)
test_data_engineered.to_csv(processed_dir / 'test_engineered.csv', index=False)

# Display the first few rows of the engineered training data
train_data_engineered.head()