In [None]:
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Feature Engineering - Customer Churn Prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from datetime import datetime\n",
    "from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Load the raw customer table\n",
    "df = pd.read_csv('../data/raw/customer_data.csv')\n",
    "\n",
    "# Parse the date columns so they support datetime arithmetic\n",
    "df['account_created'] = pd.to_datetime(df['account_created'])\n",
    "df['last_purchase'] = pd.to_datetime(df['last_purchase'])\n",
    "\n",
    "# Take ONE reference timestamp so both day counts are measured from the same instant\n",
    "# NOTE(review): the features still depend on the run date; pin this to the data\n",
    "# export date if it is known, so that re-runs reproduce the same values\n",
    "reference_time = datetime.now()\n",
    "\n",
    "df['account_age_days'] = (reference_time - df['account_created']).dt.days\n",
    "df['days_since_last_purchase'] = (reference_time - df['last_purchase']).dt.days\n",
    "\n",
    "# An account created less than a day ago has an age of 0 days; dividing by it yields\n",
    "# inf (or NaN for 0/0). Floor the denominator at one day; the stored\n",
    "# account_age_days column itself is left untouched.\n",
    "df['purchase_frequency'] = df['number_of_purchases'] / df['account_age_days'].clip(lower=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Denominators are floored at 1 so that customers with zero purchases, or accounts\n",
    "# with an age of zero days, produce finite ratios instead of inf/NaN.\n",
    "# Rows whose denominators are already >= 1 are computed exactly as before.\n",
    "purchase_count = df['number_of_purchases'].clip(lower=1)\n",
    "account_age_months = df['account_age_days'].clip(lower=1) / 30\n",
    "\n",
    "# Create monetary features\n",
    "df['avg_transaction_value'] = df['total_purchase_amount'] / purchase_count\n",
    "df['monthly_spend'] = df['total_purchase_amount'] / account_age_months\n",
    "\n",
    "# Create engagement features\n",
    "df['support_ticket_ratio'] = df['support_tickets'] / purchase_count\n",
    "df['engagement_score'] = df['purchase_frequency'] * df['avg_transaction_value']"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Encode categorical variables as integer codes\n",
    "# NOTE(review): integer codes impose an arbitrary order on nominal categories;\n",
    "# confirm the downstream model tolerates this\n",
    "categorical_cols = ['subscription_tier', 'payment_method', 'country']\n",
    "\n",
    "# Keep one fitted encoder per column. A single shared LabelEncoder is re-fit on every\n",
    "# iteration, so only the last column's mapping would remain available for\n",
    "# inverse_transform or for encoding new data consistently.\n",
    "label_encoders = {}\n",
    "for col in categorical_cols:\n",
    "    label_encoders[col] = LabelEncoder()\n",
    "    df[f'{col}_encoded'] = label_encoders[col].fit_transform(df[col])\n",
    "\n",
    "# Backward-compatible alias: as before, le is the encoder fitted on the last column\n",
    "le = label_encoders[categorical_cols[-1]]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Select the model inputs and split BEFORE scaling, so the scaler never sees test rows\n",
    "numerical_cols = ['account_age_days', 'days_since_last_purchase', 'purchase_frequency',\n",
    "                 'avg_transaction_value', 'monthly_spend', 'support_ticket_ratio',\n",
    "                 'engagement_score']\n",
    "\n",
    "final_features = numerical_cols + [f'{col}_encoded' for col in categorical_cols] + ['churn']\n",
    "processed_df = df[final_features]\n",
    "\n",
    "X = processed_df.drop('churn', axis=1)\n",
    "y = processed_df['churn']\n",
    "\n",
    "# The *_raw frames hold the unscaled split; the scaled versions are built in the next cell\n",
    "X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "source": [
    "# Scale numerical features with statistics learned from the training rows only.\n",
    "# Fitting the scaler on the full dataset would leak test-set means and variances\n",
    "# into the training features.\n",
    "scaler = StandardScaler()\n",
    "\n",
    "# Work on copies so this cell can be re-run without re-scaling already scaled data\n",
    "X_train = X_train_raw.copy()\n",
    "X_test = X_test_raw.copy()\n",
    "X_train[numerical_cols] = scaler.fit_transform(X_train_raw[numerical_cols])\n",
    "X_test[numerical_cols] = scaler.transform(X_test_raw[numerical_cols])\n",
    "\n",
    "# Save processed data\n",
    "X_train.to_csv('../data/processed/X_train.csv', index=False)\n",
    "X_test.to_csv('../data/processed/X_test.csv', index=False)\n",
    "y_train.to_csv('../data/processed/y_train.csv', index=False)\n",
    "y_test.to_csv('../data/processed/y_test.csv', index=False)"
   ]
  }
 ]
}