## 1. Import Required Libraries

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# TensorFlow and Keras for deep learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, callbacks, optimizers

# Scikit-learn for preprocessing and metrics
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

import warnings

# Suppress every warning for the rest of the session to keep cell output short.
# NOTE(review): this also hides deprecation warnings from the libraries used here.
warnings.filterwarnings('ignore')

# Import custom modules
import sys
import os

# Make the parent of the current working directory importable so that the
# `src` package can be found. `__file__` is not defined inside a notebook,
# so the location is derived from the working directory instead.
# NOTE(review): assumes the notebook is launched from a direct child of the
# project root -- confirm.
_project_root = os.path.dirname(os.getcwd())

# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if _project_root not in sys.path:
    sys.path.append(_project_root)

from src.exception import CustomException
from src.logger import logging
from src.models.deep_learning import DeepLearningModelTrainer

# Plot appearance: apply the style sheet first, then the colour palette
# (same order as before, since the second call may override the first).
plt.style.use('seaborn-v0_8')
sns.set_palette("husl")

# DataFrame display: no limit on columns shown, at most 100 rows
pd.options.display.max_columns = None
pd.options.display.max_rows = 100

# Seed NumPy and TensorFlow with the same fixed value for reproducibility
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Report the TensorFlow build and the GPU devices it can see
gpu_devices = tf.config.list_physical_devices('GPU')
print(f"TensorFlow version: {tf.__version__}")
print(f"GPU Available: {gpu_devices}")

## 2. Load and Prepare Data

In [None]:
# Read the customer CSV (path is relative to the working directory)
DATA_PATH = '../data/ecommerce_customer.csv'
df = pd.read_csv(DATA_PATH)

# Show the (rows, columns) shape, then preview the first rows as the cell output
print(f"Dataset shape: {df.shape}")
df.head()

## 3. Feature Engineering for Deep Learning

In [None]:
# Choose the model inputs and the prediction target by column name
target = 'Yearly Amount Spent'
numeric_features = [
    'Avg. Session Length',
    'Time on App',
    'Time on Website',
    'Length of Membership',
]

# Work on copies so later changes to X / y leave df untouched
X = df[numeric_features].copy()
y = df[target].copy()

# Summarise what was selected
print(f"Features for modeling: {list(X.columns)}")
print(f"Target variable: {target}")
print(f"X shape: {X.shape}, y shape: {y.shape}")

# Count missing entries per feature column and in the target
print("\nMissing values:")
print(X.isna().sum())
print(f"Target missing values: {y.isna().sum()}")

## 4. Train-Test Split

In [None]:
# Hold out 20% of the rows for testing; the fixed random_state makes the
# split repeatable. No stratification is requested (the default).
split_options = dict(test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# Report the shape of each resulting piece
print(f"Training set: X_train {X_train.shape}, y_train {y_train.shape}")
print(f"Test set: X_test {X_test.shape}, y_test {y_test.shape}")

# Check target distribution: compare the target values of the train and
# test splits side by side (histogram on the left, boxplot on the right).
fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: overlaid, semi-transparent histograms of both splits
ax_hist.hist(y_train, bins=30, alpha=0.7, label='Train')
ax_hist.hist(y_test, bins=30, alpha=0.7, label='Test')
ax_hist.set_xlabel('Yearly Amount Spent')
ax_hist.set_ylabel('Frequency')
ax_hist.set_title('Target Distribution')
ax_hist.legend()

# Right panel: one box per split
ax_box.boxplot([y_train, y_test])
# Name the boxes through the axis ticks rather than boxplot(labels=...):
# that keyword is deprecated since Matplotlib 3.9 (renamed tick_labels),
# while setting tick labels works on both older and newer releases.
# Boxes are drawn at x positions 1 and 2 by default.
ax_box.set_xticks([1, 2])
ax_box.set_xticklabels(['Train', 'Test'])
ax_box.set_ylabel('Yearly Amount Spent')
ax_box.set_title('Target Distribution Boxplot')

plt.tight_layout()
plt.show()