In [None]:
from azureml.core import Workspace, Dataset
import pandas as pd

# Feature-engineering step: load the transaction-level cost data from the
# workspace's default datastore, roll it up to one row per customer
# (recency / frequency / monetary features plus total profit), derive a CLV
# column, and upload the result back to the datastore.

# Initialize workspace (reads the workspace configuration via from_config())
ws = Workspace.from_config()
datastore = ws.get_default_datastore()

# Load cost data
data_ref = [(datastore, 'clv-data/data_with_costs.csv')]
dataset = Dataset.Tabular.from_delimited_files(data_ref)
df = dataset.to_pandas_dataframe()

# Reference date for Recency: the most recent invoice date in the whole
# dataset. It is computed once here rather than inside the aggregation lambda,
# where it would be re-evaluated (a full column scan) for every customer group.
# NOTE(review): the subtraction below assumes 'InvoiceDate' was loaded as a
# datetime column -- confirm against the dataset's inferred column types.
snapshot_date = df['InvoiceDate'].max()

# Share of Total_Profit used as the CLV estimate.
# NOTE(review): presumably 0.25 = 3/12, i.e. a 3-month projection that assumes
# the data covers roughly one year -- confirm with the data owner.
CLV_PROJECTION_FACTOR = 0.25

# Customer aggregation (one output row per CustomerID)
customer_data = df.groupby('CustomerID').agg(
    # Days between the dataset's latest invoice and this customer's latest one
    Recency=('InvoiceDate', lambda dates: (snapshot_date - dates.max()).days),
    # Number of distinct invoices
    Frequency=('InvoiceNo', 'nunique'),
    Monetary=('TotalRevenue', 'sum'),
    Total_Profit=('Profit', 'sum'),
    # First Country value encountered for the customer
    Country=('Country', 'first'),
).reset_index()

# CLV calculation (3-month projection)
customer_data['CLV'] = customer_data['Total_Profit'] * CLV_PROJECTION_FACTOR

# Save features locally, then upload the file to the datastore, replacing any
# existing copy under 'clv-data/'
customer_data.to_csv('customer_features.csv', index=False)
datastore.upload_files(files=['customer_features.csv'], target_path='clv-data/', overwrite=True)
print("Feature engineering complete!")