# In [None]:
from azureml.core import Workspace, Dataset
import pandas as pd

# Connect to the Azure ML workspace (settings are resolved by from_config()).
ws = Workspace.from_config()

# Fetch the registered raw dataset, then clean and enrich it in one chain.
# Every step operates on the rows that survived the previous step, so the
# filters are applied in this order:
#   1. rows with a CustomerID,
#   2. rows with a strictly positive Quantity,
#   3. rows whose InvoiceNo (as text) does not begin with 'C'
#      (presumably cancellation invoices -- confirm against the data source).
# The derived columns are computed last, on the filtered rows only.
dataset = Dataset.get_by_name(ws, name='online_retail_raw')
df = (
    dataset.to_pandas_dataframe()
    .loc[lambda frame: frame['CustomerID'].notna()]
    .loc[lambda frame: frame['Quantity'] > 0]
    .loc[lambda frame: ~frame['InvoiceNo'].astype(str).str.startswith('C')]
    .assign(
        # Line-level revenue: units sold times price per unit.
        TotalRevenue=lambda frame: frame['Quantity'] * frame['UnitPrice'],
        # Parse the invoice timestamp into a pandas datetime column.
        InvoiceDate=lambda frame: pd.to_datetime(frame['InvoiceDate']),
    )
)

# Persist the cleaned frame locally (without the index column) ...
df.to_csv('cleaned_data.csv', index=False)

# ... and push that file to the workspace's default datastore, replacing any
# existing copy under the target path.
datastore = ws.get_default_datastore()
datastore.upload_files(
    files=['cleaned_data.csv'],
    target_path='clv-data/',
    overwrite=True,
)
print("Data preprocessing complete!")