# In [1]:
from azureml.core import Workspace, Dataset
import pandas as pd

# Cost-model parameters.
# NOTE(review): currency/units are not stated in this file -- confirm.
COGS_RATE = 0.7  # cost of goods expressed as a fraction of UnitPrice
SHIPPING_FIXED = 5  # fixed shipping charge, split evenly across an invoice's rows
SHIPPING_PER_ITEM = 0.5
HANDLING_PER_ITEM = 0.3


def add_cost_and_profit(df: pd.DataFrame) -> pd.DataFrame:
    """Add 'Cost' and 'Profit' columns to *df* in place and return it.

    Reads the 'InvoiceNo', 'UnitPrice', 'Quantity' and 'TotalRevenue'
    columns. Each row's cost is the sum of:

    * cost of goods: ``UnitPrice * COGS_RATE * Quantity``
    * a share of ``SHIPPING_FIXED``, divided evenly among the rows that
      carry the same 'InvoiceNo'
    * per-item shipping and handling: ``(SHIPPING_PER_ITEM +
      HANDLING_PER_ITEM) * Quantity``

    'Profit' is ``TotalRevenue - Cost``.

    Raises:
        KeyError: if any of the required columns is missing.
    """
    # Number of rows sharing each row's invoice number.
    lines_per_invoice = df.groupby('InvoiceNo')['InvoiceNo'].transform('count')

    # The cost of goods must scale with Quantity, like the per-item shipping
    # and handling terms; otherwise multi-unit rows are under-costed.
    # NOTE(review): assumes TotalRevenue is the row total
    # (Quantity * UnitPrice) -- confirm against the cleaning step.
    df['Cost'] = (
        df['UnitPrice'] * COGS_RATE * df['Quantity']
        + SHIPPING_FIXED / lines_per_invoice
        + (SHIPPING_PER_ITEM + HANDLING_PER_ITEM) * df['Quantity']
    )
    df['Profit'] = df['TotalRevenue'] - df['Cost']
    return df


# Guarded so the cost model above can be imported without an Azure ML
# workspace. When run as a script or notebook cell, __name__ is "__main__"
# and the names assigned below are still module-level globals.
if __name__ == "__main__":
    # Initialize workspace
    ws = Workspace.from_config()
    datastore = ws.get_default_datastore()

    # Load cleaned data
    data_ref = [(datastore, 'clv-data/cleaned_data.csv')]
    dataset = Dataset.Tabular.from_delimited_files(data_ref)
    df = dataset.to_pandas_dataframe()

    # Cost calculation
    df = add_cost_and_profit(df)

    # Save data with costs
    df.to_csv('data_with_costs.csv', index=False)
    datastore.upload_files(files=['data_with_costs.csv'], target_path='clv-data/', overwrite=True)
    print("Cost modeling complete!")