In [0]:
# -*- coding: utf-8 -*-
import dataiku
import pandas as pd, numpy as np
from dataiku import pandasutils as pdu



# Recipe inputs: both Dataiku datasets are loaded as pandas DataFrames.
# model_parameters_df supplies the 'Variable', 'Segment' and 'Coeff' columns
# used by the cells below.
model_parameters = dataiku.Dataset("model_parameters")
model_parameters_df = model_parameters.get_dataframe()

# best_curvature_df is joined onto the coefficients by Variable/Segment later.
# 'Adj R-Sq' is removed right after loading; note that the positional column
# drop performed after that join depends on this column already being absent.
best_curvature = dataiku.Dataset("best_curvature")
best_curvature_df = best_curvature.get_dataframe().drop('Adj R-Sq', axis=1)

# Constants used by the response-curve cells.
price_per_Rx = 1500           # price per Rx, multiplied by Impact to get Revenue
total_200_syr_sample = 153    # per-unit sample cost for 'total_200_syr_sample_t'
total_300_syr_sample = 206    # per-unit sample cost applied to any other variable

In [0]:
# Variables whose coefficients are summed per segment into 'sum_alpha'.
Variable_1 = [
    'total_sales_1',
    'total_sales_2',
    'total_sales_3',
]

# Sample variables for which response curves are built.
Variable = [
    'total_200_syr_sample_t',
    'total_300_syr_sample_t',
]

# Distinct segments present in the curvature table (not referenced by any
# active code in the cells below).
segments = list(best_curvature_df['Segment'].unique())

In [0]:
# Sum alpha: per-segment total of the coefficients of the Variable_1 rows.
Filter_Data = model_parameters_df.loc[model_parameters_df['Variable'].isin(Variable_1)]
Sum_alpha = (
    Filter_Data
    .groupby('Segment')['Coeff']
    .sum()
    .reset_index()
    .rename(columns={'Coeff': 'sum_alpha'})
)

In [0]:
# Long-term curvature inputs: keep only the sample-variable rows of the model
# parameters and attach the matching best_curvature row (left join, so a
# Variable/Segment pair without a curvature row keeps NaN in the joined columns).
Filter_Data_1 = model_parameters_df.loc[model_parameters_df['Variable'].isin(Variable)]
Coeff_data = Filter_Data_1.merge(
    best_curvature_df,
    how='left',
    on=['Variable', 'Segment'],
)

# Remove the four columns at positions 3..6 of the merged frame.
# NOTE(review): this is a positional drop, so it silently removes different
# columns if either input's column order changes -- confirm which columns are
# meant and consider dropping them by name.
Coeff_data = Coeff_data.drop(Coeff_data.columns[3:7], axis=1)

In [0]:
# Inspection only: print the merged coefficient/curvature table that feeds the
# response-curve calculation below.
print(Coeff_data)

In [0]:
# Response-curve parameters per Variable/Segment.
# Attach each segment's sum_alpha, then derive the long-term parameters:
#   longterm_A = Coeff / (1 - sum_alpha)
#   longterm_C = Curvature * (1 + 1/Lambda + 1/Lambda**2)
Final_Data = Coeff_data.merge(Sum_alpha, how='left', on='Segment')

coeff = Final_Data['Coeff']
sum_alpha = Final_Data['sum_alpha']
curvature = Final_Data['Curvature']
lam = Final_Data['Lambda']

Final_Data['longterm_A'] = coeff/(1-sum_alpha)
Final_Data['longterm_C'] = curvature*(1+1/lam+1/lam**2)

# Constant join key: lets the next cell pair every row with every promotion
# level through a merge on 'Key'.
Final_Data['Key'] = 1

In [0]:
# Build the response curve: one row per (Final_Data row, promotion level).
#
# price_per_Rx and the per-unit sample costs come from the configuration at the
# top of the notebook. They are deliberately NOT redefined here, so that
# changing the configured value actually takes effect in this cell and in the
# optimal-point cell further down.

# Promotion grid: 0 up to (but excluding) 1 in steps of 0.02.
Response = pd.DataFrame({'Promotion': np.arange(0, 1, .02)})
Response['Key'] = 1

# Cross join through the constant 'Key' column, which is dropped afterwards.
Response_Curve = pd.merge(Final_Data, Response, how='inner', on='Key')
Response_Curve = Response_Curve.drop('Key', axis=1)

# Response curve setup. Every Annual_* column is 12x its base column.
promotion = Response_Curve['Promotion']
Response_Curve['Annual_Promotion'] = 12*promotion
# Impact = longterm_A * (1 - exp(-longterm_C * Promotion))
Response_Curve['Impact'] = Response_Curve['longterm_A']*(1-np.exp(-1*Response_Curve['longterm_C']*promotion))
Response_Curve['Annual_Impact'] = 12*Response_Curve['Impact']

# Revenue: impact valued at the price per Rx.
Response_Curve['Revenue'] = Response_Curve['Impact']*price_per_Rx
Response_Curve['Annual_Revenue'] = 12*Response_Curve['Revenue']

# Per-unit sample cost: the 200 cost for 'total_200_syr_sample_t', the 300 cost
# for every other variable.
is_200_sample = Response_Curve['Variable'] == 'total_200_syr_sample_t'
Response_Curve['Per_Unit_Sample_Cost'] = np.where(is_200_sample, total_200_syr_sample, total_300_syr_sample)
Response_Curve['Total_Cost'] = Response_Curve['Per_Unit_Sample_Cost']*promotion
Response_Curve['Total_Annual_Cost'] = 12*Response_Curve['Total_Cost']

# Profit for the given promotion level.
Response_Curve['Profit'] = Response_Curve['Revenue'] - Response_Curve['Total_Cost']
Response_Curve['Annual_Profit'] = 12*Response_Curve['Profit']

In [0]:
# Highest Annual_Profit reached on the curve of each Segment/Variable pair.
# The constant Optimal_Point flag lets the next cell mark the matching
# response-curve rows by joining on this value.
max_annual_profit = Response_Curve.groupby(['Segment', 'Variable'])['Annual_Profit'].max()
Response_Curve_1 = max_annual_profit.reset_index().assign(Optimal_Point=1)

In [0]:
# Flag the optimal row(s) of every Segment/Variable curve: a row is optimal when
# its Annual_Profit equals the group maximum computed in the previous cell.
# Rows tied for the maximum are all flagged.
Final_Response = pd.merge(
    Response_Curve,
    Response_Curve_1,
    how='left',
    on=['Segment', 'Variable', 'Annual_Profit'],
)

# Only the flag produced by the left join is zero-filled. Filling the whole
# frame would turn missing model parameters (e.g. a NaN longterm_C) into zeros
# and feed them into the division and logarithm below; leaving them as NaN
# keeps missing inputs visible in the output.
Final_Response['Optimal_Point'] = Final_Response['Optimal_Point'].fillna(0)

# Optimal promotion per HCP per month at several mROI targets:
#   x = (-1 / longterm_C) * ln((markup*cost + cost) / (longterm_A * price_per_Rx * longterm_C))
# where markup is the mROI target minus 100%, expressed as a fraction.
unit_cost = Final_Response['Per_Unit_Sample_Cost']
revenue_slope = Final_Response['longterm_A']*price_per_Rx*Final_Response['longterm_C']
for mroi_pct, markup in [(100, 0), (125, 0.25), (150, 0.5), (175, 0.75), (200, 1)]:
    optimal_column = 'Optimal_Per_HCP_per_month_at_{}%_mROI'.format(mroi_pct)
    Final_Response[optimal_column] = (-1/Final_Response['longterm_C'])*(np.log((markup*unit_cost+unit_cost)/revenue_slope))

# Keep the optimal points only, then remove the monthly working columns and
# the flag itself.
Final_Response = Final_Response[Final_Response['Optimal_Point'] == 1]
Final_Response = Final_Response.drop(
    ['Impact', 'Total_Cost', 'Profit', 'Revenue', 'Optimal_Point'],
    axis=1,
)

In [0]:
# Inspection only: print the optimal-point rows that are written to the output
# dataset in the next cell.
print(Final_Response)

In [0]:
# Recipe output: the optimal-point table computed above is written unchanged.
# NB: DSS also supports other kinds of APIs for reading and writing data. Please see doc.



optimal_cal_df = Final_Response # output frame is the optimal-point table as-is




# Write recipe outputs: write_with_schema is called with the dataframe, which
# is stored in the "optimal_cal" dataset.
optimal_cal = dataiku.Dataset("optimal_cal")
optimal_cal.write_with_schema(optimal_cal_df)