In [26]:
import numpy as np
import scipy
from sklearn.tree import DecisionTreeRegressor
import pandas as pd
import cvxpy as cvx

In [2]:
# Read the three borough spending extracts into DataFrames.
# The files are decoded as latin1 rather than pandas' default UTF-8.
pendle_borough_data = pd.read_csv(
    filepath_or_buffer='dataset/extracts/pendle_borough_records_extracts.csv',
    encoding='latin1',
)
rochdale_borough_data = pd.read_csv(
    filepath_or_buffer='dataset/extracts/rochdale_borough_records_extracts.csv',
    encoding='latin1',
)
stockport_metropolitan_borough_data = pd.read_csv(
    filepath_or_buffer='dataset/extracts/stockport_metropolitan_borough_records_extracts.csv',
    encoding='latin1',
)

In [3]:
# Unfitted decision-tree regressor built with scikit-learn's default settings
# (no arguments are passed, so no random_state is fixed either).
# NOTE(review): `tree` is not used by any cell visible in this part of the
# notebook — confirm it is still needed or remove it.
tree = DecisionTreeRegressor()

In [9]:
# Preview the first rows of the Pendle extract to check which columns it has.
pendle_borough_data.head()

Unnamed: 0,supplier_name,value,department,service_description,privilege,trade_cat,service_category
0,British Telecommunications Plc,9000.0,Financial Services,Telephones : Central,Utility,64200000,member
1,BROXAP LIMITED,5424.52,Parks & Recreation Services,Grounds : R & M : Day to Day : Routine,Material Handling,45233293,maintenance
2,Landscape Engineering Ltd,14900.0,Parks & Recreation Services,Grounds : R & M : Day to Day : Routine,Material Handling,45000000,maintenance
3,Landscape Engineering Ltd,14900.0,Parks & Recreation Services,Grounds : R & M : Day to Day : Routine,Material Handling,45000000,maintenance
4,BUSINESS IN THE COMMUNITY,5000.0,Economic Development & Tourism,Miscellaneous,Education,80000000,misc


In [18]:
# Preview the first rows of the Rochdale extract to check which columns it has.
rochdale_borough_data.head()

Unnamed: 0,supplier_name,account_name,service,total_value,privilege,trade_cat,service_category
0,ACORN RECOVERY PROJECTS,PH OTHER CONTRACTS,PUBLIC HEALTH,5790.0,Health,85100000,health
1,BARNARDOS,PH BUSINESS CASES,PUBLIC HEALTH,5516.0,Health,85300000,health
2,EARLY BREAK,ACTIVITIES,PUBLIC HEALTH,53913.0,Material,44221000,health
3,EARLY BREAK,ACTIVITIES,PUBLIC HEALTH,49502.0,Social,98000000,health
4,EARLY BREAK,ACTIVITIES,PUBLIC HEALTH,49502.0,Social,98000000,health


In [25]:
# Keep only the two grouping columns and the spend amount from the Pendle data.
input_data = pendle_borough_data.loc[:, ['privilege', 'service_category', 'value']]

# Total spend per group. 'value' is selected explicitly before summing: the
# frame still contains the *other* (string) grouping column, and with
# pandas >= 2.0 a bare `.sum()` concatenates those strings and keeps them as a
# leading column, which breaks any later positional access such as
# `.values[:, 0]`. Selecting [['value']] yields a one-column DataFrame on every
# pandas version.
privilege_data = input_data.groupby(by=['privilege'])[['value']].sum()
service_data = input_data.groupby(by=['service_category'])[['value']].sum()

In [84]:
# Linear program over the aggregated Pendle spend, solved with cvxpy.
# NOTE(review): the structure (maximise weighted service total subject to a
# normalised weighted privilege total) resembles a DEA multiplier model —
# confirm that this is the intended formulation.

# Decision variables: one weight per service category / per privilege group.
s = cvx.Variable(service_data.shape[0])
p = cvx.Variable(privilege_data.shape[0])

# Pick the spend column by name instead of by position (`values[:, 0]`), so a
# stray non-numeric column in the aggregates can never reach the solver.
service_totals = service_data['value'].to_numpy(dtype=float)
privilege_totals = privilege_data['value'].to_numpy(dtype=float)

# Weighted totals used in the objective and in the normalisation constraint.
service = cvx.matmul(s, service_totals)
privilege = cvx.matmul(p, privilege_totals)

# Hand-entered coefficient vectors; entry i pairs with row i of
# service_data / privilege_data respectively.
# NOTE(review): the origin of these numbers is not shown in the notebook.
dmu_s = np.array([0.1, 0.6, 0.2, 0.5, 0.7, 0.9])
dmu_p = np.array([0.2, 0.9, 0.1, 0.3, 0.4, 0.1, 0.1, 0.5])

# The vectors above are typed by hand while the variables are sized from the
# data, so fail early with a readable message if the two drift apart.
if dmu_s.shape != service_totals.shape or dmu_p.shape != privilege_totals.shape:
    raise ValueError(
        'dmu_s/dmu_p lengths {}/{} do not match the number of service/privilege '
        'groups {}/{}'.format(
            dmu_s.shape[0], dmu_p.shape[0],
            service_totals.shape[0], privilege_totals.shape[0],
        )
    )

# objective function
objective = cvx.Maximize(service)

# constraints
constraints = [
    cvx.matmul(s, dmu_s) - cvx.matmul(p, dmu_p) <= 0,
    privilege == 1,
    s >= 0,
    p >= 0,
]

# Keep a handle on the Problem object so its status can be inspected afterwards.
lp_problem = cvx.Problem(objective, constraints)

# use cvxpy to solve the objective
# `problem` keeps its original meaning: the optimal objective value returned by
# solve(), not the Problem object itself.
problem = lp_problem.solve(verbose=True, solver=cvx.SCS, max_iters=500)

# SCS may stop at the iteration cap and report an inaccurate solution; make
# that visible instead of leaving it buried in the solver log.
solve_status = lp_problem.status
if solve_status != 'optimal':
    print('Solver finished with status {!r}; treat the objective value with care.'.format(solve_status))

----------------------------------------------------------------------------
	SCS v2.1.2 - Splitting Conic Solver
	(c) Brendan O'Donoghue, Stanford University, 2012
----------------------------------------------------------------------------
Lin-sys: sparse-direct, nnz in A = 36
eps = 1.00e-04, alpha = 1.50, max_iters = 500, normalize = 1, scale = 1.00
acceleration_lookback = 0, rho_x = 1.00e-03
Variables n = 14, constraints m = 16
Cones:	primal zero / dual free vars: 1
	linear vars: 15
WARN: aa_init returned NULL, no acceleration applied.
Setup time: 3.11e-02s
----------------------------------------------------------------------------
 Iter | pri res | dua res | rel gap | pri obj | dua obj | kap/tau | time (s)
----------------------------------------------------------------------------
     0| 1.36e+19  8.44e+21  1.00e+00 -2.58e+22  8.95e+21  6.56e+21  2.99e-02 
   100| 6.33e-04  9.02e-02  2.43e-03 -2.32e+02 -2.31e+02  7.95e-14  3.12e-02 
   200| 5.80e-05  2.11e-02  3.82e-04 -2.22e+0

  "Solution may be inaccurate. Try another solver, "


In [87]:
# `problem` is bound to the return value of `.solve()` in the cell that builds
# the LP, so this displays the optimal objective value (a number), not a cvxpy
# Problem object.
problem

219.13029179975655

In [86]:
# Show the group labels of both aggregates in row order; entry i of the weight
# vectors `s` / `p` corresponds to label i of the respective index.
service_data.index, privilege_data.index

(Index(['data', 'expense', 'finance', 'maintenance', 'member', 'misc'], dtype='object', name='service_category'),
 Index(['Administration', 'Data', 'Education', 'Equipment', 'Insurance',
        'Material Handling', 'Transport', 'Utility'],
       dtype='object', name='privilege'))

In [23]:
# Total Rochdale spend for every (privilege, service_category) pair.
# Note: this rebinds `input_data`, which another cell also uses for the Pendle
# data, so the name refers to whichever of the two cells ran last.
input_data = rochdale_borough_data[['privilege', 'service_category', 'total_value']]
input_data.groupby(['privilege', 'service_category']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,total_value
privilege,service_category,Unnamed: 2_level_1
Administration,economy,15100.00
Administration,education,10999.40
Administration,environment,115288.73
Administration,health,5423.00
Administration,member,20000.00
...,...,...
Transport,maintenance,56611.00
Transport,misc,7280.00
Utility,education,29400.00
Utility,environment,166470.82
