In [1]:
import numpy as np
import scipy
from sklearn.tree import DecisionTreeRegressor
import pandas as pd
import cvxpy as cvx
from scipy.stats import chisquare, f_oneway, ttest_ind

In [2]:
pendle_borough_data = pd.read_csv('dataset/extracts/pendle_borough_records_extracts.csv', encoding='latin1')
rochdale_borough_data = pd.read_csv('dataset/extracts/rochdale_borough_records_extracts.csv', encoding='latin1')
stockport_metropolitan_borough_data = pd.read_csv('dataset/extracts/stockport_metropolitan_borough_records_extracts.csv', encoding='latin1')

In [3]:
tree = DecisionTreeRegressor()

In [4]:
pendle_borough_data.head()

Unnamed: 0,supplier_name,value,department,service_description,privilege,trade_cat,service_category
0,British Telecommunications Plc,9000.0,Financial Services,Telephones : Central,Utility,64200000,member
1,BROXAP LIMITED,5424.52,Parks & Recreation Services,Grounds : R & M : Day to Day : Routine,Material Handling,45233293,maintenance
2,Landscape Engineering Ltd,14900.0,Parks & Recreation Services,Grounds : R & M : Day to Day : Routine,Material Handling,45000000,maintenance
3,Landscape Engineering Ltd,14900.0,Parks & Recreation Services,Grounds : R & M : Day to Day : Routine,Material Handling,45000000,maintenance
4,BUSINESS IN THE COMMUNITY,5000.0,Economic Development & Tourism,Miscellaneous,Education,80000000,misc


In [5]:
rochdale_borough_data.head()

Unnamed: 0,supplier_name,account_name,service,total_value,privilege,trade_cat,service_category
0,ACORN RECOVERY PROJECTS,PH OTHER CONTRACTS,PUBLIC HEALTH,5790.0,Health,85100000,health
1,BARNARDOS,PH BUSINESS CASES,PUBLIC HEALTH,5516.0,Health,85300000,health
2,EARLY BREAK,ACTIVITIES,PUBLIC HEALTH,53913.0,Material,44221000,health
3,EARLY BREAK,ACTIVITIES,PUBLIC HEALTH,49502.0,Social,98000000,health
4,EARLY BREAK,ACTIVITIES,PUBLIC HEALTH,49502.0,Social,98000000,health


In [6]:
input_data = pendle_borough_data.loc[:, ['privilege', 'service_category', 'value']]
privilege_data = input_data.groupby(by=['privilege']).sum()
service_data = input_data.groupby(by=['service_category']).sum()

In [326]:
s = cvx.Variable(service_data.values.shape[0])
p = cvx.Variable(privilege_data.values.shape[0])

service = cvx.matmul(s, service_data.values[:,0])
privilege = cvx.matmul(p, privilege_data.values[:,0])

# SPECTRUM
GAMMA_SHAPE=5e4
GAMMA_SCALE=1e30
NORMAL_CENTER=1e30
NORMAL_STD=5e29

dmu_s = np.random.gamma(GAMMA_SHAPE, GAMMA_SCALE, service_data.values.shape[0])
dmu_p = np.random.normal(NORMAL_CENTER, NORMAL_STD, privilege_data.values.shape[0])
# objective function
objective = cvx.Maximize(service)

# constraints
constraints = [cvx.matmul(s, dmu_s) - cvx.matmul(p, dmu_p) <= 0, privilege == 1, s >= 0, p >= 0]

# use cvxpy to solve the objective
problem = cvx.Problem(objective, constraints).solve(verbose=False, solver=cvx.SCS, max_iters=500)

print(f_oneway(privilege_data.values[:,0], service_data.values[:,0]))
print(ttest_ind(privilege_data.values[:,0], service_data.values[:,0]))
print(np.argmax(dmu_s/dmu_s.max()), dmu_s/dmu_s.max(), s.value)
print(chisquare(dmu_s/dmu_s.max(), service_data.values[:,0]/service_data.values.max()))

print(np.argmax(dmu_p/dmu_p.max()), dmu_p/dmu_p.max(), p.value)
print(chisquare(dmu_p/dmu_p.max(), privilege_data.values[:,0]/privilege_data.values.max()))

# ['data', 'expense', 'finance', 'maintenance', 'member', 'misc']
# ['Administration', 'Data', 'Education', 'Equipment', 'Insurance', 'Material Handling', 'Transport', 'Utility']

F_onewayResult(statistic=0.11085660907162993, pvalue=0.744915850624676)
Ttest_indResult(statistic=-0.33295136142029813, pvalue=0.7449158506246765)
3 [0.99744388 0.99328063 0.9912425  1.         0.99077654 0.99592897] [1.04912061e-11 1.05618526e-11 1.06248000e-11 1.04747642e-11
 1.05478367e-11 1.05034762e-11]
Power_divergenceResult(statistic=266.93763301059926, pvalue=1.2721392266594729e-55)
0 [1.         0.90136379 0.74223152 0.56243933 0.64229319 0.9080128
 0.96155978 0.91154753] [1.28584973e-08 9.37015587e-07 8.56691634e-09 4.15186154e-08
 1.57581363e-08 1.37809057e-07 1.18705208e-06 1.57194109e-08]
Power_divergenceResult(statistic=235.61662641933415, pvalue=3.17759368257722e-47)


**Looking at the data:**

- F Test and T Test between Supplier Trade Usage and Service Maximisation indicate a pvalue of 0.74. 

The F ratio is higher for the comparison and T value is small indicating the spread is over the exposures rather than the data and that is what we need

- Chi Frequency is lower for service indicating a single exposure does not signify for service but is greatly seen for privilege

At the input level, It is Service Maximisation less influencial and Trade Usage more influencial, we need to balance them

- We see that exposures are managed perfectly

- We also see that supplier trade usage is more influencial, in order to balance it we need to change the input dmu distribution

- With the Gamma Distribution, we get a superceeded Service rather than the Privilege

In [8]:
service_data.index, privilege_data.index

(Index(['data', 'expense', 'finance', 'maintenance', 'member', 'misc'], dtype='object', name='service_category'),
 Index(['Administration', 'Data', 'Education', 'Equipment', 'Insurance',
        'Material Handling', 'Transport', 'Utility'],
       dtype='object', name='privilege'))

In [9]:
s.value, p.value

(array([4.79461104e-07, 5.09262979e-07, 5.46827181e-07, 4.83229230e-07,
        4.80661696e-07, 4.79291241e-07]),
 array([2.20045759e-07, 6.29520979e-07, 2.22546177e-07, 2.19446274e-07,
        2.22001773e-07, 1.91454387e-07, 1.35436353e-06, 2.19984274e-07]))

In [10]:
input_data = rochdale_borough_data.loc[:, ['privilege', 'service_category', 'total_value']]
input_data.groupby(by=['privilege', 'service_category']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,total_value
privilege,service_category,Unnamed: 2_level_1
Administration,economy,15100.00
Administration,education,10999.40
Administration,environment,115288.73
Administration,health,5423.00
Administration,member,20000.00
...,...,...
Transport,maintenance,56611.00
Transport,misc,7280.00
Utility,education,29400.00
Utility,environment,166470.82
