## Preliminaries

Import the necessary libraries.

In [None]:
from datetime import datetime as dt
import math
import pandas as pd

## Define weights

Create a dictionary object containing the weights for each factor. The default weight is 10 but can range between 0 and 100.

Primary factors should have a higher weight.

In [None]:
# Weight applied to each factor in the reliability-index formula.
# The five referral-based factors carry 20 each; the three contact-detail factors carry 10 each.
fweights = dict(
    r_made=20,
    r_acc=20,
    r_com=20,
    acc_r=20,
    com_r=20,
    s_url=10,
    s_mail=10,
    s_tel=10,
)

In [None]:
w = fweights  # short alias: the reliability-index formula below looks weights up as w[...]

## Import data

Import the service data as a `pandas` dataframe.

In [None]:
# Read the dummy service data directly from the project's GitHub repository (needs network access).
df = pd.read_csv("https://raw.githubusercontent.com/DiarmuidM/tk-reliability-index/main/data/dummydatav001-ar-19dec22.csv")
df

In [None]:
# Create derived factors
#   acc_r = raccepted / rmade
#   com_r = rcompleted / raccepted
# A zero denominator is swapped for NaN before dividing so the ratio comes out as
# missing (NaN) instead of +/-inf, which would otherwise distort the medians and scores.

df['acc_r'] = df['raccepted'] / df['rmade'].replace(0, float('nan'))
df['com_r'] = df['rcompleted'] / df['raccepted'].replace(0, float('nan'))

## Create population summaries function

Create a function that calculates statistical summaries of relevant service factors, e.g., the median number of referrals across all service providers.

In [None]:
def statsum(df):
    """Return the population median of each service factor.

    Parameters
    ----------
    df : pandas.DataFrame
        Service data containing the columns 'rmade', 'raccepted',
        'rcompleted', 'acc_r' and 'com_r'.

    Returns
    -------
    dict
        Medians keyed 'r_made_p50', 'r_acc_p50', 'r_com_p50',
        'acc_r_p50' and 'com_r_p50'.
    """
    # Summary key -> dataframe column whose median it holds.
    median_sources = {
        'r_made_p50': 'rmade',
        'r_acc_p50': 'raccepted',
        'r_com_p50': 'rcompleted',
        'acc_r_p50': 'acc_r',
        'com_r_p50': 'com_r',
    }
    return {key: df[column].median() for key, column in median_sources.items()}

In [None]:
# Compute the population medians for the loaded service data and display them.
summaries = statsum(df)
summaries

In [None]:
pop_sum = summaries  # alias: the later cells read the medians through the name pop_sum

In [None]:
pop_sum['r_made_p50']  # spot-check: median of the 'rmade' column

## Create metric normalisation function

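Define a function that maps a raw metric onto a 0–1 scale using a logistic curve centred on the population median, so a value equal to the median scores 0.5.

TODO: consider whether this would be better performed inside the `ri_score()` function.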

In [None]:
def met_t(factor, factor_p50, steepness=0.01):
    """Normalise a single metric value onto the 0-1 scale with a logistic curve.

    The curve is centred on ``factor_p50``: a value equal to it scores 0.5,
    larger values approach 1 and smaller values approach 0.

    Parameters
    ----------
    factor : float
        The raw metric value. This function works on one value at a time
        (it relies on ``math.exp``), so apply it element-wise to a column.
    factor_p50 : float
        The centre of the curve (the population median of the metric).
    steepness : float, optional
        Slope of the logistic curve. Defaults to 0.01.

    Returns
    -------
    float
        The normalised value in the range 0 to 1 (NaN if ``factor`` is NaN).
    """
    exponent = -steepness * (factor - factor_p50)

    try:
        fac_t = 1 / (1 + math.exp(exponent))
    except OverflowError:
        # math.exp() overflows when the value lies extremely far below the
        # centre; the logistic curve's limit in that direction is 0.
        fac_t = 0.0

    return fac_t

In [None]:
# Transform metric variables to normalised versions
#
# Each raw count is passed through met_t() together with the population median of
# that count. Every column is built with one Series.map assignment rather than by
# writing df[col][i] row by row: chained indexing raises SettingWithCopyWarning and
# leaves df unchanged when pandas Copy-on-Write is active. The resulting columns are
# numeric (float), with NaN wherever the raw count is missing.

metric_sources = {
    'r_made': ('rmade', 'r_made_p50'),
    'r_acc': ('raccepted', 'r_acc_p50'),
    'r_com': ('rcompleted', 'r_com_p50'),
}

for target, (source, median_key) in metric_sources.items():
    # Bind the median as a default argument so each lambda keeps its own value.
    df[target] = df[source].map(lambda value, p50=pop_sum[median_key]: met_t(value, p50))

In [None]:
# Inspect the three normalised metric columns.
df[['r_made', 'r_acc', 'r_com']]

## Handle missing values of primary factors

Replace missing values with the median value across all services. However, we also need an indicator of missingness; otherwise a new service with no data will look better than a lower-scoring, longer-running service.

In [None]:
## Handle missing values
#
# For every scored factor:
#   1. record a 0/1 missingness indicator in '<factor>_miss' (taken before imputing);
#   2. replace missing values with the median of that same column across all services.
#
# The median comes from the column being filled, so the normalised r_made / r_acc /
# r_com scores are imputed on their own 0-1 scale. The medians held in pop_sum for
# those three factors are medians of the raw counts and are not on that scale.

for factor in ['r_made', 'r_acc', 'r_com', 'acc_r', 'com_r']:
    values = pd.to_numeric(df[factor])  # ensure a numeric dtype before taking the median
    df[factor + '_miss'] = values.isna().astype(int)
    df[factor] = values.fillna(values.median())

In [None]:
## Calculate reliability index
#
# The score is the weighted average of the factor columns, scaled by 1000.
# NOTE(review): assumes ServiceEmail / ServiceWeb / ServiceTelephone are numeric
# (e.g. 0/1 indicators) — confirm against the data.

df['ri_score'] = 1000 * (((df['r_made'] * w['r_made']) + (df['r_acc'] * w['r_acc']) + (df['r_com'] * w['r_com'])
                         + (df['acc_r'] * w['acc_r']) + (df['com_r'] * w['com_r']) + (df['ServiceEmail'] * w['s_mail'])
                         + (df['ServiceWeb'] * w['s_url']) + (df['ServiceTelephone'] * w['s_tel']))
                         / sum(w.values()))

# If service is inactive, ri_score = 0
# This override must come after the calculation above; otherwise the column-wide
# assignment overwrites it.

df.loc[df['ServiceStatus']=='inactive', 'ri_score'] = 0

In [None]:
# Inspect the computed reliability index scores.
df['ri_score']

In [None]:
# Write the dataframe, including the columns added in the cells above, to a local CSV file.
df.to_csv("./data/testdata.csv")

## Create RI function

Define a function that calculates a Reliability Index score for service providers.

TASK: how should I loop over service providers: within the function or outside?

In [None]:
def ri_score(df, summaries, fweights, id_col=None, name_col=None):
    """Calculate a Reliability Index score for every service provider in ``df``.

    The loop over service providers happens inside this function, as
    vectorised pandas operations, so it is called once per dataframe.
    The input dataframe is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Service data. The columns 'rmade', 'raccepted', 'rcompleted', 'acc_r',
        'com_r', 'ServiceEmail', 'ServiceWeb', 'ServiceTelephone' and
        'ServiceStatus' are read.
    summaries : dict
        Population summaries as returned by ``statsum()``; the keys
        'r_made_p50', 'r_acc_p50' and 'r_com_p50' are used.
    fweights : dict
        Weight of each factor, keyed 'r_made', 'r_acc', 'r_com', 'acc_r',
        'com_r', 's_mail', 's_url' and 's_tel'.
    id_col, name_col : str, optional
        Names of the columns holding the service identifier and service name.
        When omitted, the dataframe index is used as the identifier and the
        name is left as None.
        NOTE(review): the real identifier/name column names are not visible
        in this notebook — pass them explicitly once confirmed.

    Returns
    -------
    pandas.DataFrame
        One row per service with the columns 'service_id', 'service_name'
        and 'reliability_index'.
    """
    pop_sum = summaries
    w = fweights

    factors = pd.DataFrame(index=df.index)

    # Transform metric variables to normalised versions.
    # met_t() handles one value at a time, so it is mapped over each column.
    for factor, source in [('r_made', 'rmade'), ('r_acc', 'raccepted'), ('r_com', 'rcompleted')]:
        p50 = pop_sum[factor + '_p50']
        factors[factor] = df[source].map(lambda value, p50=p50: met_t(value, p50))

    factors['acc_r'] = df['acc_r']
    factors['com_r'] = df['com_r']

    # Missing factor values are replaced with that factor's median across all services.
    factors = factors.fillna(factors.median())

    # Contact-detail factors.
    # NOTE(review): assumed to be numeric (e.g. 0/1 indicators) — confirm against the data.
    factors['s_mail'] = df['ServiceEmail']
    factors['s_url'] = df['ServiceWeb']
    factors['s_tel'] = df['ServiceTelephone']

    ## Calculate reliability index: weighted average of the factors, scaled by 1000.
    weighted_total = sum(factors[name] * w[name] for name in factors.columns)
    scores = 1000 * (weighted_total / sum(w.values()))

    # If service is inactive, ri_score = 0
    scores = scores.mask(df['ServiceStatus'] == 'inactive', 0)

    s_res = pd.DataFrame(index=df.index)
    s_res['service_id'] = df[id_col] if id_col is not None else df.index
    s_res['service_name'] = df[name_col] if name_col is not None else None
    s_res['reliability_index'] = scores

    return s_res

In [None]:
# ri_score() takes the service data, the population summaries and the factor weights, in that order.
ri_results = ri_score(df, summaries, fweights)
ri_results