In [7]:
import pandas as pd
from skimpy import clean_columns

In [86]:
# Load the source CSV; the path is relative to the current working directory.
df = pd.read_csv(filepath_or_buffer='data/ref_4_star_value.csv')

In [87]:
# Preview the first rows of the raw data with its original column headers.
df.head()

Unnamed: 0,UK Provider Reference Number,Main panel,Unit of assessment (UOA),Sub-profile,Percentage of research activity rated 4*,Percentage of research activity rated 3*,Mainstream QR allocation,FTE Returned,Required number of items to be returned for sub-profile,4* value for single item in sub-profile
0,10007785,A,3,Output,24.5,61.7,757328,63.5,159,11929.24
1,10007785,A,4,Output,3.0,39.4,50669,13.2,33,11950.24
2,10007785,B,8,Output,12.5,72.9,192157,19.0,48,13029.8
3,10007785,B,11,Output,8.3,58.4,143219,19.0,47,13307.22
4,10007785,B,12,Output,16.8,63.6,458530,42.6,106,13227.46


In [88]:
# Normalise the headers with clean_columns, then shorten the two longest
# names so they are easier to reference later on.
df = clean_columns(df).rename(
    columns={
        'unit_of_assessment_uoa': 'unit_of_assessment',
        'required_number_of_items_to_be_returned_for_sub_profile': 'outputs_required',
    }
)

In [89]:
# Confirm the cleaned and renamed column headers.
df.head()

Unnamed: 0,uk_provider_reference_number,main_panel,unit_of_assessment,sub_profile,percentage_of_research_activity_rated_4,percentage_of_research_activity_rated_3,mainstream_qr_allocation,fte_returned,outputs_required,4_value_for_single_item_in_sub_profile
0,10007785,A,3,Output,24.5,61.7,757328,63.5,159,11929.24
1,10007785,A,4,Output,3.0,39.4,50669,13.2,33,11950.24
2,10007785,B,8,Output,12.5,72.9,192157,19.0,48,13029.8
3,10007785,B,11,Output,8.3,58.4,143219,19.0,47,13307.22
4,10007785,B,12,Output,16.8,63.6,458530,42.6,106,13227.46


In [78]:
def four_star_value(
    mainstream_allocation: int | float,
    outputs_required: int,
    three_star_activity: float,
    four_star_activity: float
    ) -> float:
    """
    Calculate the value of a four star output or impact case study.
    
    Parameters:
        mainstream_allocation (int | float): The total mainstream QR funding allocation.
        outputs_required (int): The number of outputs required to be returned for the sub-profile.
        three_star_activity (float): The percentage of three star activity.
        four_star_activity (float): The percentage of four star activity.
    
    Returns:
        float: The value of a four star output or impact case study.
    """
    denominator = (4 * (outputs_required * (four_star_activity / 100))) + (outputs_required * (three_star_activity / 100))
    
    value = round(4 * (mainstream_allocation / denominator), 2)
    
    return value

In [79]:
# Spot check using the figures from the first row of the data, whose supplied
# per-item value is 11929.24.
four_star_value(
    mainstream_allocation=757328,
    outputs_required=159,
    three_star_activity=61.7,
    four_star_activity=24.5,
)

11930.04

In [91]:
# Compute the four star value row by row. Keyword arguments make the
# three/four star ordering explicit, and the lambda parameter is named `row`
# so it does not shadow the DataFrame itself.
df['calculated_value'] = df.apply(
    lambda row: four_star_value(
        mainstream_allocation=row['mainstream_qr_allocation'],
        outputs_required=row['outputs_required'],
        three_star_activity=row['percentage_of_research_activity_rated_3'],
        four_star_activity=row['percentage_of_research_activity_rated_4'],
    ),
    axis=1,
)

In [92]:
# Preview calculated_value next to the supplied
# 4_value_for_single_item_in_sub_profile column for comparison.
df.head()

Unnamed: 0,uk_provider_reference_number,main_panel,unit_of_assessment,sub_profile,percentage_of_research_activity_rated_4,percentage_of_research_activity_rated_3,mainstream_qr_allocation,fte_returned,outputs_required,4_value_for_single_item_in_sub_profile,calculated_value
0,10007785,A,3,Output,24.5,61.7,757328,63.5,159,11929.24,11930.04
1,10007785,A,4,Output,3.0,39.4,50669,13.2,33,11950.24,11948.83
2,10007785,B,8,Output,12.5,72.9,192157,19.0,48,13029.8,13029.36
3,10007785,B,11,Output,8.3,58.4,143219,19.0,47,13307.22,13306.61
4,10007785,B,12,Output,16.8,63.6,458530,42.6,106,13227.46,13228.61
