In [None]:
import pandas as pd #type: ignore
import joblib #type: ignore

from code_files.utils import ( #type: ignore
    RandomState,
    set_seed,
)
from code_files.save_and_compute import ( #type: ignore
    interpret_prediction, 
)

In [None]:
# Single source of truth for the seed so the RandomState object and the
# seeding call cannot drift apart when the value is changed.
SEED = 42

random_state = RandomState(SEED)
set_seed(SEED)

In [None]:
# Get data

# Load your SAI data. The first column of the CSV is discarded by position
# (presumably a saved index column - confirm against how results.csv is written);
# every remaining column is kept and the default integer index is retained.
results_csv = pd.read_csv('results.csv')
SAI_data = results_csv.iloc[:, 1:]

In [None]:
# Load model

# NOTE: joblib.load unpickles the file, and unpickling can execute arbitrary
# code. Only load model artefacts that come from a trusted source.
model_path = 'risk_predictor.pkl'
best_model = joblib.load(model_path)

In [None]:
# Get thresholds

# Probability cut-offs used further down to assign traffic-light labels:
# values up to 'green' are labelled Green, values above 'green' and up to
# 'amber' are labelled Amber, anything higher is labelled Red.
optimal_thresholds = dict(green=0.3, amber=0.7)
print(optimal_thresholds)

In [None]:
# Columns fed to the saved preprocessor, grouped by kind.
numeric_features = [
    'age_group_adjusted_difference',
    'feature_gender_interaction',
]
categorical_features = [
    'gender_f',
    'gender_m',
]
target_column = ['state']  # defined here but not used in this cell

# Apply the preprocessor to the data

# The column order passed to transform() is numeric features first, then
# categorical features (presumably the order used when the preprocessor was
# fitted - confirm against the training code).
preprocessor = joblib.load('clf_scaler.pkl')
model_columns = [*numeric_features, *categorical_features]
model_inputs = SAI_data[model_columns]
X_test = preprocessor.transform(model_inputs)

In [None]:
# Get probabilities

# Keep only column index 1 of the predict_proba output
# (presumably the positive class - confirm against best_model.classes_).
class_probabilities = best_model.predict_proba(X_test)
y_prob = class_probabilities[:, 1]

In [None]:
# Collect results into a DataFrame

# The probability column is required by the steps below; gender, age bins, id
# and sample date are carried along for further evaluation.
# NOTE(review): the original note also listed 'state' as required, but no state
# column is added here (this cell works on unlabelled data) - confirm.
results = pd.DataFrame({
    'id': SAI_data['ids'],
    'sample_date': SAI_data['dates'],
    'gender': SAI_data['gender'],
    'age_bin': SAI_data['age_bin'],
    'old_age_bin': SAI_data['old_age_bin'],
    'probability': y_prob.ravel(),
})

# Get traffic light predictions for unlabelled data

# A NaN probability fails every threshold comparison and would therefore fall
# through to 'Red' (confidently positive). Fail loudly instead of mislabelling.
if pd.isna(y_prob).any():
    raise ValueError('y_prob contains NaN values; cannot assign traffic light labels')


def classify_traffic_light(prob, thresholds):
    """Map one probability to a traffic-light label.

    Parameters
    ----------
    prob : float
        Predicted probability for a single sample.
    thresholds : dict
        Mapping with 'green' and 'amber' cut-offs.

    Returns
    -------
    str
        'Green' if prob <= thresholds['green'], 'Amber' if
        thresholds['green'] < prob <= thresholds['amber'], otherwise 'Red'.
    """
    if prob <= thresholds['green']:
        return 'Green'  # Confidently negative
    if prob <= thresholds['amber']:
        return 'Amber'  # Uncertain class
    return 'Red'  # Confidently positive


tl_predictions = [classify_traffic_light(prob, optimal_thresholds) for prob in y_prob]

# Show a per-label count rather than the full list, which prints one entry per
# sample and floods the cell output.
print(pd.Series(tl_predictions).value_counts())
tl_predictions_df = pd.DataFrame(tl_predictions)

# Get final model output

final_output = results[['probability', 'gender', 'age_bin', 'old_age_bin', 'id', 'sample_date']].reset_index(drop=True)
print(final_output)

# Combine traffic light predictions with final model output

# Both frames carry a fresh 0..n-1 index, so the column-wise concat lines up row
# by row. Columns are renamed by name (not by position) so a change in column
# order cannot silently mislabel them; 0 is the default column label of
# tl_predictions_df.
stratified_output = pd.concat([tl_predictions_df, final_output], axis=1).rename(columns={
    0: 'Traffic Lights',
    'probability': 'Likelihood',
    'gender': 'Gender',
    'age_bin': 'Age Bin',
    'old_age_bin': 'Old Age Bin',
    'id': 'ID',
    'sample_date': 'Sample Date',
})
print(stratified_output)
stratified_output.to_csv('stratified_output.csv', index=False)


['Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Amber', 'Amber', 'Amber', 'Amber', 'Amber', 'Amber', 'Amber', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Amber', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Amber', 'Red', 'Red', 'Red', 'Amber', 'Red', 'Red', 'Red', 'Amber', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Red', 'Amber', 'Amber', 'Amber', 'Amber', 'Amber', 'Amber', 'Amber', 'Amber', 'Red', 'A

In [None]:
# Load weighted mean risk scores

# As with the SAI data, the first CSV column is dropped by position
# (presumably a saved index column - confirm).
weighted_means_csv = pd.read_csv('weighted_means_dementia.csv')
group_weighted_stats = weighted_means_csv.iloc[:, 1:]
print(group_weighted_stats)

In [None]:
# Get adjusted risk scores

# The merge below is an inner join: any sample whose 'Age Bin' has no row in
# group_weighted_stats is dropped from adjusted_output. Report how many samples
# are affected so the loss is visible instead of silent.
has_group_stats = stratified_output['Age Bin'].isin(group_weighted_stats['Age Bin'])
n_unmatched = int((~has_group_stats).sum())
if n_unmatched:
    print('Warning:', n_unmatched,
          "sample(s) have an 'Age Bin' with no weighted mean and are excluded from adjusted_output")

adjusted_output = stratified_output.merge(group_weighted_stats, on='Age Bin', how='inner')

# Adjusted score = raw likelihood minus the weighted mean of the sample's age bin.
adjusted_output['age_group_adjusted_prob'] = (
    adjusted_output['Likelihood'] - adjusted_output['group_weighted_mean']
)
print(adjusted_output)

In [None]:
# Get re-classifications for adjusted risk scores

def _interpret_row(row):
    """Return interpret_prediction's result for one row of adjusted_output.

    Passes the row's traffic-light label and its age-group-adjusted
    probability, in that order.
    """
    traffic_light = row['Traffic Lights']
    adjusted_prob = row['age_group_adjusted_prob']
    return interpret_prediction(traffic_light, adjusted_prob)


adjusted_output['Interpretation'] = adjusted_output.apply(_interpret_row, axis=1)