In [1]:
import pandas as pd
import openai
import numpy as np
import ast
import os
from dotenv import load_dotenv

In [3]:
# Colab-only: mount Google Drive so the files under /content/drive are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the variables stored in the .env file on Drive, then read the API key from the environment.
env_path = '/content/drive/MyDrive/Personalized-Narratives/.env'
load_dotenv(dotenv_path=env_path)
api_key = os.environ.get("UNI_API_KEY")  # None when the variable is not defined


In [4]:
# Audience variant under evaluation; interpolated into the Excel file names used by the notebook.
audience_type = "general"

In [None]:
# Load the generated narratives for the selected audience; the narrative text is read from the first column.
df_narratives = pd.read_excel(f"/content/drive/My Drive/Colab Notebooks/{audience_type}_stories_final.xlsx")

In [None]:
# Sanity check: show the first narrative (row 0, column 0).
df_narratives.iloc[0,0]

"The AI model predicted that the applicant's loan application would be denied. The primary reason for this decision is the applicant's low annual income. A lower income might indicate a higher risk of default, as the applicant might struggle to meet the repayment schedule. \n\nThe second most influential factor was the high loan amount requested. A larger loan amount can be seen as a higher risk, especially when paired with a lower income, as it might be more difficult for the applicant to repay the loan in full. \n\nFinally, the applicant's history of previous loan defaults also contributed negatively to the prediction. This history suggests a pattern of financial behavior that increases the risk of future defaults. \n\nIn summary, the combination of a low income, a high loan amount, and a history of loan defaults led the model to predict a high risk of default, resulting in the denial of the loan application."

In [None]:
# sheet_name=None makes read_excel return a dict with one DataFrame per sheet, keyed by sheet name.
# Later cells look sheets up with str(i), so sheets are presumably named "0", "1", ... in narrative order - TODO confirm.
dfs_initial_shap = pd.read_excel(f"/content/drive/My Drive/Colab Notebooks/{audience_type}_initial_shap_values_final.xlsx", sheet_name=None)

In [None]:
# Plain-text descriptions of the prediction task; they are interpolated into the extraction prompt.
# NOTE: these must be plain strings. A trailing comma after the literal would silently create a
# 1-tuple, which the prompt f-string would then render as "('...',)" instead of the text itself.
dataset_description = "whether an applicant is likely to be approved or denied for a loan, based on a collection of personal and financial data."
input_description = "diverse features such as demographic information, credit history, employment status, income levels, existing debt, and other relevant financial metrics for each applicant"
target_description = "whether the applicant is likely to be approved or denied for a loan"

In [None]:
def generate_response(api_key, prompt, temp=0.2, gpt_model='gpt-4'):
    """
    Send a single-turn chat request and return the text of the first completion.

    Parameters:
    -----------
    api_key : str
        Key used to construct the OpenAI client.
    prompt : str
        Text sent as the only user message.
    temp : float
        Sampling temperature forwarded to the API.
    gpt_model : str
        Name of the chat model to query.

    Returns:
    --------
    The `content` of the first choice's message.
    """
    chat_messages = [{"role": "user", "content": prompt}]

    # A new client is built for every call, exactly as many times as the function is invoked.
    completion = openai.OpenAI(api_key=api_key).chat.completions.create(
        model=gpt_model,
        messages=chat_messages,
        temperature=temp,
        max_tokens=1000,
    )

    first_choice = completion.choices[0]
    return first_choice.message.content

In [None]:
# Re-display the first narrative (row 0, column 0) for reference.
df_narratives.iloc[0,0]

"The AI model predicted that the applicant's loan application would be denied. The primary reason for this decision is the applicant's low annual income. A lower income might indicate a higher risk of default, as the applicant might struggle to meet the repayment schedule. \n\nThe second most influential factor was the high loan amount requested. A larger loan amount can be seen as a higher risk, especially when paired with a lower income, as it might be more difficult for the applicant to repay the loan in full. \n\nFinally, the applicant's history of previous loan defaults also contributed negatively to the prediction. This history suggests a pattern of financial behavior that increases the risk of future defaults. \n\nIn summary, the combination of a low income, a high loan amount, and a history of loan defaults led the model to predict a high risk of default, resulting in the denial of the loan application."

In [None]:
def generate_prompt(narrative: str, dataset_description: str, input_description: str, target_description: str, feature_info_df):
    """
    Builds the extraction prompt for one SHAP narrative.

    The prompt asks an extractor LLM to return a python dictionary that maps every
    feature mentioned in the narrative to its "rank", "sign" and "value".

    Parameters:
    -----------
    narrative : str
        A SHAP narrative that was generated to explain the prediction of a particular instance.
    dataset_description : str
        Short description of the dataset / task, inserted verbatim into the prompt.
    input_description : str
        Short description of the input features, inserted verbatim into the prompt.
    target_description : str
        Short description of the target, inserted verbatim into the prompt.
    feature_info_df : pd.DataFrame
        Table with at least the columns "feature_name" and "Description"; both columns
        are rendered (without the index) at the end of the prompt.

    Returns:
    --------
    prompt_string: str
        A prompt for the extractor model

    Raises:
    -------
    ValueError
        If feature_info_df is None (e.g. the sheet for this instance was not found).
    """

    if feature_info_df is None:
        raise ValueError("feature_info_df is None: no feature table is available for this narrative")

    # Rendered outside the f-string so the template below stays free of nested quotes.
    feature_table = feature_info_df[["feature_name","Description"]].to_string(index = False)

    # The indentation inside the template is part of the prompt text and is kept as is.
    # The first inner key is spelled "rank" (colon outside the quotes) so that it matches
    # the key name requested further down in the prompt and read back by the parser.
    prompt_string = f"""
        An LLM was used to create a narrative to explain and interpret a prediction
        made by another smaller classifier model. The LLM was given an explanation of
        the classifier task, the training data, and provided with the exact names of all
        the features and their meaning. Most importantly, the LLM was provided with a table
        that contains the feature values of that particular instance, their SHAP values
        which are a numeric measure of their importance. Here is some general info about the task:


        Dataset description: {dataset_description},
        Target description: {target_description}
        Input description: {input_description}

        The LLM returned the following narrative: {narrative}
        Your task is to extract some information about all the features that were mentioned in the narrative as a reason.
        Provide your answer as a python dictionary with the keys as the feature names.
        The values corresponding to the feature name keys are dictionaries themselves that contain the following inner keys

        1) "rank": indicating the order of absolute importance of the feature starting from 0.
        2) "sign": the sign of whether the feature contributed towards target value 1 or against it (either +1 or -1 for sign value).
        3) "value": if the value of the feature is mentioned in a way that you can put an exact number on, add it. Only return numeric values here.
        If the description of the value is qualitative such as "many" or "often" and not mentioning an exact value, return "None" for its value.



        Make sure that the "rank", "sign", "value" keys and their values are always present in the inner dictionaries.
        Make sure that the "rank" key is sorted from 0 to an increasing value in the dictionary. The first element cannot have any other rank than 0.
        Please just provide the python dictionary as a string and add nothing else to the answer.

        The features and their descriptions are provided in the table below.

        Make sure to use the exact names of the features as provided in the table, including capitalization:
        {feature_table}
        """

    return prompt_string

In [None]:
def generate_extractions():
    """
    Query the extractor model once per narrative and collect the raw answers.

    Reads the notebook-level objects df_narratives, dfs_initial_shap, api_key and the
    three task descriptions. Narrative i (row i, first column) is paired with the
    SHAP sheet stored under the key str(i).

    Returns:
    --------
    responses: list
        One model answer per row of df_narratives, in row order.
    """

    def _extract_one(row_number):
        # Build the prompt for this narrative, then ask the model for the extraction.
        extraction_prompt = generate_prompt(
            df_narratives.iloc[row_number, 0],
            dataset_description,
            input_description,
            target_description,
            dfs_initial_shap.get(str(row_number)),
        )
        return generate_response(api_key, extraction_prompt)

    return [_extract_one(row_number) for row_number in range(len(df_narratives))]

In [None]:
# Extraction responses are cached on Drive: query the model only when no cache file exists yet.
file_path = f"/content/drive/My Drive/Colab Notebooks/{audience_type}_responses_final.xlsx"

if not os.path.exists(file_path):
    responses = generate_extractions()

    # Persist the fresh responses (single column, no index) so later runs can reuse them.
    response_df = pd.DataFrame(responses)
    response_df.to_excel(file_path, index=False)
else:
    # The cache holds the responses in its first column.
    responses = pd.read_excel(file_path).iloc[:,0].tolist()

In [None]:
# Number of extraction responses (generate_extractions produces one per narrative row).
len(responses)

30

In [None]:
# Inspect the raw extraction string returned for the first narrative.
responses[0]

'{"TotalDebtToIncomeRatio": {"rank": 0, "sign": -1, "value": None}, "AnnualIncome": {"rank": 1, "sign": -1, "value": None}, "PreviousLoanDefaults": {"rank": 2, "sign": -1, "value": None}}'

In [None]:
def extract_dict_from_str(extracted_str: str)->dict:

    """
    Extracts a dictionary from a string

    Everything from the first "{" to the last "}" is parsed, so surrounding sentences
    or code fences are ignored. The text is parsed as a Python literal first; if that
    fails it is parsed as JSON, which also accepts null / true / false.

    Parameters:
    -----------
    extracted_str : str
        The answer to the extractor prompt, usually a simple dictionary in string form but could be preceded by some sentences.

    Returns:
    --------
    extracted_dict: dict
        The dictionary parsed from the answer.

    Raises:
    -------
    ValueError
        If the answer contains no "{...}" span, the span cannot be parsed, or the
        parsed object is not a dictionary.
    """
    import json  # local import: only needed for the JSON fallback below

    start_index = extracted_str.find("{")
    end_index = extracted_str.rfind("}")

    # find/rfind return -1 when the brace is missing; slicing would then silently yield "".
    if start_index == -1 or end_index < start_index:
        raise ValueError(f"No dictionary found in extractor answer: {extracted_str!r}")

    dict_str = extracted_str[start_index : end_index + 1]

    try:
        # literal_eval only evaluates literals, so arbitrary model output is never executed.
        extracted_dict = ast.literal_eval(dict_str)
    except (ValueError, SyntaxError) as literal_error:
        try:
            extracted_dict = json.loads(dict_str)
        except json.JSONDecodeError:
            raise ValueError(f"Could not parse a dictionary from extractor answer: {dict_str!r}") from literal_error

    # "{1, 2}" would parse as a set; only a real dictionary is a valid extraction.
    if not isinstance(extracted_dict, dict):
        raise ValueError(f"Extractor answer is not a dictionary: {dict_str!r}")

    return extracted_dict

In [None]:
def _numeric_or_nan(values):
    """Return `values` as a float array in which every non-numeric entry (None, str, bool, ...) is np.nan."""
    numeric_types = (int, float, np.integer, np.floating)
    return np.array(
        [
            float(v) if isinstance(v, numeric_types) and not isinstance(v, (bool, np.bool_)) else np.nan
            for v in values
        ],
        dtype=float,
    )


def _scatter_with_inf(known_values, known_positions, total):
    """Place `known_values` at `known_positions` in a length-`total` float array; all other slots are np.inf."""
    full = np.full(total, np.inf, dtype=float)
    full[known_positions] = known_values
    return full


def get_diff(extracted_dict: dict, explanation: pd.DataFrame):

    """
    Compares the extracted dict with the actual explanation and calculates their difference

    Parameters:
    -----------
    extracted_dict : dict
        The dictionary extracted from the LLM answer: feature name -> {"rank", "sign", "value"}.
    explanation: pd.DataFrame
        A dataframe with the columns "feature_name", "feature_value" and "SHAP Value".
        It is not modified; all work happens on a copy.

    Returns: Tuple[5x list]
    --------
    rank_diff, sign_diff, value_diff : list of float
        One entry per extracted feature, in extraction order.
        rank_diff is extracted rank minus real rank, sign_diff is 0.0 when the extracted
        sign agrees with the SHAP sign and 1.0 otherwise, value_diff is extracted value
        minus real feature value. An entry is np.nan when the extracted element was not
        numeric and np.inf when the feature does not exist in `explanation`.
    real_rank : list of int
        Real ranks (by absolute SHAP value, starting at 0) of the extracted features that exist.
    extracted_rank : list
        Extracted ranks of those same features as int, or None where the rank was not numeric.
    """
    #### This calculation is a bit subtle because you can in principle have various types of hallucinations in the extracted dict.

    # Nothing extracted: nothing to compare.
    if not extracted_dict:
        return [], [], [], [], []

    ###STEP1: WE COMPUTE DIFFERENCE FOR ALL EXTRACTED FEATURES THAT ACTUALLY EXIST:

    #1) rank the real features by absolute SHAP value; after reset_index the index IS the real rank.
    #   assign() returns a new frame, so the caller's dataframe does not gain an "abs_SHAP" column.
    ranked = (
        explanation
        .assign(abs_SHAP=explanation["SHAP Value"].abs())
        .sort_values(by="abs_SHAP", ascending=False)
        .reset_index(drop=True)
    )

    #2) create a dataframe out of the extracted dict (one row per extracted feature, extraction order)
    df_extracted = (
        pd.DataFrame(extracted_dict).T
        .reset_index()
        .rename(columns={"index": "feature_name"})
    )

    #3) filter the real explanation on the extracted features and put it in extraction order:
    #   an ordered categorical built from the extracted names sorts the real rows accordingly,
    #   and real features that were not extracted become NaN and are filtered out.
    cat_dtype = pd.CategoricalDtype(df_extracted["feature_name"], ordered=True)
    ranked["feature_name"] = ranked["feature_name"].astype(cat_dtype)
    df_real = ranked[ranked["feature_name"].isin(df_extracted["feature_name"])].sort_values(by="feature_name")

    #4) split the extracted features into existing ones and hallucinated ones
    #   (hallucinations usually don't happen but this is a good check)
    is_known = df_extracted["feature_name"].isin(df_real["feature_name"])
    known_positions = np.flatnonzero(is_known.to_numpy())
    n_hallucinated = int((~is_known).sum())
    known_extracted = df_extracted[is_known]

    #5) real rank / sign / value of the existing features, aligned with known_extracted
    real_rank = df_real.index.to_numpy().astype(int)
    real_sign = df_real["SHAP Value"].map(lambda x: int(np.sign(x))).to_numpy(dtype=float)
    real_value = df_real["feature_value"].to_numpy()

    #6) for all the real features replace any non-numeric extracted element with np.nan.
    #   isinstance is used (not an exact type match) so plain Python floats count as numeric too.
    rank_array = _numeric_or_nan(known_extracted["rank"])
    sign_array = _numeric_or_nan(known_extracted["sign"])
    value_array = _numeric_or_nan(known_extracted["value"])

    #7) compute the difference arrays that we intend to output
    rank_diff = (rank_array - real_rank).astype(float)
    sign_product = sign_array * real_sign
    # a non-numeric sign stays np.nan instead of being counted as a correct sign
    sign_diff = np.where(np.isnan(sign_product), np.nan, (sign_product <= 0).astype(float))
    value_diff = (value_array - real_value).astype(float)

    #also useful to get actual real rank and extracted rank lists
    extracted_rank = [None if np.isnan(r) else int(r) for r in rank_array]

    ###STEP 2: Now account for the fact that we ignored hallucinated features previously, and add a np.inf for the difference there.
    for _ in range(n_hallucinated):
        print("""*** Warning: Some features extracted by model were not in the real feature list ***.
        If this warning is encountered too often this could be a sign that something is wrong.""")

    total = len(df_extracted)
    rank_diff = _scatter_with_inf(rank_diff, known_positions, total)
    sign_diff = _scatter_with_inf(sign_diff, known_positions, total)
    value_diff = _scatter_with_inf(value_diff, known_positions, total)

    ### So now at the end, the rank/sign/value-diff arrays contain the difference between the extracted feature and the real feature,
    ### and if the feature did not exist have an np.inf at that position, or if the extracted element was not numeric contain np.nan.

    return rank_diff.tolist() , sign_diff.tolist(), value_diff.tolist(), real_rank.tolist(), extracted_rank

In [None]:
# Compare every extraction with the SHAP table of the same instance and collect the diff lists.
rank_diffs, sign_diffs, value_diffs = [], [], []

for i, response in enumerate(responses):
    extracted = extract_dict_from_str(response)
    # Sheet str(i) holds the SHAP table for narrative i.
    rank_diff, sign_diff, value_diff, real_rank, extracted_rank = get_diff(extracted, dfs_initial_shap[str(i)])

    rank_diffs.append(rank_diff)
    sign_diffs.append(sign_diff)
    value_diffs.append(value_diff)






In [None]:
# One row per narrative, one column per extracted-feature position (shorter rows are padded with NaN).
df_rank_diffs, df_sign_diffs, df_value_diffs = (
    pd.DataFrame(diff_lists) for diff_lists in (rank_diffs, sign_diffs, value_diffs)
)

In [None]:
# Disabled debugging snippet: the code below sits inside a string literal, so none of it is executed.
'''
i=8
single_df = dfs_initial_shap[str(i)]
single_df["abs_SHAP"] = single_df["SHAP Value"].abs()
single_df = single_df.sort_values(by="abs_SHAP", ascending=False)
single_df.head(10)
'''


Unnamed: 0,feature_name,Description,feature_value,SHAP Value,abs_SHAP
20,NetWorth,NetWorth: Total financial worth of the applicant,3238.0,-1.860216,1.860216
23,TotalDebtToIncomeRatio,TotalDebtToIncomeRatio: Total debt against income,0.242345,-1.626891,1.626891
21,InterestRate,InterestRate: Applied interest rate,0.202333,1.571941,1.571941
2,CreditScore,CreditScore: Creditworthiness score of the app...,627.0,-1.36034,1.36034
1,AnnualIncome,AnnualIncome: Yearly income of the applicant,85311.0,1.318429,1.318429
14,LengthOfCreditHistory,LengthOfCreditHistory: Credit history duration...,19.0,0.866318,0.866318
6,MonthlyDebtPayments,MonthlyDebtPayments: Monthly debt obligations ...,1082.0,0.852959,0.852959
13,PaymentHistory,PaymentHistory: Past payment behavior of the a...,16.0,-0.489208,0.489208
0,Age,Age: Age of the applicant,47.0,0.442091,0.442091
26,EducationLevel_Bachelor,EducationLevel_Bachelor: Indicates if the appl...,1.0,0.335618,0.335618


In [None]:
# Rank differences per narrative (rows) and extracted-feature position (columns); 0.0 means the rank matched.
df_rank_diffs

Unnamed: 0,0,1,2
0,0.0,0.0,0.0
1,0.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
4,0.0,0.0,0.0
5,0.0,0.0,0.0
6,0.0,0.0,0.0
7,0.0,0.0,0.0
8,0.0,0.0,0.0
9,0.0,0.0,0.0


In [None]:
# Sign comparison per narrative (rows) and extracted-feature position (columns); 0.0 means the sign agreed.
df_sign_diffs

Unnamed: 0,0,1,2
0,0.0,0.0,0.0
1,1.0,1.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
4,0.0,0.0,0.0
5,0.0,0.0,0.0
6,0.0,0.0,0.0
7,0.0,0.0,0.0
8,1.0,0.0,1.0
9,0.0,0.0,0.0


In [None]:
# Value differences per narrative (rows) and extracted-feature position (columns); NaN where no numeric value was extracted.
df_value_diffs


Unnamed: 0,0,1,2
0,,,
1,,,
2,,,
3,,,
4,,,
5,,,
6,,,
7,,,
8,,,
9,,,


In [None]:
def average_zero(df):

    """
    Compute the average occurrence of zeros in a dataframe among all numeric values.

    Parameters:
    -----------
    df : pd.DataFrame
        Dataframe of differences; entries that are NaN or +/-inf are ignored.

    Returns:
    --------
    Fraction of the finite entries that are exactly 0, or np.nan when the dataframe
    holds no finite entry at all (nothing to average over).
    """

    #take all values of the dataframe together, as floats (missing entries become nan)
    values = df.to_numpy(dtype=float).ravel()

    #keep only array of numeric values (so completely ignore nans or np.infs)
    numeric_values = values[np.isfinite(values)]

    #without any numeric value the ratio would be 0/0: report nan explicitly instead
    total_numeric_values = numeric_values.size
    if total_numeric_values == 0:
        return np.nan

    #count total zeroes and compute the accuracy for those objects
    num_zeros = np.sum(numeric_values == 0)
    average_occurrence_of_zero = num_zeros / total_numeric_values

    return average_occurrence_of_zero

In [None]:
# Share of exact matches (zero difference) among the finite entries of each diff table.
rank_accuracy, sign_accuracy, value_accuracy = (
    average_zero(diff_table) for diff_table in (df_rank_diffs, df_sign_diffs, df_value_diffs)
)

# One value per line: rank, sign, value.
print(rank_accuracy, sign_accuracy, value_accuracy, sep="\n")

1.0
0.9222222222222223
1.0


In [None]:
!pip install textstat

Collecting textstat
  Downloading textstat-0.7.8-py3-none-any.whl.metadata (15 kB)
Collecting pyphen (from textstat)
  Downloading pyphen-0.17.2-py3-none-any.whl.metadata (3.2 kB)
Collecting cmudict (from textstat)
  Downloading cmudict-1.1.1-py3-none-any.whl.metadata (3.6 kB)
Downloading textstat-0.7.8-py3-none-any.whl (239 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.1/239.1 kB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cmudict-1.1.1-py3-none-any.whl (939 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m939.7/939.7 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyphen-0.17.2-py3-none-any.whl (2.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m20.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyphen, cmudict, textstat
Successfully installed cmudict-1.1.1 pyphen-0.17.2 textstat-0.7.8


In [None]:
from textstat import flesch_kincaid_grade
from textstat import flesch_reading_ease
from textstat import gunning_fog

In [None]:
#Example for just one story
generated_story = df_narratives.iloc[0,0]
fkgl_score = flesch_kincaid_grade(generated_story)
print(fkgl_score)

11.876588465298145


In [None]:
#Calculate the scores
fkgl_scores = df_narratives.iloc[:, 0].apply(flesch_kincaid_grade)
fre_scores = df_narratives.iloc[:, 0].apply(flesch_reading_ease)
fog_scores = df_narratives.iloc[:, 0].apply(gunning_fog)

#Construct the dataframes to save the scores
df_fkgl = pd.DataFrame({
'narrative': df_narratives.iloc[:, 0],
'fkgl_score': fkgl_scores
})

df_fre = pd.DataFrame({
'narrative': df_narratives.iloc[:, 0],
'fre_score': fre_scores
})

df_fog = pd.DataFrame({
'narrative': df_narratives.iloc[:, 0],
'fog_score': fog_scores
})


#Compute average for each dataframe
average_fkgl = df_fkgl["fkgl_score"].mean()
print(f"Average FKGL score for {audience_type}: {average_fkgl:.2f}")

average_fre = df_fre["fre_score"].mean()
print(f"Average FRE score for {audience_type}: {average_fre:.2f}")

average_fog = df_fog["fog_score"].mean()
print(f"Average FOG score for {audience_type}: {average_fog:.2f}")

Average FKGL score for general: 12.18
Average FRE score for general: 41.15
Average FOG score for general: 15.99
