In [None]:
import os

import requests
from dotenv import load_dotenv

# Setting up LLM

In [2]:
def llm(query, token, model, timeout=60):
    """
    Query an LLM using the Hugging Face Inference API.

    Parameters:
        query (str): The input query.
        token (str): Hugging Face API token.
        model (str): Model name; it is appended to the ``meta-llama/`` path
            of the inference endpoint.
        timeout (float | tuple | None): Seconds to wait for the server,
            forwarded to ``requests.post``. ``None`` disables the timeout
            (waits indefinitely).

    Returns:
        str: Generated response from the LLM, with surrounding whitespace
        removed. Empty string if the first result has no ``generated_text``.

    Raises:
        RuntimeError: If the API answers with a non-200 status, or with a
            body that is not a non-empty JSON list of objects.
        requests.exceptions.RequestException: On connection errors or when
            ``timeout`` elapses.
    """
    parameters = {
        "max_new_tokens": 300,
        "temperature": 0.1,
        "top_k": 50,
        "top_p": 0.95,
        "return_full_text": False
    }

    headers = {
        'Authorization': f'Bearer {token}',
        'Content-Type': 'application/json'
    }

    payload = {
        "inputs": query,
        "parameters": parameters
    }

    # Without an explicit timeout, requests waits forever on a stalled
    # connection, which would hang the notebook kernel.
    response = requests.post(
        f"https://api-inference.huggingface.co/models/meta-llama/{model}",
        headers=headers,
        json=payload,
        timeout=timeout,
    )

    if response.status_code != 200:
        raise RuntimeError(f"API request failed with status {response.status_code}: {response.text}")

    try:
        body = response.json()
    except ValueError as exc:
        raise RuntimeError(f"API returned a non-JSON body: {response.text}") from exc

    # The result is read from the first element of a JSON list; validate that
    # shape explicitly instead of failing with an opaque IndexError/KeyError.
    if not isinstance(body, list) or not body or not isinstance(body[0], dict):
        raise RuntimeError(f"Unexpected API response format: {body!r}")

    # `or ''` guards against an explicit null in the payload.
    return (body[0].get('generated_text', '') or '').strip()

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load variables from a local .env file into the environment so that the
# API token can be read with os.getenv instead of being hard-coded here.
load_dotenv()

In [4]:
# Model name passed to llm(), which appends it to the meta-llama/ endpoint path.
model = "Meta-Llama-3-8B-Instruct"
# API token comes from the environment; None if HUGGING_FACE_TOKEN is unset.
token = os.environ.get("HUGGING_FACE_TOKEN")

# Prompting

In [None]:
def format_shap_prompt(customer_data, prediction, prediction_proba):
    """
    Build the explanation prompt sent to the LLM for one customer.

    Parameters:
        customer_data: Table-like object exposing ``iterrows()`` whose rows
            provide the keys "Feature", "Feature Value" and "SHAP Value"
            (e.g. a pandas DataFrame with one row per feature).
        prediction: Model output; ``1`` is rendered as "high risk", any
            other value as "low risk".
        prediction_proba (float): Probability shown in the prompt, formatted
            with two decimals.

    Returns:
        str: The complete prompt, listing every feature with its value and
        SHAP impact followed by the answering instructions.
    """
    risk_label = "high risk" if prediction == 1 else "low risk"

    feature_lines = []
    for _, row in customer_data.iterrows():
        feature = row["Feature"]
        feature_value = row["Feature Value"]
        shap_value = row["SHAP Value"]

        # bool is a subclass of int, so it must be excluded explicitly:
        # otherwise one-hot flags would be printed as 1.00 / 0.00 while the
        # prompt text below asks the model to interpret true/false flags.
        is_numeric = isinstance(feature_value, (int, float)) and not isinstance(feature_value, bool)
        shown_value = f"{feature_value:.2f}" if is_numeric else f"{feature_value}"

        feature_lines.append(f"- {feature}: {shown_value} (SHAP impact: {shap_value:.2f})\n")

    # Each entry already ends with a newline, so a plain join reproduces the
    # list exactly as it appears in the prompt.
    features_and_shap_values = "".join(feature_lines)

    prompt = f"""
You are a smart and helpful explainer and interpreter for a machine learning model that classifies customers as high or low risk regarding credit default.

The customer in question has been classified as {risk_label} with a probability of {prediction_proba:.2f}.
Below are the customer's features and their corresponding SHAP values:

{features_and_shap_values}

Instructions:
1. Provide only the main reasons the customer was classified as {risk_label} by referencing the most impactful features and the respective reasons behind it.
2. Do not include any disclaimers, contact information, or explanations of what SHAP values are. You should analyze the SHAP values in relation to the feature values and the connections between them, but the values themselves don't have to be mentioned in the reply. 
3. Write your answer without extra salutations, sign-offs and mentions of the SHAP values.
4. Write your answer using bullet points for the features you want to mention.

Take your time to thoroughly analyze the values and the connections. Note that the false flags for the status are because of the one hot encoding, meaning that the true flag was the one representative of the analyzed customer.
Please explain the primary factors that led to this classification:
"""

    return prompt

In [None]:
# Example usage: ask the LLM to explain one customer's classification.
# NOTE(review): load_shap_values is not defined in the visible cells — confirm
# it is defined or imported before this cell runs.
customer_index = 6799
customer_data = load_shap_values(customer_index)

# prediction=1 means the customer is treated as high risk; the prediction and
# its probability are fixed values for this example.
formatted_prompt = format_shap_prompt(customer_data, prediction=1, prediction_proba=0.85)

# Uncomment to inspect the exact prompt sent to the model:
# print(formatted_prompt)

try:
    response = llm(formatted_prompt, token, model)
    print("Generated Response:\n", response)
except Exception as e:
    # Show the failure message instead of a traceback so the cell still completes.
    print(e)