Retail Risk Monitoring

In [None]:
import os
import pandas as pd
import sqlite3
from langchain_groq import ChatGroq
from langchain_community.utilities import SQLDatabase
from langchain_community.agent_toolkits import create_sql_agent
import time
from getpass import getpass

# Reuse a key that is already in the environment so the notebook can be re-run
# (or executed non-interactively) without prompting; otherwise ask for it.
groq_key = os.environ.get("GROQ_API_KEY", "").strip() or getpass("Enter your Groq API key: ").strip()
if not groq_key:
    # An empty key is never usable; stop here instead of storing a blank value.
    raise ValueError("A Groq API key is required (set GROQ_API_KEY or enter it at the prompt).")
os.environ["GROQ_API_KEY"] = groq_key

# SQLite file that the later cells write to and query.
db_name = "macro_risk.db"

In [2]:
# Helper function for cleaning the data
def clean_gov_data(df):
    """
    Clean a UK Gov insolvency table so every measure column is numeric.

    Rules (from the Metadata file):
    - [z] = Not applicable (converted to 0)
    - [x] = Not available (converted to 0)
    - ':' and '..' cells are likewise converted to 0
    - Commas are removed from numbers (e.g. 1,000 -> 1000)
    - Any value that still cannot be parsed as a number becomes 0

    The key columns 'period', 'year' and 'month' only receive the placeholder
    replacement; they are not forced to numeric.

    Args:
        df: DataFrame to clean.

    Returns:
        A new DataFrame with the rules applied; the input frame is not modified.
    """
    key_columns = ('period', 'year', 'month')

    # Replace metadata codes with 0 (exact cell matches only).
    df = df.replace({'[z]': 0, '[x]': 0, ':': 0, '..': 0})

    for col in df.columns:
        if col in key_columns:
            continue

        values = df[col]
        # Text columns may be plain object dtype or one of pandas' dedicated
        # string dtypes. Checking only `dtype == object` skips the latter, and
        # "1,000" would then be coerced to NaN and silently turned into 0.
        is_text = values.dtype == object or pd.api.types.is_string_dtype(values.dtype)
        if is_text:
            values = values.astype(str).str.replace(',', '', regex=False)

        # Force to numeric; anything unparseable becomes 0.
        df[col] = pd.to_numeric(values, errors='coerce').fillna(0)
    return df

def _load_insolvency_csv(csv_path, column_map, normalise_headers=False):
    """
    Read one insolvency CSV and return only the mapped, cleaned columns.

    Args:
        csv_path: Path of the CSV file to read (first row is the header).
        column_map: Mapping of source header -> analysis column name.
        normalise_headers: When True, headers are stripped, lower-cased and
            have spaces replaced by underscores before `column_map` is applied.

    Returns:
        DataFrame holding the `column_map` targets that exist in the file,
        in mapping order, after being passed through `clean_gov_data`.
    """
    raw = pd.read_csv(csv_path, header=0)
    if normalise_headers:
        raw.columns = raw.columns.str.strip().str.lower().str.replace(" ", "_")

    renamed = raw.rename(columns=column_map)

    wanted = list(column_map.values())
    missing = [c for c in wanted if c not in renamed.columns]
    if missing:
        # Surface header drift instead of silently producing a narrower table.
        print(f"Warning: {csv_path} is missing expected columns: {missing}")

    # Only keep columns that actually exist in the file.
    present = [c for c in wanted if c in renamed.columns]
    return clean_gov_data(renamed[present])


# LOAD INDIVIDUAL DATA
print("--- LOADING INDIVIDUAL DATA ---")

# Map based on Metadata File
rename_ind = {
    'year': 'year',
    'month': 'month',
    'EW_total_individuals_SA': 'total_ind_insolvencies',
    'EW_bankruptcy_SA': 'bankruptcies',
    'EW_IVA_SA': 'ivas',
    'EW_DRO_SA': 'dros',
    'EW_self_employed_bankrupt_SA': 'trader_bankruptcies',  # Key SME Metric
    'EW_creditor_bankrupt_SA': 'forced_bankruptcies',       # Hostile
    'EW_debtor_bankrupt_SA': 'voluntary_bankruptcies'       # Strategic
}

try:
    df_ind = _load_insolvency_csv('Individual_Insolvency_November_2025.csv', rename_ind)
    print(f"Loaded Individual Data: {len(df_ind)} rows.")
except Exception as e:
    # Best effort: report the problem and continue with an empty frame.
    print(f"Error loading Individual CSV: {e}")
    df_ind = pd.DataFrame()

# LOAD COMPANY DATA
print("--- LOADING COMPANY DATA ---")

# Map based on Metadata File (keys are matched against normalised headers:
# lowercase, spaces replaced by underscores)
rename_co = {
    'year': 'year',
    'month': 'month',
    'total_ew_sa': 'total_co_insolvencies',
    'compliq_ew_sa': 'compulsory_liquidations', # Court Ordered
    'cvl_ew_sa': 'voluntary_liquidations',      # Director Led
    'admin_ew_sa': 'administrations'            # Rescue
}

try:
    df_co = _load_insolvency_csv(
        'Company_Insolvency_November_2025.csv',
        rename_co,
        normalise_headers=True,
    )
    print(f"Loaded Company Data: {len(df_co)} rows.")
except Exception as e:
    # Best effort: report the problem and continue with an empty frame.
    print(f"Error loading Company CSV: {e}")
    df_co = pd.DataFrame()

--- LOADING INDIVIDUAL DATA ---
Loaded Individual Data: 311 rows.
--- LOADING COMPANY DATA ---
Loaded Company Data: 311 rows.


In [4]:
# Write both cleaned tables to the SQLite file, replacing any tables that a
# previous run left behind.
conn = sqlite3.connect(db_name)
try:
    df_ind.to_sql("retail_risk", conn, if_exists='replace', index=False)
    df_co.to_sql("corporate_risk", conn, if_exists='replace', index=False)
finally:
    # Always release the connection, even when one of the writes fails.
    conn.close()

# Announce success only once both tables have actually been written.
print("--- DATABASE BUILT ---")


--- DATABASE BUILT ---


In [None]:
# INITIALISE AGENT (LIGHTWEIGHT & STABLE)
# `db` wraps the SQLite file named by `db_name`; `llm` is the Groq chat model,
# created with temperature=0.
db = SQLDatabase.from_uri(f"sqlite:///{db_name}")
llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0)

# Hand-written schema summary that is interpolated into `system_prefix` below.
# It is prompt text only: it is not passed to `SQLDatabase`, so it does not
# change what the database wrapper itself reports about the tables.
# NOTE(review): this lists a subset of the columns written to the database
# (bankruptcies, ivas, dros and administrations from the rename maps are
# omitted) - confirm the omission is intentional.
# NOTE(review): a recorded run of sql_db_schema shows the retail_risk columns
# as INTEGER, whereas this text declares the measures as REAL - confirm.
custom_table_info = """
Table: retail_risk
Columns: 
- year (INTEGER)
- month (INTEGER)
- total_ind_insolvencies (REAL)
- trader_bankruptcies (REAL) - Self-Employed bankruptcies
- forced_bankruptcies (REAL) - Creditor-initiated bankruptcies
- voluntary_bankruptcies (REAL)

Table: corporate_risk
Columns:
- year (INTEGER)
- month (INTEGER)
- total_co_insolvencies (REAL)
- compulsory_liquidations (REAL) - Court-ordered liquidations
- voluntary_liquidations (REAL)
"""

# System prompt for the agent: role, the schema text above, query rules,
# two worked examples and the expected output format.
# It is an f-string, so `custom_table_info` is substituted when this cell runs.
system_prefix = f"""
You are a Risk Analyst. Answer ONLY using data from SQL queries. Do NOT invent numbers.

SCHEMA:
{custom_table_info}

RULES:
1. ALWAYS SUM monthly data: SELECT year, SUM(column) FROM table WHERE year >= X GROUP BY year
2. JOIN on year AND month: ON t1.year = t2.year AND t1.month = t2.month
3. Query first, then analyze the results in plain English

EXAMPLES:
Q: "Yearly Trader Bankruptcies since 2020. What's the trend?"
SQL: SELECT year, SUM(trader_bankruptcies) FROM retail_risk WHERE year >= 2020 GROUP BY year ORDER BY year
Then describe: increasing/decreasing/stable based on the actual numbers

Q: "Total Individual vs Corporate for 2023. How many times higher?"
SQL: SELECT SUM(total_ind_insolvencies) FROM retail_risk WHERE year = 2023
SQL: SELECT SUM(total_co_insolvencies) FROM corporate_risk WHERE year = 2023
Then calculate: divide first by second in your answer

OUTPUT: Brief summary with actual numbers. No SQL code in final answer.
"""

# Build the SQL agent from the model, the database wrapper and the custom
# prefix above. `verbose=True` is what produces the step-by-step chain trace
# in the cell output when the agent is invoked.
agent_executor = create_sql_agent(
    llm=llm,
    db=db,
    agent_type="zero-shot-react-description",
    prefix=system_prefix,
    verbose=True,
    handle_parsing_errors=True,
    max_iterations=5  # Limit iterations to save tokens
)

In [None]:
# RUN ANALYSIS
print("\n--- MACRO RISK ANALYST ---")

# Questions put to the agent, asked one at a time in the order listed.
questions = [
    "Yearly Trader Bankruptcies since 2020. What's the trend?",
    "Compare yearly Company Compulsory Liquidations vs Individual Forced Bankruptcies since 2010. Same peak year?",
    "Total Individual vs Corporate Insolvencies for 2023. How many times higher?"
]

for question in questions:
    print(f"\nAnalyst: {question}")
    try:
        response = agent_executor.invoke(question)
        reply_line = f"Agent: {response['output']}"
    except Exception as err:
        # A failed question is reported and the loop moves on to the next one.
        print(f"Error: {err}")
    else:
        print(reply_line)

    # Fixed pause after every question (presumably to stay under the API
    # rate limit - confirm).
    time.sleep(2)


--- MACRO RISK ANALYST ---

Analyst: Yearly Trader Bankruptcies since 2020. What's the trend?


[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3mcorporate_risk, retail_risk[0m[32;1m[1;3mThought: I should query the schema of the retail_risk table to see what columns I can use.
Action: sql_db_schema
Action Input: retail_risk[0m[33;1m[1;3m
CREATE TABLE retail_risk (
	year INTEGER, 
	month INTEGER, 
	total_ind_insolvencies INTEGER, 
	bankruptcies INTEGER, 
	ivas INTEGER, 
	dros INTEGER, 
	trader_bankruptcies INTEGER, 
	forced_bankruptcies INTEGER, 
	voluntary_bankruptcies INTEGER
)

/*
3 rows from retail_risk table:
year	month	total_ind_insolvencies	bankruptcies	ivas	dros	trader_bankruptcies	forced_bankruptcies	voluntary_bankruptcies
2000	1	2652	1858	794	0	0	692	1166
2000	2	2433	1735	698	0	0	593	1142
2000	3	2495	1728	767	0	0	627	1101
*/[0m[32;1m[1;3mThought: I should query the retail_risk table to get th