In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel
import time

# Load fine-tuned model and tokenizer
base_model_path = "./Mistral-7B-v0.1"             # Base model
finetuned_model_path = "./mistral-risk-finetuned-final"    # Fine-tuned weights
# NOTE(review): the tokenizer is loaded from the base checkpoint, not from the
# fine-tuned directory — confirm fine-tuning did not add or change tokens.
tokenizer = AutoTokenizer.from_pretrained(base_model_path)
# Use the EOS token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Load the model with LoRA weights
# The base weights are loaded in float16 with automatic device placement;
# anything that has to be offloaded to disk goes to ./offload. The captured
# run output reports parameters offloaded to the CPU, so generation with this
# setup can be slow.
model = AutoModelForCausalLM.from_pretrained(
    base_model_path,
    torch_dtype=torch.float16,
    device_map="auto",
    offload_folder="./offload"
)
# Wrap the base model with the adapter stored at finetuned_model_path, then
# switch to evaluation mode for inference.
model = PeftModel.from_pretrained(model, finetuned_model_path)
model.eval()

# Define your inference function
def generate_analysis(input_data: str, max_new_tokens=512, temperature=0.7, top_p=0.95,
                      *, return_full_text=True):
    """Generate a financial-risk analysis of ``input_data`` with the loaded model.

    The instruction prompt is prepended to ``input_data``, the result is
    tokenised and passed to ``model.generate``, and the decoded text is
    returned. The elapsed generation time is printed as a side effect.

    Relies on the module-level ``tokenizer`` and ``model`` objects.

    Args:
        input_data: Text to analyse; appended verbatim to the instruction.
        max_new_tokens: Upper bound on the number of tokens to generate.
        temperature: Sampling temperature. A value of 0 or below switches to
            greedy decoding instead of sampling.
        top_p: Nucleus-sampling threshold (only used when sampling).
        return_full_text: When True (default, the historical behaviour) the
            returned string contains the prompt followed by the completion.
            When False only the newly generated tokens are decoded.

    Returns:
        The decoded text with special tokens removed.
    """
    # NOTE(review): the instruction text (including the missing space in
    # "flags,financial") is left exactly as-is; it presumably mirrors the
    # prompt format used for fine-tuning — confirm before editing it.
    prompt = f"You are an expert financial risk analyst. Analyze the provided text for financial risks, and output a structured assessment in JSON format including risk detection, specific risk flags,financial exposure details, and analysis notes. {input_data}"

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Sampling needs a strictly positive temperature; with temperature <= 0
    # fall back to deterministic greedy decoding rather than letting
    # ``generate`` fail.
    use_sampling = temperature is None or temperature > 0
    generation_kwargs = {
        "max_new_tokens": max_new_tokens,
        "do_sample": use_sampling,
        "pad_token_id": tokenizer.eos_token_id,
    }
    if use_sampling:
        generation_kwargs["temperature"] = temperature
        generation_kwargs["top_p"] = top_p

    with torch.no_grad():
        # perf_counter is monotonic, so the measurement cannot be skewed by
        # system clock adjustments during a long generation.
        start = time.perf_counter()
        outputs = model.generate(**inputs, **generation_kwargs)
        end = time.perf_counter()

    sequence = outputs[0]
    if not return_full_text:
        # The returned sequence starts with the prompt tokens; drop them so
        # only the completion is decoded.
        sequence = sequence[inputs["input_ids"].shape[-1]:]

    result = tokenizer.decode(sequence, skip_special_tokens=True)
    print(f"\n⏱️ Generation time: {end - start:.2f} seconds")
    return result

# Example use
# Sample disclosure text: a long-term, single-supplier purchase contract with
# fixed minimum purchases and expected price volatility.
input_text = """
    The Company has entered into a five-year contract to purchase raw materials
    from a single supplier in a volatile market. The contract requires minimum
    purchases of $10M annually with no cancellation clause. Recent market analysis
    suggests potential price fluctuations of up to 40% in the next year.
    """
# Called with the function's default generation settings. The returned string
# is the decoded full sequence, so it begins with the prompt itself (as seen
# in the captured output of this cell).
output = generate_analysis(input_text)
print("\n Generated Analysis:\n", output)

    

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Some parameters are on the meta device because they were offloaded to the cpu.



⏱️ Generation time: 165.53 seconds

 Generated Analysis:
 You are an expert financial risk analyst. Analyze the provided text for financial risks, and output a structured assessment in JSON format including risk detection, specific risk flags,financial exposure details, and analysis notes. 
    The Company has entered into a five-year contract to purchase raw materials
    from a single supplier in a volatile market. The contract requires minimum
    purchases of $10M annually with no cancellation clause. Recent market analysis
    suggests potential price fluctuations of up to 40% in the next year.
    
    The Company has identified the potential financial risks associated with the
    contract and has implemented a risk management strategy to mitigate the impact
    of price fluctuations. The strategy includes:

    - Negotiating a price cap of 10% above the current market rate.
    - Implementing a price floor of 5% below the current market rate.
    - Maintaining a minimum invent

In [None]:
import time
import torch
import matplotlib.pyplot as plt
import evaluate
import Levenshtein
from transformers import AutoModelForCausalLM, AutoTokenizer
from Levenshtein import distance as levenshtein_distance
from bert_score import score as bert_score
from docx import Document
import numpy as np



# ROUGE and BLEU metric objects from the `evaluate` package, loaded once
# (ROUGE first, then BLEU) and reused for every prompt in the evaluation.
rouge, bleu = (evaluate.load(metric_name) for metric_name in ("rouge", "bleu"))

# 99 diverse test prompts for a compliance-focused LLM.
# Each entry is a standalone instruction; no filing text or reference answer
# is attached to it in this list.
prompts = [
    "What are the key environmental risks highlighted in this report?",
    "Summarize key cybersecurity vulnerabilities in the filing.",
    "Assess financial statement accuracy based on disclosed risks.",
    "Identify any potential conflicts of interest in governance structures.",
    "What are the tax risks identified in the latest filing?",
    "Highlight any liquidity risks mentioned in this filing.",
    "Evaluate compliance with international anti-bribery laws.",
    "Analyze the impact of market risk factors disclosed in this report.",
    "What are the key risks identified in this company’s supply chain?",
    "Summarize risk factors from the latest 10-Q filing.",
    "Identify any references to potential violations of the Foreign Corrupt Practices Act (FCPA).",
    "What governance issues are raised in this proxy statement?",
    "Detect any conflicts between financial projections and risk disclosures.",
    "Identify operational inefficiencies highlighted in the filing.",
    "Summarize management's response to identified risks.",
    "What mitigation strategies are proposed for identified risks?",
    "Review the risk management framework outlined in the document.",
    "Analyze potential strategic risks identified in the report.",
    "Highlight reputational risks mentioned in the document.",
    "Identify regulatory penalties or fines discussed in the report.",
    "Detect any signs of financial misreporting in the filing.",
    "Analyze the company’s risk appetite based on the disclosed risks.",
    "Identify political risks related to operations in foreign countries.",
    "Assess the adequacy of the company’s disaster recovery plans.",
    "What are the sustainability risks identified in this report?",
    "Summarize legal proceedings related to risk in this filing.",
    "Identify insurance coverage gaps discussed in the filing.",
    "Detect any potential issues with intellectual property management.",
    "Highlight key fraud risks disclosed in the document.",
    "Summarize the company’s risk tolerance as outlined in the filing.",
    "Assess the company’s risk diversification strategy.",
    "Review how the company plans to handle potential supply chain disruptions.",
    "Identify reputational risks related to the company’s brand.",
    "Analyze risk exposure from foreign exchange fluctuations.",
    "Summarize any risks related to customer concentration.",
    "Detect conflicts of interest in the company’s executive compensation plan.",
    "Identify risks related to mergers and acquisitions in the filing.",
    "Analyze whether the company has adequate legal compliance programs.",
    "Review how the company addresses regulatory changes in this document.",
    "Summarize the company’s risk management priorities for the next year.",
    "Assess risks related to changes in government policy.",
    "Identify any material weaknesses in internal controls.",
    "Evaluate the company’s risk management performance over time.",
    "Highlight risks associated with operational outsourcing.",
    "Assess the financial impact of risk events disclosed in the filing.",
    "Summarize risks related to the company’s digital transformation efforts.",
    "Identify key social risks disclosed in the filing.",
    "What are the potential risks associated with the company’s new product launch?",
    "Evaluate the company’s approach to mitigating operational risks.",
    "Summarize risks related to the company’s leadership transitions.",
    "Highlight financial risks related to the company’s capital structure.",
    "What are the potential risks associated with the company’s debt?",
    "Identify any environmental liabilities discussed in the filing.",
    "Assess the company’s readiness for changes in tax law.",
    "What are the key factors contributing to the company’s credit risk?",
    "Analyze the company’s approach to managing legal risks.",
    "Highlight risks related to compliance with labor laws.",
    "What are the financial implications of disclosed risks?",
    "Assess the company’s approach to managing reputation risk.",
    "Summarize the company’s governance structure and related risks.",
    "Identify risks related to competition in the company’s industry.",
    "Analyze risks associated with the company’s expansion strategy.",
    "Evaluate how the company mitigates risks from geopolitical tensions.",
    "Summarize the company’s approach to managing climate-related risks.",
    "Detect any emerging risks in the company’s business environment.",
    "What are the risks associated with the company’s reliance on technology?",
    "Highlight any risks identified in the company’s corporate social responsibility (CSR) reports.",
    "Identify risks associated with intellectual property infringement.",
    "Summarize risks related to compliance with the GDPR.",
    "Evaluate the company’s exposure to risks from commodity price volatility.",
    "What operational risks are associated with the company’s logistics network?",
    "Identify key market risks affecting the company’s performance.",
    "Analyze risks arising from changes in consumer behavior.",
    "Summarize risks related to the company’s reliance on key suppliers.",
    "Identify risks related to changes in the regulatory landscape for healthcare.",
    "Evaluate risks associated with the company’s use of third-party vendors.",
    "What are the risks related to the company’s employee compensation plans?",
    "Summarize risks identified in the company’s sustainability reports.",
    "Identify risks related to the company’s reliance on renewable energy.",
    "Assess the company’s approach to managing risks from natural disasters.",
    "What are the risks related to the company’s strategic investments?",
    "Summarize the risks associated with the company’s real estate holdings.",
    "What are the company’s plans to address emerging regulatory risks?",
    "Detect risks associated with the company’s reliance on digital marketing.",
    "What are the risks associated with the company’s international operations?",
    "Summarize the company’s approach to managing workforce-related risks.",
    "Highlight risks related to the company’s pension liabilities.",
    "Identify risks related to potential supply shortages.",
    "What are the risks associated with the company’s cybersecurity measures?",
    "Assess the company’s compliance with industry-specific regulations.",
    "Summarize risks related to the company’s research and development activities.",
    "What are the potential risks related to the company’s legal disputes?",
    "Identify risks related to changes in consumer privacy laws.",
    "What are the emerging risks in the company’s market?",
    "Summarize risks related to the company’s debt refinancing efforts.",
    "Identify risks associated with fluctuations in raw material prices.",
    "Assess risks related to the company’s foreign investment strategies.",
    "Summarize the company’s risk management approach to emerging markets.",
    "Evaluate the company’s risk management framework against best practices."
]

# Number of test runs
num_tests = 3
win_threshold = 0.5  # BERTScore threshold for Win% calculation


def _mean(values):
    """Return the arithmetic mean of ``values``, or 0.0 when it is empty."""
    return sum(values) / len(values) if values else 0.0


# Ensure model is in evaluation mode
model.eval()

# Initialize summary containers
results = []
# Every decoded output and its reference, in (prompt, run) order, so that
# BERTScore can be computed in ONE batched call after the loop. Calling
# bert_score() per run reloads its scoring model every time.
all_candidates = []
all_references = []

# NOTE(review): no gold reference answers exist for these prompts, so ROUGE,
# BLEU, edit distance and BERTScore all compare the decoded output (which
# starts with the prompt) against the prompt itself. They measure overlap
# with the prompt, not answer quality — supply real references before
# drawing conclusions from these numbers.
for prompt in prompts:
    # Initialize lists for performance metrics per prompt
    time_taken_list = []
    tokens_per_second_list = []
    rouge_scores = []
    bleu_scores = []
    edit_distance_list = []

    # Tokenise once per prompt (identical for every run) and place the
    # tensors on the model's device instead of a hard-coded "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # Perplexity of the prompt under the model depends only on the prompt
    # and the weights, so it is computed once rather than once per run.
    with torch.no_grad():
        loss = model(**inputs, labels=inputs["input_ids"]).loss
    perplexity = torch.exp(loss).item()

    for _ in range(num_tests):
        # Timing covers generation and decoding (tokenisation is done above).
        start_time = time.perf_counter()

        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=64,
                # Without do_sample=True the temperature is ignored and all
                # runs would be the same greedy decode.
                do_sample=True,
                temperature=0.7,
                # Set explicitly to avoid the per-call pad_token_id warning.
                pad_token_id=tokenizer.eos_token_id,
            )

        decoded_output = tokenizer.decode(output[0], skip_special_tokens=True)

        time_taken = time.perf_counter() - start_time
        time_taken_list.append(time_taken)

        # Throughput counts the tokens actually generated: generation may
        # stop before max_new_tokens, and prompt tokens are not generated.
        generated_tokens = output.shape[-1] - prompt_length
        tokens_per_second_list.append(generated_tokens / time_taken)

        rouge_result = rouge.compute(predictions=[decoded_output], references=[prompt])
        rouge_scores.append(rouge_result["rougeL"])

        bleu_result = bleu.compute(predictions=[decoded_output], references=[prompt])
        bleu_scores.append(bleu_result["bleu"])

        edit_distance_list.append(levenshtein_distance(decoded_output, prompt))

        all_candidates.append(decoded_output)
        all_references.append(prompt)

    # Collect average results for this prompt; the BERTScore-based fields
    # are filled in after the batched scoring below.
    results.append({
        "prompt": prompt,
        "avg_time": _mean(time_taken_list),
        "avg_tps": _mean(tokens_per_second_list),
        "avg_perplexity": perplexity,
        "avg_rouge": _mean(rouge_scores),
        "avg_bleu": _mean(bleu_scores),
        "avg_edit_distance": _mean(edit_distance_list),
    })

# Score every (output, reference) pair in a single call.
if all_candidates:
    _, _, f1_all = bert_score(
        all_candidates, all_references, lang="en", rescale_with_baseline=True
    )
    f1_values = f1_all.tolist()
else:
    f1_values = []

# Each prompt contributed exactly num_tests consecutive pairs.
for index, entry in enumerate(results):
    prompt_f1 = f1_values[index * num_tests:(index + 1) * num_tests]
    entry["avg_bert_score"] = _mean(prompt_f1)
    entry["win_rate"] = _mean([1 if f1 >= win_threshold else 0 for f1 in prompt_f1]) * 100

# Build the Word report: title, run metadata, then one table row per prompt.
doc = Document()
doc.add_heading('LLM Compliance Evaluation Report', 0)

# Model and experiment metadata, one paragraph each
for metadata_line in (
    f"Model: {model.__class__.__name__}",
    f"Number of test runs per prompt: {num_tests}",
    f"BERTScore Win Threshold: {win_threshold}",
):
    doc.add_paragraph(metadata_line)

# Column layout in display order: (header text, key in a result dict, format spec).
# An empty format spec leaves the prompt string unchanged.
report_columns = (
    ('Prompt', 'prompt', ''),
    ('Time (s)', 'avg_time', '.2f'),
    ('Tokens/sec', 'avg_tps', '.2f'),
    ('Perplexity', 'avg_perplexity', '.2f'),
    ('ROUGE-L', 'avg_rouge', '.4f'),
    ('BLEU', 'avg_bleu', '.4f'),
    ('Edit Dist', 'avg_edit_distance', '.2f'),
    ('BERTScore', 'avg_bert_score', '.4f'),
    ('Win Rate (%)', 'win_rate', '.2f'),
)

# Results table: the header row is created with the table itself
table = doc.add_table(rows=1, cols=len(report_columns))
table.style = 'Table Grid'
for header_cell, (header_text, _, _) in zip(table.rows[0].cells, report_columns):
    header_cell.text = header_text

# One appended row per evaluated prompt
for r in results:
    for cell, (_, result_key, format_spec) in zip(table.add_row().cells, report_columns):
        cell.text = format(r[result_key], format_spec)

# Save the document
doc.save("compliance_mistral_finetuned_evaluation.docx")

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.
Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it