In [6]:
pip install groq

Collecting groq
  Downloading groq-1.0.0-py3-none-any.whl.metadata (16 kB)
Collecting distro<2,>=1.7.0 (from groq)
  Downloading distro-1.9.0-py3-none-any.whl.metadata (6.8 kB)
Collecting pydantic<3,>=1.9.0 (from groq)
  Downloading pydantic-2.12.5-py3-none-any.whl.metadata (90 kB)
Collecting sniffio (from groq)
  Using cached sniffio-1.3.1-py3-none-any.whl.metadata (3.9 kB)
Collecting annotated-types>=0.6.0 (from pydantic<3,>=1.9.0->groq)
  Using cached annotated_types-0.7.0-py3-none-any.whl.metadata (15 kB)
Collecting pydantic-core==2.41.5 (from pydantic<3,>=1.9.0->groq)
  Downloading pydantic_core-2.41.5-cp312-cp312-win_amd64.whl.metadata (7.4 kB)
Collecting typing-inspection>=0.4.2 (from pydantic<3,>=1.9.0->groq)
  Using cached typing_inspection-0.4.2-py3-none-any.whl.metadata (2.6 kB)
Downloading groq-1.0.0-py3-none-any.whl (138 kB)
Downloading distro-1.9.0-py3-none-any.whl (20 kB)
Downloading pydantic-2.12.5-py3-none-any.whl (463 kB)
Downloading pydantic_core-2.41.5-cp312-cp312-w

In [1]:
import sys

# Print the path of the Python interpreter that is running this kernel.
kernel_python = sys.executable
print(kernel_python)

E:\analytics\datasets_for_DA_proj_Self\esg-assure-compliance-engine\env\python.exe


In [2]:
import pandas as pd
import json

# 1. Read both inputs: the raw supplier table and the flagged audit exceptions
raw_data = pd.read_csv('../data/suppliers_raw.csv')
exceptions = pd.read_csv('../data/audit_exceptions.csv')

# Left join keyed on Supplier_ID: every exception row is kept and gains the
# supplier's columns from the raw table (the prompt later reads Industry and
# Region from the merged rows)
audit_merge = pd.merge(
    left=exceptions,
    right=raw_data,
    how='left',
    on='Supplier_ID',
)

# 2. Define the "Regulatory Brain" (The Context)
# Maps an issue type to the regulation, requirement and recommended action
# that are quoted verbatim in the audit prompt.
# In a real project, this would come from a Vector DB (RAG).
# For now, the key regulations are hardcoded, so the wording below must be
# accurate: it is exactly what the model is asked to cite.
regulatory_context = {
    "Carbon Outlier": {
        "Regulation": "BRSR Principle 6 (Environmental Impact)",
        "Requirement": "Companies must minimize environmental footprint. Scope 1 & 2 emissions must be within industry deviation limits.",
        "Action": "Immediate Environmental Impact Assessment (EIA) required."
    },
    "Reporting Gap": {
        "Regulation": "SEBI BRSR Core - Mandatory Disclosure",
        "Requirement": "Listed entities must disclose social diversity metrics (Gender, Differently-abled). Null values are non-compliant.",
        "Action": "Issue 'Notice of Non-Disclosure' and request data within 7 days."
    },
    "Logical Anomaly": {
        # ISA is the abbreviation of "International Standard on Auditing";
        # the previous wording ("International Audit Standard") misnamed it.
        "Regulation": "International Standard on Auditing (ISA) 240",
        "Requirement": "Data integrity checks must flag physically impossible values (e.g., Zero Water Usage in Mfg).",
        "Action": "Forensic Audit Flag: Potential Greenwashing or Sensor Failure."
    }
}

# 3. The "Prompt Factory" Function
def generate_audit_prompt(row, context_map=None):
    """Build the LLM prompt for one audit exception.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or a dict)
        Must provide the keys 'Issue', 'Supplier_ID', 'Industry', 'Region',
        'Details' and 'Risk_Level'.
    context_map : dict, optional
        Maps an issue type to a dict with 'Regulation', 'Requirement' and
        'Action'. When omitted, the module-level ``regulatory_context`` is
        used, which is what existing callers get.

    Returns
    -------
    str
        The prompt text, ready to be sent as the user message.
    """
    if context_map is None:
        context_map = regulatory_context

    issue_type = row['Issue']
    context = context_map.get(issue_type, {})

    # Every regulatory field needs a textual fallback: a bare .get() would put
    # the literal word "None" into the prompt for an unmapped issue type.
    regulation = context.get('Regulation', 'General ESG Standards')
    requirement = context.get(
        'Requirement',
        'No specific requirement is mapped for this issue type; assess against general ESG good practice.'
    )
    action = context.get('Action', 'Escalate for manual review by the audit team.')

    # Construct the Dynamic Prompt
    prompt = f"""
    ACT AS: Senior ESG Risk Auditor at EY.
    
    TASK: Write a formal 'Audit Observation Memo' for the following supplier.
    
    --- DATA CONTEXT ---
    Supplier ID: {row['Supplier_ID']}
    Industry: {row['Industry']} 
    Region: {row['Region']}
    Detected Issue: {issue_type}
    Specific Details: {row['Details']}
    
    --- REGULATORY STANDARD ---
    Violated Regulation: {regulation}
    Requirement: {requirement}
    Recommended Action: {action}
    
    --- OUTPUT FORMAT ---
    Title: [Formal Audit Title]
    Severity: {row['Risk_Level']}
    Observation: [2-3 sentences explaining the breach technically]
    Regulatory Implication: [Cite the specific regulation mentioned above]
    Next Steps: [Actionable advice for the client]
    """
    return prompt

# Smoke-test the prompt builder on the first three merged exceptions
print("--- TESTING PROMPT GENERATION LOGIC ---\n")
separator = "-" * 50
preview_rows = audit_merge.head(3)
for _, supplier in preview_rows.iterrows():
    supplier_id = supplier['Supplier_ID']
    print(f"Generated Prompt for {supplier_id}:")
    print(generate_audit_prompt(supplier))
    print(separator)

--- TESTING PROMPT GENERATION LOGIC ---

Generated Prompt for SUP-1064:

    ACT AS: Senior ESG Risk Auditor at EY.
    
    TASK: Write a formal 'Audit Observation Memo' for the following supplier.
    
    --- DATA CONTEXT ---
    Supplier ID: SUP-1064
    Industry: IT Services 
    Region: EMEA
    Detected Issue: Carbon Outlier
    Specific Details: Emissions (2735.41) far exceed industry avg (460.8)
    
    --- REGULATORY STANDARD ---
    Violated Regulation: BRSR Principle 6 (Environmental Impact)
    Requirement: Companies must minimize environmental footprint. Scope 1 & 2 emissions must be within industry deviation limits.
    Recommended Action: Immediate Environmental Impact Assessment (EIA) required.
    
    --- OUTPUT FORMAT ---
    Title: [Formal Audit Title]
    Severity: High
    Observation: [2-3 sentences explaining the breach technically]
    Regulatory Implication: [Cite the specific regulation mentioned above]
    Next Steps: [Actionable advice for the client]
   

In [None]:
import os
from groq import Groq
import time

# 1. Setup the "AI Auditor"
# The key is read from the GROQ_API_KEY environment variable so that no
# credential is ever typed into, saved with, or committed alongside the notebook.
API_KEY = os.environ.get("GROQ_API_KEY")
if not API_KEY:
    # Fail here with a clear message instead of on the first API call.
    raise RuntimeError(
        "GROQ_API_KEY is not set. Define it in the environment that launches "
        "Jupyter before running this cell."
    )

client = Groq(api_key=API_KEY)

# 2. Define the 'Audit Loop' Function
def consult_ai_auditor(prompt):
    """Send one audit prompt to the chat model and return the reply text.

    The request goes through the module-level ``client``. Any exception raised
    while making the call or reading the first choice is caught and reported
    in-band: the function then returns the string "Error: <message>" instead
    of raising.
    """
    system_message = {
        "role": "system",
        "content": "You are a rigid, no-nonsense ESG Auditor at EY. Output ONLY the requested audit memo format."
    }
    user_message = {"role": "user", "content": prompt}

    try:
        response = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            temperature=0.1,  # low temperature keeps the memos consistent
            messages=[system_message, user_message],
        )
        first_choice = response.choices[0]
        return first_choice.message.content
    except Exception as err:
        return "Error: " + str(err)

# 3. Run the Batch Process (The "Magic" Step)
# Full production run: every row of audit_merge gets its own memo.
print(f"⚠️ STARTING FULL BATCH RUN: Processing {len(audit_merge)} exceptions.")
print("☕ This will take a few minutes. Go grab a coffee.")

audit_sample = audit_merge.copy()
audit_reports = []
failed_suppliers = []  # Supplier_IDs whose memo could not be generated

for index, row in audit_sample.iterrows():
    print(f"Analyzing Supplier {row['Supplier_ID']}...", end=" ")

    # Generate the prompt using your function
    prompt_text = generate_audit_prompt(row)

    # Get the AI response
    ai_response = consult_ai_auditor(prompt_text)

    # Store result. Failed rows are kept on purpose: dropping them would let
    # the dashboard step label those suppliers as compliant.
    audit_reports.append({
        'Supplier_ID': row['Supplier_ID'],
        'Industry': row['Industry'],
        'Risk_Level': row['Risk_Level'],
        'AI_Audit_Memo': ai_response
    })

    # consult_ai_auditor reports failures in-band as "Error: ..." text, so the
    # status line has to inspect the response instead of always saying "Done".
    if not isinstance(ai_response, str) or ai_response.startswith("Error: "):
        failed_suppliers.append(row['Supplier_ID'])
        print("❌ Failed.")
    else:
        print("✅ Done.")
    time.sleep(1.5) # Polite pause to avoid rate limits

# 4. Save the Final "Client Deliverable"
# Explicit columns keep the CSV header intact even when there were no exceptions.
final_report_df = pd.DataFrame(
    audit_reports,
    columns=['Supplier_ID', 'Industry', 'Risk_Level', 'AI_Audit_Memo']
)
final_report_df.to_csv('../data/final_audit_memos.csv', index=False)

print("\n📄 MISSION COMPLETE: Audit Memos saved to '../data/final_audit_memos.csv'")
if failed_suppliers:
    print(f"⚠️ {len(failed_suppliers)} memo(s) contain an error instead of an audit memo: {failed_suppliers}")
print("👀 Check the CSV file to see your AI's work.")

⚠️ STARTING FULL BATCH RUN: Processing 109 exceptions.
☕ This will take a few minutes. Go grab a coffee.
Analyzing Supplier SUP-1064... ✅ Done.
Analyzing Supplier SUP-1312... ✅ Done.
Analyzing Supplier SUP-1325... ✅ Done.
Analyzing Supplier SUP-1389... ✅ Done.
Analyzing Supplier SUP-1399... ✅ Done.
Analyzing Supplier SUP-1958... ✅ Done.
Analyzing Supplier SUP-1978... ✅ Done.
Analyzing Supplier SUP-1990... ✅ Done.
Analyzing Supplier SUP-1099... ✅ Done.
Analyzing Supplier SUP-1291... ✅ Done.
Analyzing Supplier SUP-1441... ✅ Done.
Analyzing Supplier SUP-1675... ✅ Done.
Analyzing Supplier SUP-1836... ✅ Done.
Analyzing Supplier SUP-1853... ✅ Done.
Analyzing Supplier SUP-1893... ✅ Done.
Analyzing Supplier SUP-1323... ✅ Done.
Analyzing Supplier SUP-1558... ✅ Done.
Analyzing Supplier SUP-1971... ✅ Done.
Analyzing Supplier SUP-1975... ✅ Done.
Analyzing Supplier SUP-1044... ✅ Done.
Analyzing Supplier SUP-1051... ✅ Done.
Analyzing Supplier SUP-1071.

In [8]:
import pandas as pd

# 1. Load the FULL batch of memos you just generated
memos_df = pd.read_csv('../data/final_audit_memos.csv')
raw_df = pd.read_csv('../data/suppliers_raw.csv')

# 2. Merge them (Left Join): every supplier row is kept; only suppliers with a
# memo receive Risk_Level / AI_Audit_Memo values, the rest get NaN.
dashboard_master = pd.merge(raw_df, memos_df[['Supplier_ID', 'Risk_Level', 'AI_Audit_Memo']],
                            on='Supplier_ID',
                            how='left')

# 3. Clean up the "Good" suppliers (Fill NaNs)
dashboard_master['Risk_Level'] = dashboard_master['Risk_Level'].fillna('Low/Compliant')
dashboard_master['AI_Audit_Memo'] = dashboard_master['AI_Audit_Memo'].fillna('No Issues Detected - Compliant with BRSR.')

# 4. Export the FINAL file for Power BI
dashboard_master.to_csv('../data/dashboard_master_data.csv', index=False)

print(f"✅ FINAL DASHBOARD FILE CREATED with {len(dashboard_master)} rows.")

# A left join emits one output row per matching memo, so a supplier with
# several memos appears several times. Surface that instead of hiding it.
extra_rows = len(dashboard_master) - len(raw_df)
if extra_rows > 0:
    print(f"⚠️ {extra_rows} extra row(s): some suppliers have more than one memo, "
          "so their supplier-level metrics are repeated. De-duplicate by Supplier_ID before summing them.")

print("🚀 You are ready to build the Power BI Dashboard.")

✅ FINAL DASHBOARD FILE CREATED with 1002 rows.
🚀 You are ready to build the Power BI Dashboard.
