In [16]:
import os
import pandas as pd
import pickle
from langchain_openai.embeddings.base import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain_openai import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from dotenv import load_dotenv

# Load environment variables
# load_dotenv() is called before os.getenv so that a key supplied through a
# .env file (python-dotenv's default source) is visible to the lookup below.
load_dotenv()
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # None when the variable is unset
MODEL = "gpt-3.5-turbo"  # model name handed to ChatOpenAI in test_compliance

# Paths
DATA_PATH = "data/handcrafted"  # parent directory; joined with a company name
CRITERIA_PATH = 'criteria.csv'  # CSV read into criteria_df below
VECTOR_DB_NAME = "vector_db.pkl"  # file name looked up inside a company directory

# Load Criteria Data
# Code further down reads the columns 'Goal', 'HIGH Compliance',
# 'MEDIUM Compliance' and 'LOW Compliance' from this frame.
criteria_df = pd.read_csv(CRITERIA_PATH)

# Function to Load Vector Database for Company
def load_vector_db(company_dir):
    """Load the FAISS vector store saved in a company's directory.

    The file ``VECTOR_DB_NAME`` inside ``company_dir`` is unpickled and the
    resulting object is passed to ``FAISS.deserialize_from_bytes`` together
    with a freshly constructed ``OpenAIEmbeddings`` instance.

    NOTE: both ``pickle.load`` and ``allow_dangerous_deserialization=True``
    trust the file's contents completely; only point this at files you
    produced yourself.

    Args:
        company_dir: Directory expected to contain the pickled vector store.

    Returns:
        The vector store returned by ``FAISS.deserialize_from_bytes``.

    Raises:
        FileNotFoundError: If the vector store file does not exist.
    """
    pickle_file = os.path.join(company_dir, VECTOR_DB_NAME)
    if os.path.exists(pickle_file):
        with open(pickle_file, "rb") as handle:
            serialized_index = pickle.load(handle)
        # Built only after the file was read, mirroring the load order.
        embedding_model = OpenAIEmbeddings()
        return FAISS.deserialize_from_bytes(
            serialized_index,
            embedding_model,
            allow_dangerous_deserialization=True,
        )
    raise FileNotFoundError(f"Vector database not found for {company_dir}")

def test_compliance(company, goal):
    """Print an LLM assessment of a company's compliance with one goal.

    Steps, in order: load the company's vector store, build the chat model,
    look up the goal's HIGH/MEDIUM/LOW criteria in ``criteria_df``, run a
    similarity search for the assessment question, and send context,
    criteria and question to the model through an ``LLMChain``.

    Args:
        company: Directory name under ``DATA_PATH`` holding the vector store.
        goal: Value matched against the 'Goal' column of ``criteria_df``.

    Returns:
        None. The model's answer is printed. A message is printed instead
        (and nothing is sent to the model) when the goal has no criteria row
        or the similarity search yields no documents.

    Raises:
        FileNotFoundError: Propagated from ``load_vector_db`` when the
            company's vector store file is missing.
    """
    # The vector store is opened before anything else, so a missing store
    # fails fast regardless of whether the goal is valid.
    vector_store = load_vector_db(os.path.join(DATA_PATH, company))

    chat_model = ChatOpenAI(model_name=MODEL, temperature=0, openai_api_key=OPENAI_API_KEY)

    # Guard: nothing to assess against if the goal is not in the table.
    matching_rows = criteria_df[criteria_df['Goal'] == goal]
    if matching_rows.empty:
        print(f"No compliance criteria found for the goal: {goal}")
        return

    # First matching row wins; keys line up with the prompt's placeholders
    # (high_criteria, medium_criteria, low_criteria).
    criteria_inputs = {
        f"{level.lower()}_criteria": matching_rows[f"{level} Compliance"].values[0]
        for level in ("HIGH", "MEDIUM", "LOW")
    }

    # The same question drives both the retrieval and the final prompt.
    question = (
        "Based on the context, what is the company's level of compliance "
        f"(HIGH, MEDIUM, or LOW) with respect to the goal '{goal}'? "
        "Provide a brief explanation."
    )

    # Guard: without retrieved passages the model has nothing to cite.
    hits = vector_store.similarity_search(question)
    if not hits:
        print("No relevant context found.")
        return
    context_text = "\n".join(hit.page_content for hit in hits)

    template_text = """
You are an ESG compliance expert. Based on the provided context and compliance criteria, determine the company's level of compliance with respect to the specified goal. You must be very strict and harsh in your assessment. Use only the retrieved context to support your assessment. If the context lacks sufficient information, say "I don't know".

Context:
{context}

Compliance Criteria:
- HIGH Compliance: {high_criteria}
- MEDIUM Compliance: {medium_criteria}
- LOW Compliance: {low_criteria}

Question:
{query}
"""

    chain = LLMChain(
        llm=chat_model,
        prompt=PromptTemplate(
            input_variables=["context", "high_criteria", "medium_criteria", "low_criteria", "query"],
            template=template_text,
        ),
    )

    # Assemble every template variable, then run the chain once.
    chain_inputs = {"context": context_text, **criteria_inputs, "query": question}
    answer = chain.run(**chain_inputs)

    print(answer)


# Assess Walmart against the first five goals listed in the criteria table.
goals = criteria_df['Goal'].tolist()
# Explicit indexing (not slicing) so fewer than five goals still raises
# IndexError instead of being silently skipped.
for goal_index in range(5):
    test_compliance("Walmart", goals[goal_index])

Based on the context provided, the company's level of compliance with the goal of 'Climate Neutrality by 2050' would be assessed as MEDIUM. While the company has set science-based targets to reduce GHG emissions in its own operations and is working with suppliers to reduce emissions in the supply chain, there is no explicit mention of a full transition to renewable energy or zero-emission operations by 2050. The company's actions focus on emission reductions and sustainability initiatives, but there is a lack of clear commitment to achieving climate neutrality by 2050.
Based on the context provided, the company's level of compliance with the goal of achieving a 55% emissions reduction by 2030 would be assessed as LOW. While the company has set science-based targets and initiatives to reduce emissions in its own operations and supply chain, the specific reduction target of 55% by 2030 is not explicitly mentioned or addressed in the context. Additionally, the information provided indicat