# ESG Compliance Analysis with Argentine Laws and UNFCCC

This notebook analyzes ESG documents against Argentine environmental laws, policies, and UNFCCC commitments to assess compliance levels. The analysis uses Natural Language Processing (NLP) techniques to compare document contents and generate compliance metrics.

## Objectives
1. Load and preprocess ESG documents and Argentine laws/policies
2. Create embeddings for both document sets
3. Compare documents using semantic similarity
4. Generate compliance metrics and visualizations
5. Identify gaps in compliance and areas for improvement

In [None]:
# Install required packages
# !pip install sentence-transformers pandas numpy plotly scikit-learn

In [1]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import plotly.graph_objects as go
from sklearn.metrics.pairwise import cosine_similarity
import os

  from .autonotebook import tqdm as notebook_tqdm


## 1. Loading and Preprocessing Data

In [7]:
# Load ESG documents
# esg_df = pd.read_csv('data/esg_documents.csv')

# Load Argentine laws and policies
laws_df = pd.read_csv('data/Argentina_laws_policies_UNFCCC.csv')

# Display basic information about the datasets
# print("ESG Documents Shape:", esg_df.shape)
print(laws_df.head())

  Collection Name  Collection Summary  \
0             NaN                 NaN   
1             NaN                 NaN   
2             NaN                 NaN   
3             NaN                 NaN   
4             NaN                 NaN   

                                         Family Name  \
0  National Action Plan for Energy and Climate Ch...   
1  Law 26473 Prohibiting commercialisation of inc...   
2  Decree 140/2007: Presidential decree declaring...   
3  Law 26.093 (2006) Regimen of Regulation and Pr...   
4     Law 26.123 (2006) Promotion of Hydrogen Energy   

                                      Family Summary  \
0  <p>The objective of this document is to plan t...   
1  Prohibits the importation and commercialisatio...   
2  This decree has far-reaching and ambitious goa...   
3  The object of the law is to provide a regulato...   
4  The law declares the technological development...   

                                          Family URL  \
0  https://app.climatep

In [None]:
# Initialize the sentence transformer model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Create embeddings for ESG documents
esg_embeddings = model.encode(esg_df['content'].tolist(), show_progress_bar=True)

# Create embeddings for laws and policies
laws_embeddings = model.encode(laws_df['Document Content'].tolist(), show_progress_bar=True)

## 2. Compliance Analysis

In [None]:
# Calculate similarity matrix between ESG documents and laws
similarity_matrix = cosine_similarity(esg_embeddings, laws_embeddings)

# Create a DataFrame with similarity scores
compliance_df = pd.DataFrame(
    similarity_matrix,
    index=esg_df['company'],
    columns=laws_df['Document Title']
)

In [None]:
# Calculate overall compliance score for each company
compliance_scores = compliance_df.mean(axis=1)

# Create a bar plot of compliance scores
fig = go.Figure(data=[
    go.Bar(x=compliance_scores.index, y=compliance_scores.values)
])

fig.update_layout(
    title='ESG Compliance Scores by Company',
    xaxis_title='Company',
    yaxis_title='Compliance Score',
    yaxis_range=[0, 1]
)

fig.show()

## 3. Detailed Analysis

In [None]:
# Function to identify top matching laws for each company
def get_top_matches(company, n=5):
    company_scores = compliance_df.loc[company].sort_values(ascending=False)
    return company_scores.head(n)

# Display top matches for each company
for company in compliance_df.index:
    print(f"\nTop matches for {company}:")
    print(get_top_matches(company))

In [None]:
# Analyze compliance by topic
topic_compliance = pd.DataFrame()

for topic in laws_df['Topic'].unique():
    topic_laws = laws_df[laws_df['Topic'] == topic]
    topic_indices = topic_laws.index
    
    # Calculate average compliance for each topic
    topic_scores = compliance_df.iloc[:, topic_indices].mean(axis=1)
    topic_compliance[topic] = topic_scores

# Create a heatmap of compliance by topic
fig = go.Figure(data=go.Heatmap(
    z=topic_compliance.values,
    x=topic_compliance.columns,
    y=topic_compliance.index
))

fig.update_layout(
    title='Compliance Heatmap by Topic',
    xaxis_title='Topic',
    yaxis_title='Company'
)

fig.show()

## 4. Recommendations and Insights

In [None]:
# Generate recommendations based on compliance analysis
def generate_recommendations(company):
    # Get lowest compliance areas
    low_compliance_topics = topic_compliance.loc[company].sort_values().head(3)
    
    recommendations = []
    for topic, score in low_compliance_topics.items():
        if score < 0.5:
            recommendations.append(f"Improve compliance with {topic} policies (current score: {score:.2f})")
    
    return recommendations

# Display recommendations for each company
for company in compliance_df.index:
    print(f"\nRecommendations for {company}:")
    for rec in generate_recommendations(company):
        print(f"- {rec}")