In [1]:
import os
from urllib.parse import quote

import pandas as pd
from rdflib import Graph, Namespace, Literal, RDF, URIRef
from rdflib.namespace import XSD

In [2]:
# RDF namespace under which every ESG resource and property is minted, and
# the single graph that the later cells populate and serialize.
EX = Namespace("http://example.org/esg/")
g = Graph()
g.bind("ex", EX)  # register the "ex" prefix for this namespace on the graph

# Folder holding the normalized ESG CSVs. The historical location stays the
# default so existing runs behave exactly as before; set the ESG_DATA_DIR
# environment variable to run the notebook against another machine/checkout.
# (`or` also falls back to the default when the variable is set but empty.)
base_path = os.environ.get("ESG_DATA_DIR") or (
    "/Users/vinantipathare/Desktop/Docs/CS Project/Ontology_PCA_Project/Normalized_Data"
)

# Per-pillar input files inside `base_path`, keyed by
# "<pillar initial>_<opportunity|risk>".
file_mapping = {
    "E_opportunity": "esg_environmental_opportunity_cleaned_industry_normalized_updated_pillar.csv",
    "E_risk": "esg_environmental_risk_cleaned_industry_normalized_updated_pillar.csv",
    "G_opportunity": "esg_governance_opportunity_cleaned_industry_normalized_updated_pillar.csv",
    "G_risk": "esg_governance_risk_cleaned_industry_normalized_updated_pillar.csv",
    "S_opportunity": "esg_social_opportunity_cleaned_industry_normalized_updated_pillar.csv",
    "S_risk": "esg_social_risk_cleaned_industry_normalized_updated_pillar.csv"
}


In [3]:
# Read the master mapping table (its Metric / Pillar / Industry columns are
# matched against each observation later to find a Topic and SASB_Code).
# NOTE(review): this file is parsed with the default comma separator while the
# per-pillar files are read with delimiter='|' -- confirm that is intended.
mapping_df = pd.read_csv(
    os.path.join(
        base_path,
        "esg_master_mapping_pillar_updated1.csv",
    )
)

# Helper function
def safe_uri(s):
    """Build a URIRef in the ``EX`` namespace from an arbitrary label.

    The label is converted with ``str()``, stripped of surrounding whitespace,
    and has every space and ``/`` replaced by ``_`` (unchanged from the
    previous behaviour). Whatever remains is then percent-encoded so the
    result is always a well-formed IRI: characters such as ``"``, ``<``,
    ``>``, ``{``, ``}``, ``#``, ``?``, ``%``, tabs/newlines and non-ASCII
    text (encoded as UTF-8) become ``%XX`` escapes instead of leaking into the
    identifier, where they would either be illegal or be interpreted as a
    fragment/query delimiter.

    Letters, digits, ``_ . - ~`` and the path-segment punctuation
    ``! $ & ' ( ) * + , ; = : @`` are left as-is, so labels made only of
    those characters map to the same URI as before.

    Args:
        s: Any value; it is stringified before use.

    Returns:
        rdflib.URIRef: ``EX`` followed by the sanitised, escaped label.
    """
    local_name = str(s).strip().replace(" ", "_").replace("/", "_")
    # "/" is deliberately absent from `safe`; it was already mapped to "_".
    return URIRef(EX + quote(local_name, safe="!$&'()*+,;=:@"))

In [4]:
# Build the graph: one ex:ESGObservation per data row that has a mapping entry.
#
# The (Metric, Pillar, Industry) -> (Topic, SASB_Code) lookup is materialised
# once here instead of boolean-filtering `mapping_df` for every data row.
# Semantics are kept identical to the per-row filter:
#   * the first mapping row for a key wins (setdefault never overwrites);
#   * mapping rows with a missing key part are dropped, because `==` against
#     a missing value never matched either.
mapping_key_cols = ["Metric", "Pillar", "Industry"]
mapping_keyed = mapping_df.dropna(subset=mapping_key_cols)
topic_lookup = {}
for lookup_key, topic, sasb in zip(
    zip(*(mapping_keyed[col].values for col in mapping_key_cols)),
    mapping_keyed["Topic"].values,
    mapping_keyed["SASB_Code"].values,
):
    topic_lookup.setdefault(lookup_key, (topic, sasb))

# Columns consumed from every per-pillar file, in unpacking order.
observation_cols = ["metric_name", "Industry", "pillar", "company_name", "year", "metric_value"]

# `file_key` is only the dictionary label; the pillar stored on each
# observation comes from the row's own `pillar` column.
for file_key, file_name in file_mapping.items():
    df = pd.read_csv(os.path.join(base_path, file_name), delimiter='|', encoding='utf-8')

    for metric, industry, pillar, company, year, value in df[observation_cols].itertuples(
        index=False, name=None
    ):
        # Rows without a mapping entry are not exported.
        mapped = topic_lookup.get((metric, pillar, industry))
        if mapped is None:
            continue

        # int() raises on a missing year, which used to abort the whole build,
        # and a missing value carries no measurement: skip such rows.
        if pd.isna(year) or pd.isna(value):
            continue

        topic, sasb = mapped

        # Normalise before building the identifier so a year column that
        # pandas inferred as float does not yield "..._2020.0_..." subjects.
        year = int(year)

        subj = safe_uri(f"{company}_{industry}_{year}_{pillar}_{metric}")

        g.add((subj, RDF.type, EX.ESGObservation))
        g.add((subj, EX.company, Literal(company)))
        g.add((subj, EX.industry, Literal(industry)))
        g.add((subj, EX.year, Literal(year, datatype=XSD.gYear)))
        g.add((subj, EX.pillar, Literal(pillar)))
        g.add((subj, EX.metric, Literal(metric)))
        g.add((subj, EX.value, Literal(float(value), datatype=XSD.float)))
        g.add((subj, EX.topic, Literal(topic)))
        g.add((subj, EX.sasbCode, Literal(sasb)))




In [5]:
# Write the populated graph to a Turtle file (path is relative to the
# kernel's working directory) and report where it went.
output_file = "esg_combined1.ttl"
g.serialize(format="turtle", destination=output_file)
print("RDF file saved to: {}".format(output_file))

RDF file saved to: esg_combined1.ttl
