In [21]:
# 1. Imports & config
import os, sys
from pathlib import Path
import pandas as pd
import plotly.express as px

# Put the parent of the current working directory on the import path so the
# `scripts` package can be imported. The membership check keeps re-runs of
# this cell from appending the same entry to sys.path over and over.
project_root = os.path.abspath('../')
if project_root not in sys.path:
    sys.path.append(project_root)
from scripts.config import PROCESSED_DATA_PATH

# Location of the processed dataset, as declared in scripts.config.
DATA_PATH = Path(PROCESSED_DATA_PATH)
print(f"Using processed dataset: {DATA_PATH}")

Using processed dataset: ..\data\processed\esg_data_cleaned.csv


In [22]:
# 2. Load data
# Read the processed CSV; when it is absent, stop with a message naming the
# notebook that has to be run first.
if DATA_PATH.exists():
    df = pd.read_csv(DATA_PATH)
else:
    raise FileNotFoundError(f"Processed dataset not found at {DATA_PATH}. Run 01_data_cleaning first.")

# Trim leading/trailing whitespace from every header label.
df.columns = [label.strip() for label in df.columns]

# Quick sanity check: dimensions first, then the leading rows as the cell output.
print(df.shape)
df.head()

(503, 15)


Unnamed: 0,Symbol,Name,Address,Sector,Industry,Full Time Employees,Description,Total ESG Risk score,Environment Risk Score,Governance Risk Score,Social Risk Score,Controversy Level,Controversy Score,ESG Risk Percentile,ESG Risk Level
0,ENPH,"Enphase Energy, Inc.","47281 Bayside Parkway\nFremont, CA 94538\nUnit...",Technology,Solar,3157,"Enphase Energy, Inc., together with its subsid...",21.05,4.05,6.1,8.9,,2.0,,
1,EMN,Eastman Chemical Company,"200 South Wilcox Drive\nKingsport, TN 37662\nU...",Basic Materials,Specialty Chemicals,14000,Eastman Chemical Company operates as a special...,25.3,12.8,6.6,5.8,Moderate Controversy Level,2.0,50th percentile,Medium
2,DPZ,Domino's Pizza Inc.,"30 Frank Lloyd Wright Drive\nAnn Arbor, MI 481...",Consumer Cyclical,Restaurants,6500,"Domino's Pizza, Inc., through its subsidiaries...",29.2,10.6,6.3,12.2,Moderate Controversy Level,2.0,66th percentile,Medium
3,DAY,"Dayforce, Inc.","3311 East Old Shakopee Road\nMinneapolis, MN 5...",Technology,Software - Application,9084,"Dayforce Inc., together with its subsidiaries,...",21.05,4.05,6.1,8.9,,2.0,,
4,DVA,Davita Inc.,"2000 16th Street\nDenver, CO 80202\nUnited States",Healthcare,Medical Care Facilities,70000,DaVita Inc. provides kidney dialysis services ...,22.6,0.1,8.4,14.1,Moderate Controversy Level,2.0,38th percentile,Medium


In [23]:
# 3. Average ESG risk by sector (interactive)
metric = 'Total ESG Risk score'

# One row per sector holding the mean of the chosen metric.
sector_means = df.groupby('Sector')[metric].mean().reset_index()

fig = px.bar(
    sector_means,
    x='Sector',
    y=metric,
    color=metric,  # bars are shaded by the same value they plot
    color_continuous_scale='viridis',
    title='Average ESG Risk Score by Sector',
)
fig.show()

In [24]:
# 4. ESG risk scatter bubble
# Plot from a derived frame so `df` itself stays untouched for later cells.
#
# Marker size comes from 'Controversy Score'; rows without a value there are
# left out because a NaN marker size is not something plotly can draw.
# NOTE(review): confirm whether any such rows are expected after cleaning.
scatter_df = df.dropna(subset=['Controversy Score']).copy()

# The data preview shows companies with no 'ESG Risk Level'. Give them an
# explicit 'Unknown' legend entry rather than relying on how the installed
# plotly version treats missing values in the color column.
scatter_df['ESG Risk Level'] = scatter_df['ESG Risk Level'].fillna('Unknown')

fig2 = px.scatter(
    scatter_df,
    x='Environment Risk Score', y='Governance Risk Score',
    size='Controversy Score', color='ESG Risk Level',
    hover_name='Name', title='ESG Risk Scatter',
    size_max=40, opacity=0.75
)
fig2.show()

In [None]:
# 5. Export dashboard-ready dataset
output_path = Path('../data/processed/esg_dashboard_ready.csv')

# Create the destination folder (and any missing parents) if it is not there yet.
export_dir = output_path.parent
export_dir.mkdir(parents=True, exist_ok=True)

# Write the frame as CSV, omitting the row index.
df.to_csv(output_path, index=False)
print(f"✅ Dashboard-ready dataset saved to {output_path}")

✅ Dashboard-ready dataset saved to data\processed\esg_dashboard_ready.csv
