In [14]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Load the time-series data (path is relative to the notebook's working directory).
df = pd.read_csv('merged_datacenters_esg_timeseries.csv')

# Quick sanity summary of what was loaded.
print(f"Shape: {df.shape}")
# .tolist() converts the NumPy scalars returned by .unique() into plain Python
# ints, so the list prints as [2015, 2016, ...] rather than
# [np.int64(2015), np.int64(2016), ...] under NumPy >= 2.
print(f"Years: {sorted(df['Year'].unique().tolist())}")
print(f"Regions: {sorted(df['region'].unique())}")
print("\nColumns:", df.columns.tolist())

Shape: (2101, 22)
Years: [np.int64(2015), np.int64(2016), np.int64(2017), np.int64(2018), np.int64(2019), np.int64(2020), np.int64(2021), np.int64(2022), np.int64(2023), np.int64(2024), np.int64(2025)]
Regions: ['Africa', 'Asia', 'Europe', 'Latin America', 'Middle East', 'North America', 'Oceania']

Columns: ['country', 'total_data_centers', 'hyperscale_data_centers', 'colocation_data_centers', 'floor_space_sqft_total', 'power_capacity_MW_total', 'average_renewable_energy_usage_percent', 'internet_penetration_percent', 'growth_rate_of_data_centers_percent_per_year', 'tier_I', 'tier_II', 'tier_III', 'tier_IV', 'has_tier_info', 'num_cooling_technologies', 'region', 'Year', 'ESG_Overall', 'ESG_Environmental', 'CarbonEmissions', 'WaterUsage', 'EnergyConsumption']


In [18]:
# Plot 1: line chart of how the environmental metrics evolve over time.
# Each metric is averaged per (region, Year) pair before plotting.

plot1_metrics = ['CarbonEmissions', 'WaterUsage', 'EnergyConsumption', 'ESG_Environmental']
plot1_data = (
    df.groupby(['region', 'Year'])
      .agg(dict.fromkeys(plot1_metrics, 'mean'))
      .reset_index()
)

# Only CarbonEmissions is drawn for now; the other averaged metrics stay
# available in plot1_data.
plot1_labels = {
    'CarbonEmissions': 'Carbon Emissions (log-transformed)',
    'Year': 'Year',
    'region': 'Region',
}

fig1 = px.line(
    plot1_data,
    x='Year',
    y='CarbonEmissions',
    color='region',
    markers=True,
    title='Carbon Emissions Over Time by Region (2015-2025)',
    labels=plot1_labels,
    template='plotly_white',
)

fig1.update_layout(
    height=500,
    hovermode='x unified',
    legend={'title': 'Region', 'orientation': 'v'},
)

# Write the figure to a standalone HTML file instead of calling fig1.show().
fig1.write_html('plot1.html')

In [19]:
# Plot 2: bubble scatter of power capacity against carbon emissions for a
# single year. 2025 is used because it is the most recent year in the data.

plot2_data = df.loc[df['Year'] == 2025]

# Extra columns surfaced in the hover tooltip.
plot2_hover_columns = [
    'country',
    'average_renewable_energy_usage_percent',
    'ESG_Environmental',
]

# Human-readable axis / legend names.
plot2_labels = {
    'power_capacity_MW_total': 'Power Capacity (MW, log-transformed)',
    'CarbonEmissions': 'Carbon Emissions (log-transformed)',
    'total_data_centers': 'Total Data Centers',
    'region': 'Region',
}

fig2 = px.scatter(
    plot2_data,
    x='power_capacity_MW_total',
    y='CarbonEmissions',
    size='total_data_centers',  # marker area scales with the number of data centers
    color='region',
    hover_data=plot2_hover_columns,
    title='Data Center Power Capacity vs Carbon Emissions (2025)',
    labels=plot2_labels,
    template='plotly_white',
    opacity=0.7,
)

fig2.update_layout(height=600)
fig2.write_html('plot2.html')

In [20]:
# Plot 3: 2x2 grid of bar charts comparing several metrics across regions
# for 2025. Data-center counts are summed per region; the other metrics are
# averaged per region.

plot3_data = (
    df[df['Year'] == 2025]
    .groupby('region')
    .agg({
        'total_data_centers': 'sum',
        'CarbonEmissions': 'mean',
        'ESG_Environmental': 'mean',
        'average_renewable_energy_usage_percent': 'mean',
    })
    .reset_index()
)

# One entry per subplot, listed in row-major order:
# (data column, subplot title, trace name, bar colour)
plot3_panels = [
    ('total_data_centers', 'Total Data Centers', 'Data Centers', 'lightblue'),
    ('CarbonEmissions', 'Carbon Emissions', 'Carbon', 'coral'),
    ('ESG_Environmental', 'ESG Environmental Score', 'ESG Environmental', 'lightgreen'),
    ('average_renewable_energy_usage_percent', 'Renewable Energy Usage %', 'Renewable %', 'gold'),
]

fig3 = make_subplots(
    rows=2, cols=2,
    subplot_titles=tuple(panel_title for _, panel_title, _, _ in plot3_panels),
    specs=[[{'type': 'bar'} for _ in range(2)] for _ in range(2)],
)

# Fill the grid left-to-right, top-to-bottom; plotly rows/cols are 1-based.
for panel_index, (metric, _, trace_name, bar_colour) in enumerate(plot3_panels):
    grid_row, grid_col = divmod(panel_index, 2)
    fig3.add_trace(
        go.Bar(x=plot3_data['region'], y=plot3_data[metric],
               name=trace_name, marker_color=bar_colour),
        row=grid_row + 1, col=grid_col + 1
    )

fig3.update_layout(
    height=700,
    title_text='Regional Comparison of Data Center Metrics (2025)',
    showlegend=False,  # each panel already carries its own title
    template='plotly_white',
)

# Tilt the region names so they do not overlap.
fig3.update_xaxes(tickangle=-45)

fig3.write_html('plot3.html')