In [80]:
import numpy as np
import pandas as pd

In [81]:
# Load the three reference tables (GPU specs, per-region carbon impact,
# cloud instance -> GPU mapping) from the local data/ directory.
gpus, impact, instances = (
    pd.read_csv(csv_path)
    for csv_path in ('data/gpus.csv', 'data/impact.csv', 'data/instances.csv')
)

In [82]:
# Preview the first rows of the GPU table (name, TDP, FLOPS figures, memory, source).
gpus.head()

Unnamed: 0,name,type,tdp_watts,TFLOPS32,TFLOPS16,GFLOPS32/W,GFLOPS16/W,memory,source
0,A100 PCIe 40/80GB,gpu,250,,312.0,,,40.0,https://www.nvidia.com/content/dam/en-zz/Solut...
1,A100 SXM4 80 GB,gpu,400,,312.0,,,80.0,https://www.nvidia.com/content/dam/en-zz/Solut...
2,AGX Xavier,gpu,30,16.0,32.0,533.33,1066.67,,https://elinux.org/Jetson_AGX_Xavier
3,AMD RX480,gpu,150,5.8,5.8,38.67,38.67,8.0,techpowerup.com
4,GIGABYTE GTX 1660 OC,gpu,120,5.153,10.31,,,6.0,https://www.techpowerup.com/gpu-specs/gigabyte...


In [83]:
# Preview the first rows of the per-provider, per-region impact table.
impact.head()

Unnamed: 0,provider,providerName,offsetRatio,region,regionName,country,state,city,impact,source,PUE,PUE source,comment
0,gcp,Google Cloud Platform,100,asia-east1,,Taiwan,,Changhua County,557.0,"measured on July 2, 2019 between 8 and 9 PM, ...",1.12,https://www.google.com/about/datacenters/effic...,Datacenter-specific
1,gcp,Google Cloud Platform,100,asia-east2,,China,,Hong Kong,702.0,https://www.sciencedirect.com/science/article/...,,,
2,gcp,Google Cloud Platform,100,asia-northeast1,,Japan,,Tokyo,516.0,https://www.sciencedirect.com/science/article/...,,,
3,gcp,Google Cloud Platform,100,asia-northeast2,,Japan,,Osaka,516.0,https://www.fepc.or.jp/library/pamphlet/pdf/04...,,,
4,gcp,Google Cloud Platform,100,asia-south1,,India,,Mumbai,920.0,https://www.fepc.or.jp/library/pamphlet/pdf/04...,,,


In [84]:
# Preview the first rows of the cloud-instance -> GPU mapping table.
instances.head()

Unnamed: 0,provider,id,gpu,source
0,aws,p3.2xlarge,Tesla V100,https://aws.amazon.com/fr/ec2/instance-types/p3/
1,aws,p3.8xlarge,Tesla V100,https://aws.amazon.com/fr/ec2/instance-types/p3/
2,aws,p3.16xlarge,Tesla V100,https://aws.amazon.com/fr/ec2/instance-types/p3/
3,aws,p3dn.24xlarge,Tesla V100,https://aws.amazon.com/fr/ec2/instance-types/p3/
4,aws,p2.xlarge,Tesla K80,https://aws.amazon.com/fr/ec2/instance-types/p2/


In [85]:
# Lookup table: GPU name -> value of its `tdp_watts` column.
# If a name occurs more than once in the CSV, the last row wins.
gpu_power = gpus.set_index('name')['tdp_watts'].to_dict()

# Show a handful of entries as a sanity check.
list(gpu_power.items())[:5]

[('A100 PCIe 40/80GB', 250),
 ('A100 SXM4 80 GB', 400),
 ('AGX Xavier', 30),
 ('AMD RX480', 150),
 ('GIGABYTE GTX 1660 OC', 120)]

In [95]:
# Build a nested lookup: provider name -> region -> impact / 1000.
#
# NOTE(review): the division by 1000 presumably converts g CO2-eq/kWh into
# kg CO2-eq/kWh (the downstream cells print "kg eq. CO2") - confirm against
# the documentation of data/impact.csv.
#
# Rows sharing the same (providerName, region) pair are summed, and rows whose
# provider or region is missing are dropped by groupby's default dropna=True.
impact_by_region = (
    impact[['providerName', 'region', 'impact']]
    .assign(impact=lambda frame: frame['impact'] / 1000)  # vectorised, no per-element apply
    .groupby(['providerName', 'region'])['impact']
    .sum()
)

# The grouped Series is keyed by (provider, region) tuples; unfold it into
# the nested provider -> region -> value mapping used by the later cells.
carbon_region = {}
for (provider, region), val in impact_by_region.to_dict().items():
    carbon_region.setdefault(provider, {})[region] = val

In [87]:
# Example scenario: a single GPU on AWS.
# NOTE(review): this cell repeats the formula of the following GCP cell with
# different inputs; a shared helper function would remove the duplication.
hardware_type, platform, region = 'GTX 1080 Ti', 'Amazon Web Services', 'us-west-1'
time = 100 # hours

# Look up the two table values, then apply:
#   tdp_watts * hours * regional impact factor / 1000
power_watts = gpu_power[hardware_type]
region_impact = carbon_region[platform][region]
co2_emission = power_watts * time * region_impact / 1000
print(f"CO2 emission for {hardware_type} in {platform} {region} for {time} hours: {co2_emission:.2f} kg eq. CO2")

CO2 emission for GTX 1080 Ti in Amazon Web Services us-west-1 for 100 hours: 6.01 kg eq. CO2


In [88]:
# Example scenario: the same GPU and duration as the AWS cell, on GCP.
# The resulting `co2_emission` is the value consumed by the equivalents cell below.
hardware_type, platform, region = 'GTX 1080 Ti', 'Google Cloud Platform', 'us-west1'
time = 100 # hours

# Look up the two table values, then apply:
#   tdp_watts * hours * regional impact factor / 1000
power_watts = gpu_power[hardware_type]
region_impact = carbon_region[platform][region]
co2_emission = power_watts * time * region_impact / 1000
print(f"CO2 emission for {hardware_type} in {platform} {region} for {time} hours: {co2_emission:.2f} kg eq. CO2")

CO2 emission for GTX 1080 Ti in Google Cloud Platform us-west1 for 100 hours: 7.44 kg eq. CO2


In [89]:
# Express `co2_emission` (from the most recently run scenario cell) as
# everyday equivalents.
# NOTE(review): 3.98e-4, 9.05e-4 and 0.06 look like published greenhouse-gas
# equivalency factors in metric tons of CO2 (per mile driven, per pound of coal,
# per seedling grown for 10 years) - confirm the source and cite it here.
KM_PER_MILE = 1.609344
LB_PER_KG = 2.204623
KG_PER_METRIC_TON = 1e3

# Driving: kg CO2 per km.
kgC02PerKm = (3.98 * 1e-4 * KG_PER_METRIC_TON) / KM_PER_MILE
eqDriven = co2_emission / kgC02PerKm
print(f"Equivalent distance driven by an average passenger vehicle: {eqDriven:.2f} km")

# Coal: despite the name, this is presumably kg CO2 emitted per kg of coal burned.
kgCoalBurnedPerKg = 9.05 * 1e-4 * KG_PER_METRIC_TON * LB_PER_KG
eqCoalBurned = co2_emission / kgCoalBurnedPerKg
print(f"Equivalent coal burned: {eqCoalBurned:.2f} kg")

# Trees: kg CO2 per seedling.
kgC02SequestratedBySeedling = 0.06 * KG_PER_METRIC_TON
eqSeedling = co2_emission / kgC02SequestratedBySeedling
print(f"Equivalent CO2 sequestrated by seedling trees in 10 years: {eqSeedling:.2f} trees")

Equivalent distance driven by an average passenger vehicle: 30.08 km
Equivalent coal burned: 3.73 kg
Equivalent CO2 sequestrated by seedling trees in 10 years: 0.12 trees


In [111]:
# Distinct provider names in order of first appearance, with Scaleway left out.
# TODO: record why Scaleway is excluded.
# Filtering (rather than list.remove) means this cell does not raise
# ValueError if the CSV contains no Scaleway rows.
cloud_providers = [
    name for name in impact['providerName'].unique() if name != 'Scaleway'
]
cloud_providers

['Google Cloud Platform',
 'Amazon Web Services',
 'Azure',
 'OVHCloud',
 'CoreWeave',
 'Seeweb']

In [None]:
# Distinct GPU names, in order of first appearance in the GPU table.
hardware_types = gpus['name'].unique().tolist()
hardware_types

In [115]:
# Map each selected provider to the list of region names known for it
# (the keys of that provider's entry in carbon_region).
regions = {
    provider_name: list(carbon_region[provider_name])
    for provider_name in cloud_providers
}

In [120]:
from codecarbon import track_emissions

@track_emissions()
def calculate_llm_emissions(num_queries, co2_emission_per_query=4.2e-3):
    """
    Estimate total CO2 emissions for a number of Large Language Model queries.

    Args:
        num_queries: Number of queries to account for.
        co2_emission_per_query: Emission factor applied to each query.
            Defaults to 4.2e-3, the value that used to be hard-coded here.
            Its unit and source are not stated in this notebook - TODO confirm.

    Returns:
        ``num_queries * co2_emission_per_query``.
    """
    # NOTE(review): the tracking decorator applied to this function presumably
    # measures the cost of running this multiplication, not the estimate it
    # returns - confirm that this is the intended use.
    return num_queries * co2_emission_per_query

In [122]:
@track_emissions()
def heavy_computation(num_queries, iterations_per_query=1000, matrix_size=100):
    """
    Perform a computationally heavy task.

    For every query, repeatedly draws a random square matrix from NumPy's
    global random state and computes its eigenvalues; the results are
    discarded, so the function exists only to burn CPU time.

    Args:
        num_queries: Number of outer iterations.
        iterations_per_query: Eigenvalue computations per query. Defaults to
            1000, the value that used to be hard-coded.
        matrix_size: Side length of each random matrix. Defaults to 100, the
            value that used to be hard-coded.

    Returns:
        None.
    """
    for _query in range(num_queries):
        for _step in range(iterations_per_query):
            # Result intentionally unused: only the work matters.
            np.linalg.eigvals(np.random.rand(matrix_size, matrix_size))
    return