# Summary of the carbon emissions

In [1]:
from datetime import datetime, timedelta
import pandas as pd

In [76]:
def read_emissions(filepath: str):
    """Load an emissions log stored as CSV.

    Args:
        filepath: Location of the CSV file, handed unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame that ``pd.read_csv`` builds with its default options.
    """
    loaded_df = pd.read_csv(filepath)
    return loaded_df
    
def clean_emissions(emissions_df, quantile=0.7):
    """Drop the highest-emission rows of every project.

    For each distinct ``project_name`` the rows whose ``emissions`` value lies
    above that project's ``quantile`` quantile are discarded. The surviving
    rows keep their original index labels and are returned project by
    project, in order of first appearance of the project name.

    Args:
        emissions_df: DataFrame with at least the columns ``project_name``
            and ``emissions``.
        quantile: Quantile (between 0 and 1) of a project's emissions used as
            the inclusive upper cut-off. The default of 0.7 reproduces the
            previous hard-coded behaviour.

    Returns:
        A DataFrame holding only the retained rows; an empty frame with the
        same columns when ``emissions_df`` has no rows.
    """
    kept_per_project = []
    for project_name in emissions_df['project_name'].unique():
        project_rows = emissions_df[emissions_df['project_name'] == project_name]
        threshold = project_rows['emissions'].quantile(quantile)
        kept_per_project.append(project_rows[project_rows['emissions'] <= threshold])
    if not kept_per_project:
        # pd.concat raises ValueError for an empty list, so an input without
        # any project is answered with an empty slice of the input instead.
        return emissions_df.iloc[0:0]
    return pd.concat(kept_per_project)

def _most_common(values):
    """Return the most frequent non-null entry of *values*, or NaN if none exists."""
    modes = values.mode()
    # mode() ignores nulls, so an all-null column produces an empty result
    # and indexing it with [0] would raise.
    return modes.iloc[0] if len(modes) else float('nan')


def predict_aggregated_emissions(emissions_df, timespan, predicted_timespan):
    """Build one extrapolated emissions row per project.

    The measurement columns are averaged per ``project_name`` and multiplied
    by ``predicted_timespan / timespan``. ``timestamp`` and ``run_id`` are
    filled with the maximum found in ``emissions_df``; every remaining
    descriptive column is filled, for all projects alike, with the most
    frequent non-null value of that column (NaN when the column holds no
    value at all).

    NOTE(review): the ``*_power`` columns are scaled by the time ratio just
    like the energy columns - confirm that this is intended.

    Args:
        emissions_df: DataFrame with the emissions log columns referenced
            below.
        timespan: Time covered by the measurements.
        predicted_timespan: Time the prediction should cover; must be
            divisible by ``timespan`` with ``/`` (e.g. two ``timedelta``
            objects or two numbers).

    Returns:
        A new DataFrame with one row per project, indexed 0..n-1.
    """
    scaled_columns = [
        'duration', 'emissions',
        'cpu_power', 'gpu_power', 'ram_power',
        'cpu_energy', 'gpu_energy', 'ram_energy',
        'energy_consumed',
    ]
    scale = predicted_timespan / timespan
    # Average only the columns that are scaled afterwards: taking the mean of
    # the whole frame fails on its string columns with recent pandas.
    aggregated_df = (
        emissions_df.groupby('project_name')[scaled_columns].mean().reset_index()
    )

    predicted_df = pd.DataFrame(index=aggregated_df.index)
    predicted_df['timestamp'] = emissions_df['timestamp'].max()
    predicted_df['project_name'] = aggregated_df['project_name']
    predicted_df['run_id'] = emissions_df['run_id'].max()
    for column in ('duration', 'emissions'):
        predicted_df[column] = scale * aggregated_df[column]
    predicted_df['emissions_rate'] = _most_common(emissions_df['emissions_rate'])
    for column in scaled_columns[2:]:
        predicted_df[column] = scale * aggregated_df[column]
    # Scalars are broadcast to every project row. Assigning the mode() Series
    # itself would be aligned on the index and fill only the first row.
    for column in (
        'country_name', 'country_iso_code', 'region',
        'cloud_provider', 'cloud_region',
        'os', 'python_version',
        'cpu_count', 'cpu_model', 'gpu_count', 'gpu_model',
        'longitude', 'latitude',
        'ram_total_size', 'tracking_mode', 'on_cloud',
    ):
        predicted_df[column] = _most_common(emissions_df[column])
    return predicted_df

def predict_emissions(emissions_df, timespan, predicted_timespan):
    """Extrapolate every row of an emissions log to a different time span.

    A copy of ``emissions_df`` is returned in which the duration, emissions,
    power and energy columns are multiplied by
    ``predicted_timespan / timespan``. All other columns, the index and the
    input frame itself are left untouched.

    NOTE(review): the ``*_power`` columns are scaled by the time ratio just
    like the energy columns - confirm that this is intended.

    Args:
        emissions_df: DataFrame containing the columns listed below.
        timespan: Time covered by the measurements.
        predicted_timespan: Time the prediction should cover.

    Returns:
        The scaled copy of ``emissions_df``.
    """
    factor = predicted_timespan / timespan
    result_df = emissions_df.copy()
    for column in (
        'duration',
        'emissions',
        'cpu_power',
        'gpu_power',
        'ram_power',
        'cpu_energy',
        'gpu_energy',
        'ram_energy',
        'energy_consumed',
    ):
        result_df[column] = factor * emissions_df[column]
    return result_df

def repeat_emissions(emissions_df, total_count):
    """Repeat the rows of an emissions log until it has ``total_count`` rows.

    Every row is repeated ``total_count // len(emissions_df)`` times in a
    row (a, a, b, b, ...). If that leaves the result short of
    ``total_count``, the missing rows are taken from the start of the
    repeated sequence. When ``total_count`` is smaller than the number of
    input rows, the first ``total_count`` rows are returned.

    Rows are selected by position, so duplicate index labels in
    ``emissions_df`` do not multiply rows.

    Args:
        emissions_df: DataFrame whose rows are repeated.
        total_count: Number of rows wanted in the result.

    Returns:
        A new DataFrame with a fresh 0..n-1 index. It is empty when
        ``emissions_df`` is empty or ``total_count`` is not positive.
    """
    row_count = len(emissions_df)
    if row_count == 0 or total_count <= 0:
        # Nothing to repeat; also avoids dividing by a zero row count.
        return emissions_df.iloc[0:0].reset_index(drop=True)
    if total_count < row_count:
        return emissions_df.iloc[:total_count].reset_index(drop=True)

    repeats = total_count // row_count
    positions = [pos for pos in range(row_count) for _ in range(repeats)]
    # Top up with the leading rows of the repeated sequence when total_count
    # is not a multiple of the row count.
    positions += positions[:total_count - len(positions)]
    return emissions_df.iloc[positions].reset_index(drop=True)

In [56]:
# Load the log, scale each row by predicted_timespan / timespan (24 h / 2 h),
# drop the rows clean_emissions filters out and repeat the rest to 365 rows
# (presumably one row per day of a year - confirm).
emissions_df = read_emissions("../log/emissions.user")
timespan = timedelta(hours=2)
predicted_timespan = timedelta(hours=24)
predicted_emissions_df = repeat_emissions(
    clean_emissions(
        predict_emissions(emissions_df, timespan, predicted_timespan)
    ),
    365,
)
predicted_emissions_df

Unnamed: 0,timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,...,python_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud
0,2023-11-08T09:10:16,Urban Digital Twin Bonn - Read SDF,78b82e36-bd75-4d4a-8e2f-99cc8cd8e404,9144.392956,0.060479,0.006614,510.0,59.844,142.988674,0.107734,...,3.9.16,24,12th Gen Intel(R) Core(TM) i7-12800HX,1,1 x NVIDIA RTX A2000 8GB Laptop GPU,,,31.775261,machine,N
1,2023-11-08T09:10:16,Urban Digital Twin Bonn - Read SDF,78b82e36-bd75-4d4a-8e2f-99cc8cd8e404,9144.392956,0.060479,0.006614,510.0,59.844,142.988674,0.107734,...,3.9.16,24,12th Gen Intel(R) Core(TM) i7-12800HX,1,1 x NVIDIA RTX A2000 8GB Laptop GPU,,,31.775261,machine,N
2,2023-11-08T09:10:16,Urban Digital Twin Bonn - Read SDF,78b82e36-bd75-4d4a-8e2f-99cc8cd8e404,9144.392956,0.060479,0.006614,510.0,59.844,142.988674,0.107734,...,3.9.16,24,12th Gen Intel(R) Core(TM) i7-12800HX,1,1 x NVIDIA RTX A2000 8GB Laptop GPU,,,31.775261,machine,N
3,2023-11-08T09:10:16,Urban Digital Twin Bonn - Read SDF,78b82e36-bd75-4d4a-8e2f-99cc8cd8e404,9144.392956,0.060479,0.006614,510.0,59.844,142.988674,0.107734,...,3.9.16,24,12th Gen Intel(R) Core(TM) i7-12800HX,1,1 x NVIDIA RTX A2000 8GB Laptop GPU,,,31.775261,machine,N
4,2023-11-08T09:10:16,Urban Digital Twin Bonn - Read SDF,78b82e36-bd75-4d4a-8e2f-99cc8cd8e404,9144.392956,0.060479,0.006614,510.0,59.844,142.988674,0.107734,...,3.9.16,24,12th Gen Intel(R) Core(TM) i7-12800HX,1,1 x NVIDIA RTX A2000 8GB Laptop GPU,,,31.775261,machine,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,2023-11-08T09:11:48,Urban Digital Twin Bonn - Read FC,bda08c36-524d-4d22-873f-00e0f3d18486,1067.848706,0.006904,0.006465,510.0,0.000,142.988674,0.012573,...,3.9.16,24,12th Gen Intel(R) Core(TM) i7-12800HX,1,1 x NVIDIA RTX A2000 8GB Laptop GPU,,,31.775261,machine,N
361,2023-11-08T09:11:48,Urban Digital Twin Bonn - Read FC,bda08c36-524d-4d22-873f-00e0f3d18486,1067.848706,0.006904,0.006465,510.0,0.000,142.988674,0.012573,...,3.9.16,24,12th Gen Intel(R) Core(TM) i7-12800HX,1,1 x NVIDIA RTX A2000 8GB Laptop GPU,,,31.775261,machine,N
362,2023-11-08T09:11:48,Urban Digital Twin Bonn - Read FC,bda08c36-524d-4d22-873f-00e0f3d18486,1067.848706,0.006904,0.006465,510.0,0.000,142.988674,0.012573,...,3.9.16,24,12th Gen Intel(R) Core(TM) i7-12800HX,1,1 x NVIDIA RTX A2000 8GB Laptop GPU,,,31.775261,machine,N
363,2023-11-08T09:11:48,Urban Digital Twin Bonn - Read FC,bda08c36-524d-4d22-873f-00e0f3d18486,1067.848706,0.006904,0.006465,510.0,0.000,142.988674,0.012573,...,3.9.16,24,12th Gen Intel(R) Core(TM) i7-12800HX,1,1 x NVIDIA RTX A2000 8GB Laptop GPU,,,31.775261,machine,N


In [71]:
# Write the frame built in the previous cell to CSV; index=False keeps the
# row index out of the file.
predicted_emissions_df.to_csv("../log/emissions-year.user", index=False)

In [79]:
# Load the "read" log, scale each row by predicted_timespan / timespan
# (24 h / 2 h), drop the rows clean_emissions filters out and repeat the rest
# to 365 rows (presumably one row per day of a year - confirm).
emissions_df = read_emissions("../log/emissions-read.user")
timespan = timedelta(hours=2)
predicted_timespan = timedelta(hours=24)
predicted_emissions_df = repeat_emissions(
    clean_emissions(
        predict_emissions(emissions_df, timespan, predicted_timespan)
    ),
    365,
)
predicted_emissions_df

Unnamed: 0,timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,...,python_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud
0,2023-11-08T12:07:34,Urban Digital Twin Bonn - Read SDF,5795b5b6-15f5-4af3-9dfb-08e0b1f235a7,6359.035466,0.033933,0.005336,510.0,0.0,71.995073,0.074932,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
1,2023-11-08T12:07:34,Urban Digital Twin Bonn - Read SDF,5795b5b6-15f5-4af3-9dfb-08e0b1f235a7,6359.035466,0.033933,0.005336,510.0,0.0,71.995073,0.074932,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
2,2023-11-08T12:07:34,Urban Digital Twin Bonn - Read SDF,5795b5b6-15f5-4af3-9dfb-08e0b1f235a7,6359.035466,0.033933,0.005336,510.0,0.0,71.995073,0.074932,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
3,2023-11-08T12:07:34,Urban Digital Twin Bonn - Read SDF,5795b5b6-15f5-4af3-9dfb-08e0b1f235a7,6359.035466,0.033933,0.005336,510.0,0.0,71.995073,0.074932,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
4,2023-11-08T12:07:34,Urban Digital Twin Bonn - Read SDF,5795b5b6-15f5-4af3-9dfb-08e0b1f235a7,6359.035466,0.033933,0.005336,510.0,0.0,71.995073,0.074932,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,2023-11-08T19:47:29,Urban Digital Twin Bonn - Read FC,8b2a1248-1020-41a1-a42e-bdc49679be4b,965.811024,0.005155,0.005338,510.0,0.0,71.995073,0.011384,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
361,2023-11-08T19:47:29,Urban Digital Twin Bonn - Read FC,8b2a1248-1020-41a1-a42e-bdc49679be4b,965.811024,0.005155,0.005338,510.0,0.0,71.995073,0.011384,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
362,2023-11-08T19:47:29,Urban Digital Twin Bonn - Read FC,8b2a1248-1020-41a1-a42e-bdc49679be4b,965.811024,0.005155,0.005338,510.0,0.0,71.995073,0.011384,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
363,2023-11-08T19:47:29,Urban Digital Twin Bonn - Read FC,8b2a1248-1020-41a1-a42e-bdc49679be4b,965.811024,0.005155,0.005338,510.0,0.0,71.995073,0.011384,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N


In [81]:
# Write the frame built in the previous cell to CSV; index=False keeps the
# row index out of the file.
predicted_emissions_df.to_csv("../log/emissions-read-year.user", index=False)

In [115]:
# Load the "measure" log, scale each row by predicted_timespan / timespan
# (24 h / 2 h), drop the rows clean_emissions filters out and repeat the rest
# to 365 rows (presumably one row per day of a year - confirm).
emissions_df = read_emissions("../log/emissions-measure.user")
timespan = timedelta(hours=2)
predicted_timespan = timedelta(hours=24)
predicted_emissions_df = repeat_emissions(
    clean_emissions(
        predict_emissions(emissions_df, timespan, predicted_timespan)
    ),
    365,
)
predicted_emissions_df

Unnamed: 0,timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,...,python_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud
0,2023-11-08T12:36:28,Urban Digital Twin Bonn - Measure,28d5487d-275b-4a98-87f7-787462e92535,19934.364306,0.106412,0.005338,510.0,0.0,71.995073,0.234980,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
1,2023-11-08T12:36:28,Urban Digital Twin Bonn - Measure,28d5487d-275b-4a98-87f7-787462e92535,19934.364306,0.106412,0.005338,510.0,0.0,71.995073,0.234980,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
2,2023-11-08T12:36:28,Urban Digital Twin Bonn - Measure,28d5487d-275b-4a98-87f7-787462e92535,19934.364306,0.106412,0.005338,510.0,0.0,71.995073,0.234980,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
3,2023-11-08T12:36:28,Urban Digital Twin Bonn - Measure,28d5487d-275b-4a98-87f7-787462e92535,19934.364306,0.106412,0.005338,510.0,0.0,71.995073,0.234980,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
4,2023-11-08T12:36:28,Urban Digital Twin Bonn - Measure,28d5487d-275b-4a98-87f7-787462e92535,19934.364306,0.106412,0.005338,510.0,0.0,71.995073,0.234980,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,2023-11-08T20:15:54,Urban Digital Twin Bonn - Measure,594a663c-5d47-4c41-b127-0a2c10fa21e3,20434.058390,0.109076,0.005338,510.0,0.0,71.995073,0.240861,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
361,2023-11-08T20:15:54,Urban Digital Twin Bonn - Measure,594a663c-5d47-4c41-b127-0a2c10fa21e3,20434.058390,0.109076,0.005338,510.0,0.0,71.995073,0.240861,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
362,2023-11-08T20:15:54,Urban Digital Twin Bonn - Measure,594a663c-5d47-4c41-b127-0a2c10fa21e3,20434.058390,0.109076,0.005338,510.0,0.0,71.995073,0.240861,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
363,2023-11-08T20:15:54,Urban Digital Twin Bonn - Measure,594a663c-5d47-4c41-b127-0a2c10fa21e3,20434.058390,0.109076,0.005338,510.0,0.0,71.995073,0.240861,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N


In [116]:
# Write the frame built in the previous cell to CSV; index=False keeps the
# row index out of the file.
predicted_emissions_df.to_csv("../log/emissions-measure-year.user", index=False)

In [117]:
# Load the "patterns" log, scale each row by predicted_timespan / timespan
# (24 h / 2 h), drop the rows clean_emissions filters out and repeat the rest
# to 365 rows (presumably one row per day of a year - confirm).
emissions_df = read_emissions("../log/emissions-patterns.user")
timespan = timedelta(hours=2)
predicted_timespan = timedelta(hours=24)
predicted_emissions_df = repeat_emissions(
    clean_emissions(
        predict_emissions(emissions_df, timespan, predicted_timespan)
    ),
    365,
)
predicted_emissions_df

Unnamed: 0,timestamp,project_name,run_id,duration,emissions,emissions_rate,cpu_power,gpu_power,ram_power,cpu_energy,...,python_version,cpu_count,cpu_model,gpu_count,gpu_model,longitude,latitude,ram_total_size,tracking_mode,on_cloud
0,2023-11-09T12:06:17,Urban Digital Twin Bonn - Patterns,1cb0689e-5726-41f8-85ca-962e7125a5e6,2180.621089,0.011655,0.005345,510.0,0.0,71.995073,0.025735,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
1,2023-11-09T12:06:17,Urban Digital Twin Bonn - Patterns,1cb0689e-5726-41f8-85ca-962e7125a5e6,2180.621089,0.011655,0.005345,510.0,0.0,71.995073,0.025735,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
2,2023-11-09T12:06:17,Urban Digital Twin Bonn - Patterns,1cb0689e-5726-41f8-85ca-962e7125a5e6,2180.621089,0.011655,0.005345,510.0,0.0,71.995073,0.025735,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
3,2023-11-09T12:06:17,Urban Digital Twin Bonn - Patterns,1cb0689e-5726-41f8-85ca-962e7125a5e6,2180.621089,0.011655,0.005345,510.0,0.0,71.995073,0.025735,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
4,2023-11-09T12:06:17,Urban Digital Twin Bonn - Patterns,1cb0689e-5726-41f8-85ca-962e7125a5e6,2180.621089,0.011655,0.005345,510.0,0.0,71.995073,0.025735,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
360,2023-11-09T12:30:42,Urban Digital Twin Bonn - Patterns,50b7d9b3-24c6-4bc0-bceb-24435827bd37,2168.075006,0.011588,0.005345,510.0,0.0,71.995073,0.025586,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
361,2023-11-09T12:30:42,Urban Digital Twin Bonn - Patterns,50b7d9b3-24c6-4bc0-bceb-24435827bd37,2168.075006,0.011588,0.005345,510.0,0.0,71.995073,0.025586,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
362,2023-11-09T12:30:42,Urban Digital Twin Bonn - Patterns,50b7d9b3-24c6-4bc0-bceb-24435827bd37,2168.075006,0.011588,0.005345,510.0,0.0,71.995073,0.025586,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N
363,2023-11-09T12:30:42,Urban Digital Twin Bonn - Patterns,50b7d9b3-24c6-4bc0-bceb-24435827bd37,2168.075006,0.011588,0.005345,510.0,0.0,71.995073,0.025586,...,3.9.16,4,Intel(R) Xeon(R) Platinum 8370C CPU @ 2.80GHz,,,,,15.998905,machine,N


In [118]:
# Write the frame built in the previous cell to CSV; index=False keeps the
# row index out of the file.
predicted_emissions_df.to_csv("../log/emissions-patterns-year.user", index=False)