# Summary of the carbon emissions

In [None]:
from datetime import datetime, timedelta
import pandas as pd

In [None]:
def read_emissions(filepath: str):
    """Load the CSV file at ``filepath`` and return it as a pandas DataFrame.

    The file is parsed with the default ``pandas.read_csv`` settings, so the
    first line is used as the header row.
    """
    emissions_frame = pd.read_csv(filepath)
    return emissions_frame

def _most_common(series):
    """Return the most frequent non-null value of ``series``, or NaN if it has none."""
    modes = series.mode()
    # ``mode()`` ignores nulls, so an all-null column yields an empty result;
    # indexing it with ``[0]`` would raise instead of reporting "no value".
    return modes.iloc[0] if not modes.empty else float('nan')


def predict_aggregated_emissions(emissions_df, timespan, predicted_timespan):
    """Build one extrapolated emissions row per project.

    The measured columns are averaged per ``project_name`` and multiplied by
    ``predicted_timespan / timespan``. The descriptive columns (country, OS,
    hardware, ...) are filled with the most frequent value found in the whole
    of ``emissions_df``, identically for every project.

    Args:
        emissions_df: DataFrame holding the emissions log columns used below.
        timespan: Time window the measurements cover (e.g. a ``timedelta``).
        predicted_timespan: Time window to extrapolate to; must be divisible
            by ``timespan`` to give a plain number.

    Returns:
        A new DataFrame with one row per project, in sorted project order.
        A descriptive column that holds no non-null value is filled with NaN.
    """
    leading_measures = ['duration', 'emissions']
    power_and_energy = [
        'cpu_power', 'gpu_power', 'ram_power',
        'cpu_energy', 'gpu_energy', 'ram_energy',
        'energy_consumed',
    ]
    location_columns = [
        'country_name', 'country_iso_code', 'region',
        'cloud_provider', 'cloud_region',
    ]
    machine_columns = [
        'os', 'python_version', 'cpu_count', 'cpu_model',
        'gpu_count', 'gpu_model', 'longitude', 'latitude',
        'ram_total_size', 'tracking_mode', 'on_cloud',
    ]

    scale = predicted_timespan / timespan

    # Average only the columns that get scaled: a frame-wide ``mean()`` would
    # also try to average text columns, which recent pandas versions reject.
    aggregated_emissions_df = (
        emissions_df.groupby('project_name')[leading_measures + power_and_energy]
        .mean()
        .reset_index()
    )

    # Columns are assigned in the same order as the emissions log layout.
    predicted_emissions_df = pd.DataFrame(index=aggregated_emissions_df.index)
    predicted_emissions_df['timestamp'] = emissions_df['timestamp'].max()
    predicted_emissions_df['project_name'] = aggregated_emissions_df['project_name']
    predicted_emissions_df['run_id'] = emissions_df['run_id'].max()
    for column in leading_measures:
        predicted_emissions_df[column] = scale * aggregated_emissions_df[column]
    predicted_emissions_df['emissions_rate'] = _most_common(emissions_df['emissions_rate'])
    for column in power_and_energy:
        predicted_emissions_df[column] = scale * aggregated_emissions_df[column]
    # Assign a scalar so every project row receives the value; assigning the
    # raw ``mode()`` Series would align on index and fill only the first row(s).
    for column in location_columns + machine_columns:
        predicted_emissions_df[column] = _most_common(emissions_df[column])
    return predicted_emissions_df

def predict_emissions(emissions_df, timespan, predicted_timespan):
    """Return a copy of ``emissions_df`` extrapolated to a longer timespan.

    Every row keeps its identity; the duration, emissions, power and energy
    columns are multiplied by ``predicted_timespan / timespan``. All other
    columns are copied unchanged and the input frame is not modified.
    """
    # NOTE(review): the *_power columns are scaled by the time ratio as well;
    # confirm that is intended, since power is normally independent of duration.
    columns_to_scale = (
        'duration',
        'emissions',
        'cpu_power',
        'gpu_power',
        'ram_power',
        'cpu_energy',
        'gpu_energy',
        'ram_energy',
        'energy_consumed',
    )
    factor = predicted_timespan / timespan
    scaled_df = emissions_df.copy()
    for column in columns_to_scale:
        scaled_df[column] = factor * emissions_df[column]
    return scaled_df

def repeat_emissions(emissions_df, total_count):
    """Return a frame of ``total_count`` rows built by repeating the input rows.

    Each row is repeated ``total_count // len(emissions_df)`` times in place
    (row 0, row 0, ..., row 1, row 1, ...). If that falls short of
    ``total_count``, the result is padded with its own leading rows. When
    fewer rows are requested than the input holds, the first ``total_count``
    input rows are returned.

    Args:
        emissions_df: DataFrame whose rows are repeated; it is not modified.
        total_count: Number of rows wanted in the result.

    Returns:
        A new DataFrame with a fresh 0..n-1 index. It is empty when the input
        is empty or ``total_count`` is not positive.
    """
    row_count = emissions_df.shape[0]
    if row_count == 0 or total_count <= 0:
        # Nothing to repeat; this also avoids dividing by zero below.
        return emissions_df.iloc[:0].copy()

    # Work on a fresh positional index so duplicate labels in the input
    # cannot multiply rows during the label-based lookup.
    base_df = emissions_df.reset_index(drop=True)
    repeats = int(total_count / row_count)
    if repeats == 0:
        # Fewer rows requested than available: truncate instead of returning nothing.
        return base_df.iloc[:total_count]

    predicted_emissions_df = base_df.loc[base_df.index.repeat(repeats)].reset_index(drop=True)
    missing = total_count - predicted_emissions_df.shape[0]
    if missing > 0:
        # ignore_index keeps the index unique after padding.
        predicted_emissions_df = pd.concat(
            [predicted_emissions_df, predicted_emissions_df.iloc[:missing]],
            ignore_index=True,
        )
    return predicted_emissions_df

In [None]:
# Load the measured emissions log.
emissions_df = read_emissions("../log/emissions.user")
# NOTE(review): assumes each logged row covers one minute of measurement — confirm.
timespan = timedelta(minutes=1)
# Window every row is extrapolated to.
predicted_timespan = timedelta(hours=24)
# Scale the measured columns by predicted_timespan / timespan (a factor of 1440 here).
predicted_emissions_df = predict_emissions(emissions_df, timespan, predicted_timespan)
# Ask repeat_emissions for 365 rows in total (presumably one per day of a year).
predicted_emissions_df = repeat_emissions(predicted_emissions_df, 365)
# Bare expression so the notebook displays the resulting frame.
predicted_emissions_df

In [None]:
# Write the extrapolated frame to disk as CSV, without the DataFrame index column.
predicted_emissions_df.to_csv("../log/emissions-year.user", index=False)
# Optional sanity check (disabled): re-read the exported file.
#read_emissions("../log/emissions-year.user")

In [None]:
# Summary statistics of the measured emissions, displayed by the notebook.
emissions_df.describe()

In [None]:
# Summary statistics of the extrapolated emissions, for comparison with the measured ones.
predicted_emissions_df.describe()

In [None]:
# Print the column names of the measured emissions frame, one per line.
for col in emissions_df.columns:
    print(col)

In [None]:
# Print the column names of the extrapolated frame, one per line.
for col in predicted_emissions_df.columns:
    print(col)