<a href="https://colab.research.google.com/github/ConstanceDws/DCASE_2023/blob/main/DCASE23_Carbon_Metrics_Turorial.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 🌱 Monitoring environmental impact of DCASE systems : Hands-On Tutorial ▶

In [None]:
# Install packages
%%capture
!pip install torch==1.13.1
!pip install torchaudio==0.13.1
!pip install codecarbon
!pip install carbontracker
!pip install pyJoules
!pip install thop
!pip install deepspeed

In [None]:
# Check the GPU configuration.
# If this command fails, make sure a GPU runtime is selected:
# Runtime -> Change runtime type -> T4 GPU.
!nvidia-smi

Wed Sep 20 07:49:56 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.105.17   Driver Version: 525.105.17   CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   71C    P0    32W /  70W |   9065MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

**Before going any further, get access to the dataset and drag and drop it into your Google Drive:**

# https://drive.google.com/drive/folders/1hEzIWq3F-ycoEMcBxkzmJlqbGLWrQz21?usp=sharing

In [None]:
# All needed imports
import torch
import torchaudio
import torchaudio.transforms as T
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import os
import numpy as np
import random

In [None]:
# Mount Google Drive at /content/drive so the tutorial data stored there can be read
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Path of the Google Drive folder that holds the tutorial data
# (the audio folder and the metadata file are looked up relative to it).
# Change it if you stored the dataset somewhere else in your Drive.
google_drive_path = '/content/drive/My Drive/DCASE23_Tutorial/'

In [None]:
# Locate the audio clips and their metadata file inside the Drive folder
data_folder = os.path.join(google_drive_path, 'DESED_public_eval_sample')
metadata_file = os.path.join(google_drive_path, 'public_sample.tsv')
print(f"data folder: {data_folder}")
# This line shows the metadata file, so label it accordingly
print(f"metadata file: {metadata_file}")

data folder: /content/drive/My Drive/DCASE23_Tutorial/DESED_public_eval_sample
data folder: /content/drive/My Drive/DCASE23_Tutorial/public_sample.tsv


In [None]:
from collections import OrderedDict

# Label-to-index mapping: each sound-event class name is assigned its position
# in the tuple below (0 .. 9).
classes_labels = OrderedDict(
    (name, index)
    for index, name in enumerate(
        (
            "Alarm_bell_ringing",
            "Blender",
            "Cat",
            "Dishes",
            "Dog",
            "Electric_shaver_toothbrush",
            "Frying",
            "Running_water",
            "Speech",
            "Vacuum_cleaner",
        )
    )
)

# Data-loading settings
batch_size = 32  # clips per mini-batch
fs = 44100       # sampling rate in Hz (same value the mel-spectrogram transform uses)

# Every clip is padded to this many samples, i.e. 10 seconds at `fs`
target_len = fs * 10

In [None]:
import pandas as pd

# Maps each audio filename to the list of its encoded event labels
metadata = {}

# Read the metadata file, skipping the header row
with open(metadata_file, 'r') as file:
    next(file, None)  # Skip the header line; the default avoids StopIteration on an empty file
    for line in file:
        parts = line.strip().split()

        # Blank or incomplete rows (no label column) would raise IndexError below
        if len(parts) < 2:
            continue

        filename = parts[0]

        # The second column holds one or more comma-separated event labels
        event_labels = parts[1].split(',')

        # Map each event label to its class index; unknown labels become -1
        encoded_event_labels = [classes_labels.get(label, -1) for label in event_labels]

        # Add the filename and encoded event labels to the metadata dictionary
        metadata[filename] = encoded_event_labels

# NOTE: the feature transform actually used is built inside WeakDataset;
# the MFCC alternative below is kept for reference only.
#transform = T.MFCC(sample_rate=16000, n_mfcc=13)

In [None]:
def pad_audio(waveform, length=None):
    """Bring a waveform to exactly `length` samples along its last axis.

    Shorter waveforms are zero-padded at the end; longer ones are cropped to a
    window of `length` samples starting at a random onset.

    Args:
        waveform: tensor whose last dimension is the sample axis,
            e.g. (channels, samples).
        length: desired number of samples. When omitted, the module-level
            `target_len` is used, which keeps existing `pad_audio(waveform)`
            calls working unchanged.

    Returns:
        A tensor with the same leading dimensions and `length` samples.
    """
    if length is None:
        length = target_len

    # Measure the sample axis, not the first axis: for a (channels, samples)
    # tensor `len(waveform)` is the channel count, so the crop branch never ran.
    n_samples = waveform.shape[-1]

    if n_samples < length:
        waveform = torch.nn.functional.pad(
            waveform, (0, length - n_samples), mode="constant")

    elif n_samples > length:
        rand_onset = random.randint(0, n_samples - length)
        # Slice the last axis so every channel keeps the same time window
        waveform = waveform[..., rand_onset:rand_onset + length]

    return waveform

In [None]:
def to_mono(waveform):
    """Reduce a multi-channel waveform to one randomly chosen channel.

    Args:
        waveform: tensor of shape (channels, samples).

    Returns:
        A (1, samples) tensor. Input that already has a single channel is
        returned unchanged.
    """
    n_channels = waveform.shape[0]
    if n_channels > 1:
        # np.random.randint excludes its upper bound, so pass the channel count
        # itself; `n_channels - 1` would never select the last channel.
        indx = np.random.randint(0, n_channels)
        waveform = waveform[indx].unsqueeze(0)
    return waveform

In [None]:
from torchaudio.transforms import MelSpectrogram

class WeakDataset(Dataset):
    """Dataset of audio clips paired with their encoded event labels.

    Args:
        data_folder: directory containing the audio files.
        metadata: dict mapping a filename to its list of encoded event labels.
        target_len: number of samples per clip; stored on the instance.
        transform: optional callable applied to the padded waveform. When
            omitted, a 128-band magnitude mel spectrogram at 44.1 kHz is used.
    """

    def __init__(self, data_folder, metadata, target_len, transform=None):
        self.data_folder = data_folder
        self.metadata = metadata

        # Keep only files that have a metadata entry (anything else would raise
        # KeyError in __getitem__) and sort them so indices are reproducible.
        self.file_list = sorted(
            name for name in os.listdir(data_folder) if name in metadata
        )

        # Honour a caller-supplied transform; fall back to the default features
        if transform is None:
            transform = MelSpectrogram(
                sample_rate=44100,
                n_fft=2048,
                win_length=2048,
                hop_length=256,
                f_min=0,
                f_max=22050,
                n_mels=128,
                window_fn=torch.hamming_window,
                wkwargs={"periodic": False},
                power=1,
            )
        self.transform = transform
        self.target_len = target_len

    def __len__(self):
        """Return the number of labelled audio files."""
        return len(self.file_list)

    def __getitem__(self, idx):
        """Load clip `idx` and return `(features, event_label)`.

        `event_label` is the list of encoded labels stored in the metadata.
        """
        filename = self.file_list[idx]

        file_path = os.path.join(self.data_folder, filename)
        waveform, _sample_rate = torchaudio.load(file_path)

        # only one channel
        waveform = to_mono(waveform)

        # pad audio (pad_audio reads the module-level `target_len`)
        waveform = pad_audio(waveform)

        if self.transform is not None:
            waveform = self.transform(waveform)

        event_label = self.metadata[filename]

        return waveform, event_label

In [None]:
# Define the CRNN model
class CRNN(nn.Module):
    """Small convolutional-recurrent classifier.

    Two 3x3 convolutions (each followed by ReLU and 2x2 max-pooling) feed a
    single-layer GRU; the GRU output at the last step goes through a linear
    layer and a softmax over the classes.

    Args:
        num_classes: number of output classes.
    """

    def __init__(self, num_classes):
        super(CRNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.conv2 = nn.Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.rnn = nn.GRU(input_size=128, hidden_size=64, num_layers=1, batch_first=True)
        self.fc = nn.Linear(64, num_classes)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Classify a batch of single-channel 2-D inputs.

        Args:
            x: tensor of shape (batch, 1, H, W); assumed to be
                (batch, 1, n_mels, frames) mel spectrograms.

        Returns:
            Tensor of shape (batch, num_classes) whose rows sum to 1.
        """
        x = self.conv1(x)
        x = torch.relu(x)
        x = torch.max_pool2d(x, kernel_size=(2, 2))
        x = self.conv2(x)
        x = torch.relu(x)
        x = torch.max_pool2d(x, kernel_size=(2, 2))

        # Turn (batch, C, H, W) into a sequence of C-dimensional vectors.
        # The channel axis has to be moved last *before* flattening: a plain
        # `view(batch, -1, C)` only reinterprets memory and would feed the GRU
        # chunks of 128 neighbouring values from one channel instead of the
        # 128 channel activations at one position.
        batch, channels = x.size(0), x.size(1)
        x = x.permute(0, 3, 2, 1).reshape(batch, -1, channels)

        x, _ = self.rnn(x)
        x = self.fc(x[:, -1, :])  # read out the last sequence step only

        # NOTE(review): the model returns probabilities. nn.CrossEntropyLoss
        # expects unnormalised logits, so pairing it with this output applies
        # softmax twice - confirm whether that is intended.
        x = self.softmax(x)
        return x

In [None]:
# Build the dataset from the audio folder and the parsed metadata
dataset = WeakDataset(
    data_folder=data_folder,
    metadata=metadata,
    target_len=target_len,
)

# Serve it in shuffled mini-batches of `batch_size` clips
data_loader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)

## Use of [DeepSpeed](https://www.deepspeed.ai/tutorials/flops-profiler/)

In [None]:
import torch
from deepspeed.profiling.flops_profiler import get_model_profile

# Input shape for one 10 s clip: (batch, channel, mel bands, frames).
# 1723 frames = 441000 samples // hop length 256, plus one.
shape = (1, 1, 128, 1723)

# Fresh, untrained model: only the architecture matters for counting operations
model = CRNN(num_classes=10)

# Profile a single forward pass on a random input of that shape
flops, macs, params = get_model_profile(model=model, input_shape=shape)

[2023-09-20 07:50:02,282] [INFO] [profiler.py:1205:get_model_profile] Flops profiler warming-up...
[2023-09-20 07:50:03,731] [INFO] [profiler.py:80:start_profile] Flops profiler started

-------------------------- DeepSpeed Flops Profiler --------------------------
Profile Summary at step 1:
Notations:
data parallel size (dp_size), model parallel size(mp_size),
number of parameters (params), number of multiply-accumulate operations(MACs),
number of floating-point operations (flops), floating-point operations per second (FLOPS),
fwd latency (forward propagation latency), bwd latency (backward propagation latency),
step (weights update latency), iter latency (sum of fwd, bwd and step latency)

params per GPU:                                                         112.39 K
params of model = params per GPU * mp_size:                             0       
fwd MACs per GPU:                                                       4.19 GMACs
fwd flops per GPU:                                    

## Use of [THOP](https://pypi.org/project/thop/)


In [None]:
from thop import profile, clever_format

# Random input with the shape of one clip, created on the same device as the
# model so this cell still works when re-run after the model was moved to GPU.
# NOTE(review): the name `input` shadows the Python builtin; it is kept because
# other cells may refer to it.
input = torch.randn(shape).to(next(model.parameters()).device)

# Count multiply-accumulate operations and parameters for one forward pass
macs, params = profile(model, inputs=(input,))

# Convert the raw counts to human-readable strings (e.g. "4.708G")
macs, params = clever_format([macs, params], "%.3f")
print(f"MACS: {macs} PARAMS:{params}")

[INFO] Register count_convNd() for <class 'torch.nn.modules.conv.Conv2d'>.
[INFO] Register count_gru() for <class 'torch.nn.modules.rnn.GRU'>.
[INFO] Register count_linear() for <class 'torch.nn.modules.linear.Linear'>.
[INFO] Register count_softmax() for <class 'torch.nn.modules.activation.Softmax'>.
MACS: 4.708G PARAMS:112.394K


## Use of [CodeCarbon](https://github.com/mlco2/codecarbon)

In [None]:
# Energy consumption of training, measured with CodeCarbon
from codecarbon import EmissionsTracker

# Specify device (CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model to the selected device
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# CodeCarbon tracker
tracker_code_carbon = EmissionsTracker()

# Training loop
num_epochs = 20

tracker_code_carbon.start()
try:
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0

        for (inputs, labels) in data_loader:

            inputs = inputs.to(device)
            # Keep only the first element of the collated labels as the target
            labels = labels[0]
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(data_loader)}')
finally:
    # Always stop the tracker, even if training raises, so its background
    # measurement ends and the emissions collected so far are returned.
    emissions_code_carbon = tracker_code_carbon.stop()

print('Training complete.')
print(f"Emissions: {emissions_code_carbon} kg")

[codecarbon INFO @ 07:50:05] [setup] RAM Tracking...
[codecarbon INFO @ 07:50:05] [setup] GPU Tracking...
[codecarbon INFO @ 07:50:05] Tracking Nvidia GPU via pynvml
[codecarbon INFO @ 07:50:06] [setup] CPU Tracking...
[codecarbon INFO @ 07:50:07] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.30GHz
[codecarbon INFO @ 07:50:07] >>> Tracker's metadata:
[codecarbon INFO @ 07:50:07]   Platform system: Linux-5.15.109+-x86_64-with-glibc2.35
[codecarbon INFO @ 07:50:07]   Python version: 3.10.12
[codecarbon INFO @ 07:50:07]   CodeCarbon version: 2.3.1
[codecarbon INFO @ 07:50:07]   Available RAM : 12.678 GB
[codecarbon INFO @ 07:50:07]   CPU count: 2
[codecarbon INFO @ 07:50:07]   CPU model: Intel(R) Xeon(R) CPU @ 2.30GHz
[codecarbon INFO @ 07:50:07]   GPU count: 1
[codecarbon INFO @ 07:50:07]   GPU model: 1 x Tesla T4


Epoch 1/20, Loss: 2.301406443119049
Epoch 2/20, Loss: 2.2891581058502197


[codecarbon INFO @ 07:50:23] Energy consumed for RAM : 0.000020 kWh. RAM Power : 4.754403591156006 W
[codecarbon INFO @ 07:50:23] Energy consumed for all GPUs : 0.000205 kWh. Total GPU Power : 49.07669187819857 W
[codecarbon INFO @ 07:50:23] Energy consumed for all CPUs : 0.000177 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 07:50:23] 0.000402 kWh of electricity used since the beginning.


Epoch 3/20, Loss: 2.276199698448181
Epoch 4/20, Loss: 2.274015963077545
Epoch 5/20, Loss: 2.225351095199585


[codecarbon INFO @ 07:50:38] Energy consumed for RAM : 0.000040 kWh. RAM Power : 4.754403591156006 W
[codecarbon INFO @ 07:50:38] Energy consumed for all GPUs : 0.000419 kWh. Total GPU Power : 51.5110830658931 W
[codecarbon INFO @ 07:50:38] Energy consumed for all CPUs : 0.000354 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 07:50:38] 0.000813 kWh of electricity used since the beginning.


Epoch 6/20, Loss: 2.2511150240898132
Epoch 7/20, Loss: 2.2589438557624817
Epoch 8/20, Loss: 2.2519461512565613


[codecarbon INFO @ 07:50:53] Energy consumed for RAM : 0.000059 kWh. RAM Power : 4.754403591156006 W
[codecarbon INFO @ 07:50:53] Energy consumed for all GPUs : 0.000634 kWh. Total GPU Power : 51.766035076577694 W
[codecarbon INFO @ 07:50:53] Energy consumed for all CPUs : 0.000531 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 07:50:53] 0.001225 kWh of electricity used since the beginning.


Epoch 9/20, Loss: 2.205993413925171
Epoch 10/20, Loss: 2.2528743147850037
Epoch 11/20, Loss: 2.2404876351356506


[codecarbon INFO @ 07:51:08] Energy consumed for RAM : 0.000079 kWh. RAM Power : 4.754403591156006 W
[codecarbon INFO @ 07:51:08] Energy consumed for all GPUs : 0.000849 kWh. Total GPU Power : 51.63764676647107 W
[codecarbon INFO @ 07:51:08] Energy consumed for all CPUs : 0.000708 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 07:51:08] 0.001637 kWh of electricity used since the beginning.


Epoch 12/20, Loss: 2.226578950881958
Epoch 13/20, Loss: 2.2285507321357727
Epoch 14/20, Loss: 2.1439427733421326


[codecarbon INFO @ 07:51:23] Energy consumed for RAM : 0.000099 kWh. RAM Power : 4.754403591156006 W
[codecarbon INFO @ 07:51:23] Energy consumed for all GPUs : 0.001062 kWh. Total GPU Power : 50.97124102838247 W
[codecarbon INFO @ 07:51:23] Energy consumed for all CPUs : 0.000886 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 07:51:23] 0.002047 kWh of electricity used since the beginning.


Epoch 15/20, Loss: 2.200872004032135
Epoch 16/20, Loss: 2.201626479625702
Epoch 17/20, Loss: 2.2519859671592712


[codecarbon INFO @ 07:51:38] Energy consumed for RAM : 0.000119 kWh. RAM Power : 4.754403591156006 W
[codecarbon INFO @ 07:51:38] Energy consumed for all GPUs : 0.001275 kWh. Total GPU Power : 51.171948736857956 W
[codecarbon INFO @ 07:51:38] Energy consumed for all CPUs : 0.001063 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 07:51:38] 0.002457 kWh of electricity used since the beginning.


Epoch 18/20, Loss: 2.247547686100006
Epoch 19/20, Loss: 2.1497440338134766


[codecarbon INFO @ 07:51:48] Energy consumed for RAM : 0.000133 kWh. RAM Power : 4.754403591156006 W
[codecarbon INFO @ 07:51:48] Energy consumed for all GPUs : 0.001429 kWh. Total GPU Power : 51.54785716748076 W
[codecarbon INFO @ 07:51:48] Energy consumed for all CPUs : 0.001190 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 07:51:48] 0.002751 kWh of electricity used since the beginning.


Epoch 20/20, Loss: 2.196936070919037
Training complete.
Emissions: 0.001245372463993936 kg


## Use of [Carbontracker](https://github.com/lfwa/carbontracker)

In [None]:
# Energy consumption of training, measured with CarbonTracker
from carbontracker.tracker import CarbonTracker

num_epochs = 20

# Specify device (CPU or GPU)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Move the model to the selected device
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# CarbonTracker tracker; it is told how many epochs to expect
tracker = CarbonTracker(epochs=num_epochs)

# Training loop
try:
    for epoch in range(num_epochs):
        tracker.epoch_start()
        model.train()
        running_loss = 0.0

        for (inputs, labels) in data_loader:

            inputs = inputs.to(device)
            # Keep only the first element of the collated labels as the target
            labels = labels[0]
            labels = labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)

            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
        tracker.epoch_end()

        # Report every epoch; printed after epoch_end() so that it stays
        # outside the measured interval.
        print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(data_loader)}')
finally:
    # Stop the tracker even if training is interrupted
    tracker.stop()

print('Training complete.')

CarbonTracker: The following components were found: GPU with device(s) Tesla T4.
CarbonTracker: The following components were found: GPU with device(s) Tesla T4.
CarbonTracker: Average carbon intensity during training was 357.20 gCO2/kWh at detected location: Council Bluffs, Iowa, US.
CarbonTracker: Average carbon intensity during training was 357.20 gCO2/kWh at detected location: Council Bluffs, Iowa, US.
CarbonTracker: 
Actual consumption for 1 epoch(s):
	Time:	0:00:05
	Energy:	0.000108883678 kWh
	CO2eq:	0.038893478597 g
	This is equivalent to:
	0.000361799801 km travelled by car
CarbonTracker: 
Actual consumption for 1 epoch(s):
	Time:	0:00:05
	Energy:	0.000108883678 kWh
	CO2eq:	0.038893478597 g
	This is equivalent to:
	0.000361799801 km travelled by car
CarbonTracker: Live carbon intensity could not be fetched at detected location: Council Bluffs, Iowa, US. Defaulted to average carbon intensity for US in 2021 of 357.20 gCO2/kWh. at detected location: Council Bluffs, Iowa, US.
Carbo

## Use of [PyJoules](https://github.com/powerapi-ng/pyJoules)

In [None]:
from pyJoules.energy_meter import measure_energy
from pyJoules.handler.csv_handler import CSVHandler

@measure_energy()
def train_loop(num_epochs, data_loader, device, optimizer, criterrion, loss):
    """Run one pass over `data_loader`, updating the model after every batch.

    The function is wrapped by pyJoules' `measure_energy`, so each call is
    measured as a whole. The model is read from the module-level `model`
    variable at call time.

    Args:
        num_epochs: unused; kept so existing calls stay valid.
        data_loader: iterable yielding `(inputs, labels)` batches.
        device: device the batches are moved to.
        optimizer: optimizer that updates the model parameters.
        criterrion: loss function called as `criterrion(outputs, labels)`.
            The parameter name (including its spelling) is kept for
            compatibility with existing callers.
        loss: value returned as-is when `data_loader` yields no batch.

    Returns:
        The loss of the last processed batch.
    """
    for (inputs, labels) in data_loader:

        inputs = inputs.to(device)
        # Keep only the first element of the collated labels as the target
        labels = labels[0]
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)

        # Use the loss function that was passed in rather than whatever global
        # `criterion` happens to exist in the notebook.
        loss = criterrion(outputs, labels)
        loss.backward()
        optimizer.step()

    return loss

In [None]:
# Define the CRNN model
model = CRNN(num_classes=10)

# Move the model to the selected device
model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 20

for epoch in range(num_epochs):

    model.train()

    # train_loop returns the loss of its last batch. Capture it here: reading a
    # global `loss` left over from an earlier cell reported the same stale
    # value on every epoch. The last argument is only a placeholder.
    loss = train_loop(num_epochs, data_loader, device, optimizer, criterion, None)

    # A single batch loss is reported as-is; dividing it by the number of
    # batches would not give a meaningful average.
    running_loss = loss.item()

    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss}')

print('Training complete.')



begin timestamp : 1695196515.2832465; tag : train_loop; duration : 3.9616856575012207; nvidia_gpu_0 : 208155
Epoch 1/20, Loss: 0.41296249628067017
begin timestamp : 1695196519.2744637; tag : train_loop; duration : 4.41056752204895; nvidia_gpu_0 : 226366
Epoch 2/20, Loss: 0.41296249628067017
begin timestamp : 1695196523.7058737; tag : train_loop; duration : 4.466010332107544; nvidia_gpu_0 : 224584
Epoch 3/20, Loss: 0.41296249628067017
begin timestamp : 1695196528.2014816; tag : train_loop; duration : 3.786639451980591; nvidia_gpu_0 : 205291
Epoch 4/20, Loss: 0.41296249628067017
begin timestamp : 1695196532.0168674; tag : train_loop; duration : 3.7673113346099854; nvidia_gpu_0 : 204570
Epoch 5/20, Loss: 0.41296249628067017
begin timestamp : 1695196535.8147736; tag : train_loop; duration : 5.019988775253296; nvidia_gpu_0 : 253521
Epoch 6/20, Loss: 0.41296249628067017
begin timestamp : 1695196540.8559875; tag : train_loop; duration : 3.8286290168762207; nvidia_gpu_0 : 204288
Epoch 7/20, Lo