# Data analysis

## Inputs
energibridge:
- For each runtime:
    - For each model:
        - `energibridge_<runtime>_<model>.csv` (e.g. `energibridge_onnx_m1.csv`)

nvidia-smi:
- gpu_results.csv

## Outputs
- general_dataset.csv
    - columns: time, runtime, model, cpu_energy, ...
    - descriptive analysis
    - statistical analysis

In [23]:
pwd

'D:\\GAISSA\\energy-repo\\last_repo'

In [5]:
cd ..

D:\GAISSA\energy-repo\last_repo


In [6]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os

In [55]:
# Model names. NOTE(review): no later cell visible in this notebook reads
# this list — confirm whether it is still needed.
models = [
    'codet5-base',
    'codet5p-220m',
    'codeparrot-small',
    'pythia-410m',
]

In [56]:
# Directory that holds the energibridge_*.csv exports and gpu_results.csv.
# Keep the trailing slash: later cells build file paths with plain string
# concatenation (results_dir + '<file>.csv').
results_dir = 'results/example/'

In [57]:
os.listdir(results_dir)

['energibridge_all.csv',
 'energibridge_onnx_m1.csv',
 'energibridge_onnx_m2.csv',
 'energibridge_onnx_m3.csv',
 'gpu_results.csv']

In [64]:
# Normalisation helpers for column names and timestamps.
#
# The raw frames (df1 from all_energibridge.csv, df2 from gpu_results.csv) are
# only loaded further down in this notebook, so this cell must not touch them:
# on a fresh kernel they do not exist yet and the cell would stop with a
# NameError. It therefore only *defines* the clean-up steps; they mirror what
# the cells below the loading step apply inline.

# energibridge headers that carry a unit suffix -> snake_case names
ENERGIBRIDGE_COLUMN_NAMES = {
    "DRAM_ENERGY (J)": "dram_energy",
    "PACKAGE_ENERGY (J)": "package_energy",
    "PP0_ENERGY (J)": "pp0_energy",
    "PP1_ENERGY (J)": "pp1_energy",
}

# nvidia-smi headers (surrounding whitespace already stripped) -> snake_case names
GPU_COLUMN_NAMES = {
    "name": "name",
    "utilization.gpu [%]": "utilization_gpu",
    "utilization.memory [%]": "utilization_memory",
    "memory.total [MiB]": "memory_total",
    "memory.used [MiB]": "memory_used",
    "power.draw [W]": "power_draw",
    "power.max_limit [W]": "power_max_limit",
    "temperature.gpu": "temperature_gpu",
}


def normalize_energibridge_frame(frame):
    """Return a copy of an energibridge frame with normalised columns.

    Steps: rename the unit-suffixed energy headers, lower-case every column
    label, then parse the 'time' column into pandas datetimes.

    Parameters
    ----------
    frame : pandas.DataFrame
        Raw energibridge samples. Must contain a column whose lower-cased
        label is 'time'.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified.

    Raises
    ------
    KeyError
        If there is no 'time' column after lower-casing.
    """
    normalized = frame.rename(columns=ENERGIBRIDGE_COLUMN_NAMES)
    normalized.columns = normalized.columns.str.lower()
    normalized['time'] = pd.to_datetime(normalized['time'])
    return normalized


def normalize_gpu_frame(frame):
    """Return a copy of an nvidia-smi frame with normalised columns.

    Column labels are stripped of surrounding whitespace (the CSV header of
    gpu_results.csv has a blank after each comma), mapped to snake_case and
    lower-cased. The string 'timestamp' column is kept and its parsed value
    is stored in a new 'time' column.

    Parameters
    ----------
    frame : pandas.DataFrame
        Raw nvidia-smi samples. Must contain a 'timestamp' column.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified.

    Raises
    ------
    KeyError
        If there is no 'timestamp' column after normalisation.
    """
    normalized = frame.rename(
        columns=lambda label: GPU_COLUMN_NAMES.get(label.strip(), label.strip())
    )
    normalized.columns = normalized.columns.str.lower()
    normalized['time'] = pd.to_datetime(normalized['timestamp'])
    return normalized

In [68]:
import pandas as pd
import os

# Merge every per-run energibridge export into one dataframe.
# Expected file name: energibridge_<runtime>_<model>.csv
csv_dir = results_dir

# One dataframe per run file, each tagged with its runtime and model
dfs = []

# sorted(): os.listdir() returns entries in arbitrary order, and the row order
# of the merged frame should not depend on it.
for file in sorted(os.listdir(csv_dir)):
    if not (file.endswith('.csv') and file.startswith('energibridge')):
        continue

    # Drop the extension with splitext (a model name may contain '.') and split
    # on the first two underscores only (a model name may contain '_').
    filename_parts = os.path.splitext(file)[0].split('_', 2)
    if len(filename_parts) != 3:
        # e.g. 'energibridge_all.csv' has no <runtime>/<model> pair; indexing
        # filename_parts[2] on it would raise IndexError.
        print(f"skipping {file}: name is not energibridge_<runtime>_<model>.csv")
        continue
    _, runtime, model = filename_parts
    print(f"file: {file}")

    # Load the run and record where its rows came from
    df = pd.read_csv(os.path.join(csv_dir, file))
    df['runtime'] = runtime
    df['model'] = model

    dfs.append(df)
    print(df.columns)

# pd.concat() on an empty list fails with an unhelpful "No objects to
# concatenate"; name the actual problem instead.
if not dfs:
    raise FileNotFoundError(
        f"no energibridge_<runtime>_<model>.csv files found in {csv_dir!r}"
    )

# Concatenate all runs, renumbering the rows 0..n-1
combined_df = pd.concat(dfs, ignore_index=True)

# Display the combined dataframe
combined_df

file: energibridge_onnx_m1.csv
Index(['time', 'cpu_frequency_0', 'cpu_frequency_1', 'cpu_frequency_2',
       'cpu_frequency_3', 'cpu_frequency_4', 'cpu_frequency_5',
       'cpu_frequency_6', 'cpu_frequency_7', 'cpu_usage_0', 'cpu_usage_1',
       'cpu_usage_2', 'cpu_usage_3', 'cpu_usage_4', 'cpu_usage_5',
       'cpu_usage_6', 'cpu_usage_7', 'dram_energy', 'package_energy',
       'pp0_energy', 'pp1_energy', 'total_memory', 'total_swap', 'used_memory',
       'used_swap', 'runtime', 'model'],
      dtype='object')
file: energibridge_onnx_m2.csv
Index(['time', 'cpu_frequency_0', 'cpu_frequency_1', 'cpu_frequency_2',
       'cpu_frequency_3', 'cpu_frequency_4', 'cpu_frequency_5',
       'cpu_frequency_6', 'cpu_frequency_7', 'cpu_usage_0', 'cpu_usage_1',
       'cpu_usage_2', 'cpu_usage_3', 'cpu_usage_4', 'cpu_usage_5',
       'cpu_usage_6', 'cpu_usage_7', 'dram_energy', 'package_energy',
       'pp0_energy', 'pp1_energy', 'total_memory', 'total_swap', 'used_memory',
       'used_swap',

Unnamed: 0,time,cpu_frequency_0,cpu_frequency_1,cpu_frequency_2,cpu_frequency_3,cpu_frequency_4,cpu_frequency_5,cpu_frequency_6,cpu_frequency_7,cpu_usage_0,...,dram_energy,package_energy,pp0_energy,pp1_energy,total_memory,total_swap,used_memory,used_swap,runtime,model
0,2024-04-30 19:06:23.773,2112,2112,2112,2112,2112,2112,2112,2112,18.218971,...,4428.83606,36189.687073,28878.376709,387.73761,17005068288,12348030976,13086830592,2029027328,onnx,m1
1,2024-04-30 19:06:28.773,2112,2112,2112,2112,2112,2112,2112,2112,99.582047,...,4428.843262,36189.743286,28878.416626,387.73761,17005068288,12348030976,13086683136,2030555136,onnx,m1
2,2024-04-30 19:06:33.773,2112,2112,2112,2112,2112,2112,2112,2112,12.057892,...,4428.995972,36192.827087,28881.175354,387.73761,17005068288,12348030976,13095006208,2044895232,onnx,m1
3,2024-04-30 19:06:38.773,2112,2112,2112,2112,2112,2112,2112,2112,2.324516,...,4429.126953,36193.230408,28881.278503,387.73761,17005068288,12348030976,13095006208,2044895232,onnx,m1
4,2024-04-30 19:06:43.773,2112,2112,2112,2112,2112,2112,2112,2112,3.775414,...,4429.266418,36193.605164,28881.451355,387.73761,17005068288,12348030976,13095088128,2044895232,onnx,m1
5,2024-04-30 19:06:48.773,2112,2112,2112,2112,2112,2112,2112,2112,13.105934,...,4429.448975,36195.215088,28882.697998,387.745422,17005068288,12348030976,13103407104,2059988992,onnx,m1
6,2024-04-30 19:06:53.773,2112,2112,2112,2112,2112,2112,2112,2112,99.582047,...,4428.843262,36189.743286,28878.416626,387.73761,17005068288,12348030976,13086683136,2030555136,onnx,m1
7,2024-04-30 19:06:58.773,2112,2112,2112,2112,2112,2112,2112,2112,12.057892,...,4428.995972,36192.827087,28881.175354,387.73761,17005068288,12348030976,13095006208,2044895232,onnx,m1
8,2024-04-30 19:07:03.773,2112,2112,2112,2112,2112,2112,2112,2112,2.324516,...,4429.126953,36193.230408,28881.278503,387.73761,17005068288,12348030976,13095006208,2044895232,onnx,m1
9,2024-04-30 19:07:08.773,2112,2112,2112,2112,2112,2112,2112,2112,3.775414,...,4429.266418,36193.605164,28881.451355,387.73761,17005068288,12348030976,13095088128,2044895232,onnx,m1


In [69]:
combined_df.columns

Index(['time', 'cpu_frequency_0', 'cpu_frequency_1', 'cpu_frequency_2',
       'cpu_frequency_3', 'cpu_frequency_4', 'cpu_frequency_5',
       'cpu_frequency_6', 'cpu_frequency_7', 'cpu_usage_0', 'cpu_usage_1',
       'cpu_usage_2', 'cpu_usage_3', 'cpu_usage_4', 'cpu_usage_5',
       'cpu_usage_6', 'cpu_usage_7', 'dram_energy', 'package_energy',
       'pp0_energy', 'pp1_energy', 'total_memory', 'total_swap', 'used_memory',
       'used_swap', 'runtime', 'model'],
      dtype='object')

In [70]:
# Persist the merged energibridge samples. index=False keeps the row index out
# of the CSV. The file name does not start with 'energibridge', so the merge
# loop's prefix filter does not match it when the directory is scanned again.
all_energibridge_path = results_dir + 'all_energibridge.csv'
combined_df.to_csv(all_energibridge_path, index=False)


## From all_energibridge and gpu_results -> to general

In [71]:
# creation of one dataset of energibridge files
# NOTE(review): this only resets the `dfs` list filled by the merge loop above;
# no later cell visible in this notebook reads `dfs` again — confirm whether
# this cell is still needed.
dfs = []

In [82]:
# Locations of the two inputs of the merge: the combined energibridge samples
# and the nvidia-smi log.
energibridge_data = f"{results_dir}all_energibridge.csv"
nvidia_data = f"{results_dir}gpu_results.csv"

# df1: energibridge samples, df2: nvidia-smi samples (both still with raw
# string timestamps at this point)
df1 = pd.read_csv(energibridge_data)
df2 = pd.read_csv(nvidia_data)

In [83]:
df1.head()

Unnamed: 0,time,cpu_frequency_0,cpu_frequency_1,cpu_frequency_2,cpu_frequency_3,cpu_frequency_4,cpu_frequency_5,cpu_frequency_6,cpu_frequency_7,cpu_usage_0,...,dram_energy,package_energy,pp0_energy,pp1_energy,total_memory,total_swap,used_memory,used_swap,runtime,model
0,2024-04-30 19:06:23.773,2112,2112,2112,2112,2112,2112,2112,2112,18.218971,...,4428.83606,36189.687073,28878.376709,387.73761,17005068288,12348030976,13086830592,2029027328,onnx,m1
1,2024-04-30 19:06:28.773,2112,2112,2112,2112,2112,2112,2112,2112,99.582047,...,4428.843262,36189.743286,28878.416626,387.73761,17005068288,12348030976,13086683136,2030555136,onnx,m1
2,2024-04-30 19:06:33.773,2112,2112,2112,2112,2112,2112,2112,2112,12.057892,...,4428.995972,36192.827087,28881.175354,387.73761,17005068288,12348030976,13095006208,2044895232,onnx,m1
3,2024-04-30 19:06:38.773,2112,2112,2112,2112,2112,2112,2112,2112,2.324516,...,4429.126953,36193.230408,28881.278503,387.73761,17005068288,12348030976,13095006208,2044895232,onnx,m1
4,2024-04-30 19:06:43.773,2112,2112,2112,2112,2112,2112,2112,2112,3.775414,...,4429.266418,36193.605164,28881.451355,387.73761,17005068288,12348030976,13095088128,2044895232,onnx,m1


In [84]:
df2.head()

Unnamed: 0,timestamp,name,utilization.gpu [%],utilization.memory [%],memory.total [MiB],memory.used [MiB],power.draw [W],power.max_limit [W],temperature.gpu
0,2024/04/30 19:06:21.773,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,183.90 W,350.00 W,58
1,2024/04/30 19:06:26.776,NVIDIA GeForce RTX 3090,48 %,24 %,24576 MiB,6084 MiB,188.06 W,350.00 W,56
2,2024/04/30 19:06:31.778,NVIDIA GeForce RTX 3090,35 %,15 %,24576 MiB,6084 MiB,197.91 W,350.00 W,58
3,2024/04/30 19:06:36.782,NVIDIA GeForce RTX 3090,11 %,10 %,24576 MiB,6084 MiB,189.22 W,350.00 W,55
4,2024/04/30 19:06:41.783,NVIDIA GeForce RTX 3090,21 %,9 %,24576 MiB,6084 MiB,193.54 W,350.00 W,58


In [85]:
# Bring the energibridge column labels to snake_case: map the unit-suffixed
# energy headers explicitly, then lower-case every label.
name_mapping = {
    "DRAM_ENERGY (J)": "dram_energy",
    "PACKAGE_ENERGY (J)": 'package_energy',
    'PP0_ENERGY (J)': "pp0_energy",
    'PP1_ENERGY (J)': "pp1_energy",
}
df1 = df1.rename(columns=name_mapping).rename(columns=str.lower)
df1.columns

Index(['time', 'cpu_frequency_0', 'cpu_frequency_1', 'cpu_frequency_2',
       'cpu_frequency_3', 'cpu_frequency_4', 'cpu_frequency_5',
       'cpu_frequency_6', 'cpu_frequency_7', 'cpu_usage_0', 'cpu_usage_1',
       'cpu_usage_2', 'cpu_usage_3', 'cpu_usage_4', 'cpu_usage_5',
       'cpu_usage_6', 'cpu_usage_7', 'dram_energy', 'package_energy',
       'pp0_energy', 'pp1_energy', 'total_memory', 'total_swap', 'used_memory',
       'used_swap', 'runtime', 'model'],
      dtype='object')

In [86]:
df2.columns

Index(['timestamp', ' name', ' utilization.gpu [%]', ' utilization.memory [%]',
       ' memory.total [MiB]', ' memory.used [MiB]', ' power.draw [W]',
       ' power.max_limit [W]', ' temperature.gpu'],
      dtype='object')

In [87]:
# Bring the nvidia-smi column labels to snake_case. The keys keep their leading
# blank because that is how the labels appear in df2.columns. Only the labels
# change here: the values still carry their unit suffixes (e.g. '183.90 W').
name_mapping = {
    ' name': 'name',
    " utilization.gpu [%]": "utilization_gpu",
    " utilization.memory [%]": 'utilization_memory',
    ' memory.total [MiB]': "memory_total",
    ' memory.used [MiB]': "memory_used",
    ' power.draw [W]': 'power_draw',
    ' power.max_limit [W]': "power_max_limit",
    ' temperature.gpu': "temperature_gpu",
}
df2 = df2.rename(columns=name_mapping).rename(columns=str.lower)
df2.columns

Index(['timestamp', 'name', 'utilization_gpu', 'utilization_memory',
       'memory_total', 'memory_used', 'power_draw', 'power_max_limit',
       'temperature_gpu'],
      dtype='object')

In [88]:
# Parse the time columns of both frames into pandas datetimes so the two
# sources can be ordered on one common 'time' column.

# nvidia-smi: the string 'timestamp' column is kept; its parsed value goes into
# a new 'time' column.
df2 = df2.assign(time=pd.to_datetime(df2['timestamp']))

# energibridge: 'time' holds date-time strings and is parsed in place.
# If it held UNIX epoch milliseconds instead, this would have to be
# pd.to_datetime(df1['time'], unit='ms').
df1 = df1.assign(time=pd.to_datetime(df1['time']))

In [93]:
df2.head()

Unnamed: 0,timestamp,name,utilization_gpu,utilization_memory,memory_total,memory_used,power_draw,power_max_limit,temperature_gpu,time
0,2024/04/30 19:06:21.773,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,183.90 W,350.00 W,58,2024-04-30 19:06:21.773
1,2024/04/30 19:06:26.776,NVIDIA GeForce RTX 3090,48 %,24 %,24576 MiB,6084 MiB,188.06 W,350.00 W,56,2024-04-30 19:06:26.776
2,2024/04/30 19:06:31.778,NVIDIA GeForce RTX 3090,35 %,15 %,24576 MiB,6084 MiB,197.91 W,350.00 W,58,2024-04-30 19:06:31.778
3,2024/04/30 19:06:36.782,NVIDIA GeForce RTX 3090,11 %,10 %,24576 MiB,6084 MiB,189.22 W,350.00 W,55,2024-04-30 19:06:36.782
4,2024/04/30 19:06:41.783,NVIDIA GeForce RTX 3090,21 %,9 %,24576 MiB,6084 MiB,193.54 W,350.00 W,58,2024-04-30 19:06:41.783


In [66]:
#df3 = df1.copy()
# Shift each timestamp forward by a fixed offset (the line below uses 6 minutes)
#df3['time'] += pd.Timedelta(minutes=6)

In [20]:
pwd

'D:\\GAISSA\\energy-repo\\last_repo'

In [94]:
#df3.to_csv('results/energibridge_example3.csv', index=False)
#df1.to_csv('results/energibridge_onnx_m1.csv', index=False)


In [95]:
# Apply the condition to update the 'runtime' column
#df.loc[(df['timestamp'] >= start_time) & (df['timestamp'] <= end_time), 'runtime'] = 'runtime1'


In [134]:
# Stack the energibridge samples (df1) and the nvidia-smi samples (df2) into
# one frame. Columns that exist in only one source are NaN in the other
# source's rows.
general_df = pd.concat([df1, df2], ignore_index=True)

# Order the rows chronologically. The sort must be applied to the concatenated
# frame itself: sorting combined_df here would throw the concat result away and
# write a file without any GPU rows. kind='stable' keeps rows with identical
# timestamps in their concat order.
general_df = general_df.sort_values(by='time', kind='stable')

general_df.to_csv(results_dir + 'general.csv', index=False)
general_df

Unnamed: 0,time,cpu_frequency_0,cpu_frequency_1,cpu_frequency_2,cpu_frequency_3,cpu_frequency_4,cpu_frequency_5,cpu_frequency_6,cpu_frequency_7,cpu_usage_0,...,model,timestamp,name,utilization_gpu,utilization_memory,memory_total,memory_used,power_draw,power_max_limit,temperature_gpu
48,2024-04-30 19:06:21.773,,,,,,,,,,...,,2024/04/30 19:06:21.773,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,183.90 W,350.00 W,58.0
0,2024-04-30 19:06:23.773,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,18.218971,...,m1,,,,,,,,,
49,2024-04-30 19:06:26.776,,,,,,,,,,...,,2024/04/30 19:06:26.776,NVIDIA GeForce RTX 3090,48 %,24 %,24576 MiB,6084 MiB,188.06 W,350.00 W,56.0
1,2024-04-30 19:06:28.773,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,99.582047,...,m1,,,,,,,,,
50,2024-04-30 19:06:31.778,,,,,,,,,,...,,2024/04/30 19:06:31.778,NVIDIA GeForce RTX 3090,35 %,15 %,24576 MiB,6084 MiB,197.91 W,350.00 W,58.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,2024-04-30 19:14:36.969,,,,,,,,,,...,,2024/04/30 19:14:36.969,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.21 W,350.00 W,40.0
148,2024-04-30 19:14:41.971,,,,,,,,,,...,,2024/04/30 19:14:41.971,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.25 W,350.00 W,39.0
149,2024-04-30 19:14:46.972,,,,,,,,,,...,,2024/04/30 19:14:46.972,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.32 W,350.00 W,39.0
150,2024-04-30 19:14:51.974,,,,,,,,,,...,,2024/04/30 19:14:51.974,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.29 W,350.00 W,39.0


In [150]:
# Reload the file written above. It was saved with index=False, so reading it
# back yields a fresh 0..n-1 index that follows the saved row order.
general_df = pd.read_csv(f"{results_dir}general.csv")

In [151]:
general_df

Unnamed: 0,time,cpu_frequency_0,cpu_frequency_1,cpu_frequency_2,cpu_frequency_3,cpu_frequency_4,cpu_frequency_5,cpu_frequency_6,cpu_frequency_7,cpu_usage_0,...,model,timestamp,name,utilization_gpu,utilization_memory,memory_total,memory_used,power_draw,power_max_limit,temperature_gpu
0,2024-04-30 19:06:21.773,,,,,,,,,,...,,2024/04/30 19:06:21.773,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,183.90 W,350.00 W,58.0
1,2024-04-30 19:06:23.773,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,18.218971,...,m1,,,,,,,,,
2,2024-04-30 19:06:26.776,,,,,,,,,,...,,2024/04/30 19:06:26.776,NVIDIA GeForce RTX 3090,48 %,24 %,24576 MiB,6084 MiB,188.06 W,350.00 W,56.0
3,2024-04-30 19:06:28.773,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,99.582047,...,m1,,,,,,,,,
4,2024-04-30 19:06:31.778,,,,,,,,,,...,,2024/04/30 19:06:31.778,NVIDIA GeForce RTX 3090,35 %,15 %,24576 MiB,6084 MiB,197.91 W,350.00 W,58.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,2024-04-30 19:14:36.969,,,,,,,,,,...,,2024/04/30 19:14:36.969,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.21 W,350.00 W,40.0
148,2024-04-30 19:14:41.971,,,,,,,,,,...,,2024/04/30 19:14:41.971,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.25 W,350.00 W,39.0
149,2024-04-30 19:14:46.972,,,,,,,,,,...,,2024/04/30 19:14:46.972,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.32 W,350.00 W,39.0
150,2024-04-30 19:14:51.974,,,,,,,,,,...,,2024/04/30 19:14:51.974,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.29 W,350.00 W,39.0


In [153]:
# Work on a copy so general_df keeps the state that was loaded from disk
df = general_df.copy()

# A row is an nvidia-smi sample when its 'name' contains 'NVIDIA'; on
# energibridge rows 'name' is NaN, and str(NaN) does not contain it.
is_gpu_row = df['name'].map(lambda gpu_name: 'NVIDIA' in str(gpu_name)).astype(bool)

# Runtime of the row directly below each row. shift(-1) works by position, so
# unlike a loop over df.at[i + 1, ...] it does not depend on the index labels
# being 0..n-1, and it also copes with an empty frame. The last row has no
# successor and gets NaN.
next_runtime = df['runtime'].shift(-1)

# GPU rows take the runtime of the following row; where that is missing (the
# next row is another GPU row, or there is no next row) they get the literal
# string 'None'. Energibridge rows are left untouched.
gpu_runtime = next_runtime.where(next_runtime.notna(), 'None')
df['runtime'] = df['runtime'].mask(is_gpu_row, gpu_runtime)

# Save the modified dataframe to a new CSV file
output_path = results_dir + 'modified_general_nvidia.csv'
df.to_csv(output_path, index=False)

# Display the modified dataframe
df.head()

Unnamed: 0,time,cpu_frequency_0,cpu_frequency_1,cpu_frequency_2,cpu_frequency_3,cpu_frequency_4,cpu_frequency_5,cpu_frequency_6,cpu_frequency_7,cpu_usage_0,...,model,timestamp,name,utilization_gpu,utilization_memory,memory_total,memory_used,power_draw,power_max_limit,temperature_gpu
0,2024-04-30 19:06:21.773,,,,,,,,,,...,,2024/04/30 19:06:21.773,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,183.90 W,350.00 W,58.0
1,2024-04-30 19:06:23.773,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,18.218971,...,m1,,,,,,,,,
2,2024-04-30 19:06:26.776,,,,,,,,,,...,,2024/04/30 19:06:26.776,NVIDIA GeForce RTX 3090,48 %,24 %,24576 MiB,6084 MiB,188.06 W,350.00 W,56.0
3,2024-04-30 19:06:28.773,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,99.582047,...,m1,,,,,,,,,
4,2024-04-30 19:06:31.778,,,,,,,,,,...,,2024/04/30 19:06:31.778,NVIDIA GeForce RTX 3090,35 %,15 %,24576 MiB,6084 MiB,197.91 W,350.00 W,58.0


In [99]:
# fill missing values, identify for the gpu_results the runtime that was running (one datapoint above)

In [154]:
df['runtime']

0      onnx
1      onnx
2      onnx
3      onnx
4      onnx
       ... 
147    None
148    None
149    None
150    None
151    None
Name: runtime, Length: 152, dtype: object

In [159]:
# Forward fill: every missing cell takes the last non-missing value above it in
# the same column. The literal string 'None' written into 'runtime' earlier is
# not a missing value, so those cells stay as they are.
df_filled = df.ffill()

# Print the 'runtime' column to check the result
runtime_after_fill = df_filled['runtime']
print(runtime_after_fill)

0      onnx
1      onnx
2      onnx
3      onnx
4      onnx
       ... 
147    None
148    None
149    None
150    None
151    None
Name: runtime, Length: 152, dtype: object


In [160]:
df

Unnamed: 0,time,cpu_frequency_0,cpu_frequency_1,cpu_frequency_2,cpu_frequency_3,cpu_frequency_4,cpu_frequency_5,cpu_frequency_6,cpu_frequency_7,cpu_usage_0,...,model,timestamp,name,utilization_gpu,utilization_memory,memory_total,memory_used,power_draw,power_max_limit,temperature_gpu
0,2024-04-30 19:06:21.773,,,,,,,,,,...,,2024/04/30 19:06:21.773,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,183.90 W,350.00 W,58.0
1,2024-04-30 19:06:23.773,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,18.218971,...,m1,,,,,,,,,
2,2024-04-30 19:06:26.776,,,,,,,,,,...,,2024/04/30 19:06:26.776,NVIDIA GeForce RTX 3090,48 %,24 %,24576 MiB,6084 MiB,188.06 W,350.00 W,56.0
3,2024-04-30 19:06:28.773,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,99.582047,...,m1,,,,,,,,,
4,2024-04-30 19:06:31.778,,,,,,,,,,...,,2024/04/30 19:06:31.778,NVIDIA GeForce RTX 3090,35 %,15 %,24576 MiB,6084 MiB,197.91 W,350.00 W,58.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,2024-04-30 19:14:36.969,,,,,,,,,,...,,2024/04/30 19:14:36.969,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.21 W,350.00 W,40.0
148,2024-04-30 19:14:41.971,,,,,,,,,,...,,2024/04/30 19:14:41.971,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.25 W,350.00 W,39.0
149,2024-04-30 19:14:46.972,,,,,,,,,,...,,2024/04/30 19:14:46.972,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.32 W,350.00 W,39.0
150,2024-04-30 19:14:51.974,,,,,,,,,,...,,2024/04/30 19:14:51.974,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.29 W,350.00 W,39.0


In [161]:
# Fill NaN values from the previous row, column by column.
# DataFrame.ffill() replaces fillna(method='ffill'): the `method` argument of
# fillna is deprecated in pandas, and ffill() is what the earlier forward-fill
# cell in this notebook already uses.
df_filled = df.ffill()
df_filled

Unnamed: 0,time,cpu_frequency_0,cpu_frequency_1,cpu_frequency_2,cpu_frequency_3,cpu_frequency_4,cpu_frequency_5,cpu_frequency_6,cpu_frequency_7,cpu_usage_0,...,model,timestamp,name,utilization_gpu,utilization_memory,memory_total,memory_used,power_draw,power_max_limit,temperature_gpu
0,2024-04-30 19:06:21.773,,,,,,,,,,...,,2024/04/30 19:06:21.773,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,183.90 W,350.00 W,58.0
1,2024-04-30 19:06:23.773,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,18.218971,...,m1,2024/04/30 19:06:21.773,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,183.90 W,350.00 W,58.0
2,2024-04-30 19:06:26.776,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,18.218971,...,m1,2024/04/30 19:06:26.776,NVIDIA GeForce RTX 3090,48 %,24 %,24576 MiB,6084 MiB,188.06 W,350.00 W,56.0
3,2024-04-30 19:06:28.773,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,99.582047,...,m1,2024/04/30 19:06:26.776,NVIDIA GeForce RTX 3090,48 %,24 %,24576 MiB,6084 MiB,188.06 W,350.00 W,56.0
4,2024-04-30 19:06:31.778,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,99.582047,...,m1,2024/04/30 19:06:31.778,NVIDIA GeForce RTX 3090,35 %,15 %,24576 MiB,6084 MiB,197.91 W,350.00 W,58.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
147,2024-04-30 19:14:36.969,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,13.105934,...,m3,2024/04/30 19:14:36.969,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.21 W,350.00 W,40.0
148,2024-04-30 19:14:41.971,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,13.105934,...,m3,2024/04/30 19:14:41.971,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.25 W,350.00 W,39.0
149,2024-04-30 19:14:46.972,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,13.105934,...,m3,2024/04/30 19:14:46.972,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.32 W,350.00 W,39.0
150,2024-04-30 19:14:51.974,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,2112.0,13.105934,...,m3,2024/04/30 19:14:51.974,NVIDIA GeForce RTX 3090,0 %,0 %,24576 MiB,6084 MiB,22.29 W,350.00 W,39.0


In [162]:
# Write the forward-filled frame next to the other results; index=False keeps
# the row index out of the CSV.
output_path = f"{results_dir}modified_general_nvidia_filled.csv"
df_filled.to_csv(output_path, index=False)