In [1]:
# Importing packages
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import pyarrow

In [2]:
path_P = "../21-01/year_month=21-01/plugin=ipmi_pub/metric=total_power/a_0.parquet"
dataset_P = pd.read_parquet(path_P, engine='pyarrow')

# Casting of node values into integers and timestamp into datetimes
dataset_P['node'] = dataset_P['node'].astype(int)
dataset_P['timestamp'] = pd.to_datetime(dataset_P['timestamp'])

print(dataset_P)

                          timestamp  value  node
0         2021-01-21 06:42:40+00:00    700   128
1         2021-01-21 06:43:00+00:00    700   128
2         2021-01-21 06:43:20+00:00    580   128
3         2021-01-21 06:43:40+00:00    700   128
4         2021-01-21 06:44:00+00:00    680   128
...                             ...    ...   ...
112525874 2021-01-22 16:19:00+00:00    540   968
112525875 2021-01-22 16:19:20+00:00    540   968
112525876 2021-01-22 16:19:40+00:00    560   968
112525877 2021-01-22 16:20:00+00:00    540   968
112525878 2021-01-22 16:20:20+00:00    540   968

[112525879 rows x 3 columns]


In [3]:
# Pivoting of the node rows into separated columns, keeping the timestamp column as index of the dataframe
dataset_pivoted = dataset_P.pivot(index='timestamp', columns='node')
dataset_pivoted.reset_index(inplace=True)
del(dataset_P)

dataset_pivoted['timestamp'] = pd.to_datetime(dataset_pivoted['timestamp'])
dataset_pivoted.set_index('timestamp', inplace=True)

In [4]:
# Substitution of the NaN values present using a linear interpolation method
for column in dataset_pivoted.columns:
    dataset_pivoted[column].interpolate(method='linear', inplace=True)

dataset_pivoted

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataset_pivoted[column].interpolate(method='linear', inplace=True)


Unnamed: 0_level_0,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value
node,0,1,2,3,4,5,6,7,8,9,...,970,971,972,973,974,975,976,977,978,979
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2021-01-01 00:00:00+00:00,380.000000,420.0,420.0,360.0,440.0,400.0,440.0,420.0,420.0,420.0,...,740.0,440.0,500.0,480.0,460.000000,480.0,400.0,760.0,760.0,480.0
2021-01-01 00:00:20+00:00,360.000000,400.0,420.0,380.0,440.0,400.0,440.0,420.0,420.0,420.0,...,580.0,440.0,500.0,480.0,480.000000,460.0,400.0,700.0,600.0,480.0
2021-01-01 00:00:40+00:00,360.000000,400.0,420.0,360.0,440.0,400.0,440.0,420.0,420.0,420.0,...,620.0,440.0,520.0,480.0,480.000000,460.0,400.0,700.0,540.0,480.0
2021-01-01 00:01:00+00:00,360.000000,400.0,420.0,360.0,440.0,400.0,440.0,420.0,420.0,440.0,...,620.0,440.0,520.0,480.0,460.000000,480.0,400.0,680.0,640.0,480.0
2021-01-01 00:01:20+00:00,360.000000,400.0,420.0,380.0,440.0,400.0,440.0,420.0,420.0,420.0,...,860.0,460.0,520.0,500.0,480.000000,460.0,400.0,840.0,580.0,480.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-01-31 23:30:40+00:00,440.000000,400.0,400.0,360.0,420.0,400.0,440.0,420.0,420.0,420.0,...,500.0,560.0,500.0,420.0,480.000000,560.0,460.0,500.0,540.0,500.0
2021-01-31 23:31:00+00:00,420.000000,400.0,400.0,360.0,420.0,380.0,440.0,420.0,420.0,420.0,...,500.0,560.0,500.0,420.0,473.333333,560.0,560.0,500.0,480.0,500.0
2021-01-31 23:43:00+00:00,426.666667,400.0,410.0,360.0,420.0,380.0,440.0,420.0,420.0,420.0,...,500.0,560.0,500.0,420.0,466.666667,560.0,520.0,500.0,480.0,500.0
2021-01-31 23:43:20+00:00,433.333333,400.0,420.0,360.0,420.0,380.0,440.0,420.0,420.0,420.0,...,500.0,560.0,500.0,420.0,460.000000,560.0,480.0,500.0,480.0,500.0


In [5]:
# Resampling of the DataFrame on an hourly basis and calculate the sum
dataset_pivoted = dataset_pivoted.resample('1h').sum()

dataset_pivoted

Unnamed: 0_level_0,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value,value
node,0,1,2,3,4,5,6,7,8,9,...,970,971,972,973,974,975,976,977,978,979
timestamp,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2021-01-01 00:00:00+00:00,65920.0,72640.0,74980.0,65780.0,79100.0,71120.0,80220.0,75820.0,75620.0,75680.0,...,117020.000000,82480.000000,92580.0,86380.0,82660.0,82900.0,108260.0,136410.000000,114960.0,86400.0
2021-01-01 01:00:00+00:00,66480.0,73000.0,75060.0,65300.0,78780.0,70660.0,79960.0,75940.0,75760.0,75640.0,...,114460.000000,82560.000000,92640.0,86760.0,82620.0,83060.0,174660.0,135920.000000,116440.0,86080.0
2021-01-01 02:00:00+00:00,66300.0,72660.0,75100.0,65960.0,78840.0,71000.0,80040.0,76220.0,75880.0,75680.0,...,113980.000000,82860.000000,92220.0,86220.0,83240.0,83080.0,172000.0,136120.000000,119380.0,86040.0
2021-01-01 03:00:00+00:00,66220.0,72420.0,74980.0,65540.0,78800.0,71220.0,79940.0,76100.0,75800.0,75760.0,...,115720.000000,82580.000000,92580.0,86180.0,82400.0,82820.0,169940.0,136040.000000,116700.0,85940.0
2021-01-01 04:00:00+00:00,66560.0,72740.0,74840.0,65360.0,78840.0,71260.0,80040.0,76240.0,76000.0,75680.0,...,139140.000000,82920.000000,91360.0,86640.0,83020.0,82600.0,174400.0,135500.000000,83520.0,85800.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-01-31 19:00:00+00:00,12160.0,11220.0,11580.0,10140.0,12320.0,11160.0,12480.0,11980.0,11840.0,11760.0,...,15100.000000,13900.000000,15020.0,12870.0,14220.0,14560.0,14230.0,15160.000000,14750.0,14680.0
2021-01-31 20:00:00+00:00,9115.0,8400.0,8720.0,7660.0,9240.0,8340.0,9400.0,8880.0,8840.0,8820.0,...,10958.333333,10585.714286,11320.0,9640.0,11225.0,10300.0,10810.0,11712.857143,10770.0,10780.0
2021-01-31 21:00:00+00:00,23565.0,21600.0,22170.0,19560.0,23780.0,21360.0,24360.0,22840.0,22820.0,22700.0,...,28481.666667,27774.285714,29020.0,25340.0,28215.0,28020.0,28280.0,29727.142857,27310.0,27640.0
2021-01-31 22:00:00+00:00,7420.0,6800.0,7060.0,6160.0,7480.0,6740.0,7620.0,7240.0,7200.0,7180.0,...,8920.000000,9080.000000,9180.0,7860.0,8620.0,8700.0,8680.0,8800.000000,8480.0,8880.0


In [8]:
# Dataset exporting as a CSV file for later use
output_file = 'PowerDataset_pivoted.csv'
dataset_pivoted.to_csv(output_file, index_label='timestamp')