In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os

In [2]:
# Resolve the CSV file name against the current working directory into a
# canonical absolute path, then read the whole file into a DataFrame.
path = os.path.realpath('SW-106.csv')
dataset = pd.read_csv(filepath_or_buffer=path)

In [3]:
# Display the frame exactly as loaded from the CSV (long format: one row per reading)
dataset

Unnamed: 0,when,hwid,metric,value
0,1601510422405,SW-106,S49,0
1,1601510422405,SW-106,S50,0
2,1601510422405,SW-106,S53,0
3,1601510422405,SW-106,S54,0
4,1601510422405,SW-106,S55,1
...,...,...,...,...
26187744,1616830116654,SW-106,S143,0
26187745,1616830116654,SW-106,S140,73
26187746,1616830116654,SW-106,S138,51
26187747,1616830116654,SW-106,S137,82


In [4]:
# Long -> wide: one row per 'when' timestamp and one column per 'metric',
# populated from 'value'. 'when' is epoch milliseconds, so a timezone-aware
# UTC 'datetime' column is derived from it (appended as the last column).
dataset = (
    dataset
    .pivot(index='when', columns='metric', values='value')
    .reset_index()
    .assign(datetime=lambda frame: pd.to_datetime(frame['when'], unit='ms', utc=True))
)

# Bring 'datetime' to the front, leaving every other column in its current order
dataset = dataset[['datetime', *dataset.columns.drop('datetime')]]

# Order rows chronologically and renumber the index from 0
dataset = dataset.sort_values('when').reset_index(drop=True)

In [5]:
# Display the wide frame: 'datetime' and 'when' followed by one column per metric
dataset

metric,datetime,when,A5,A9,ComError,P1,P10,P15,P16,P17,...,SA12,SA2,SA3,SA4,SA5,SA6,SA7,SA8,SA9,SW
0,2020-10-01 00:00:22.405000+00:00,1601510422405,,,,2.0,450.0,0.0,100.0,400.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,2020-10-01 00:00:25.346000+00:00,1601510425346,,,,,,,,,...,,,,,,,,,,1.0
2,2020-10-01 00:00:52.526000+00:00,1601510452526,,,,2.0,450.0,0.0,100.0,400.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2020-10-01 00:00:55.485000+00:00,1601510455485,,,,,,,,,...,,,,,,,,,,1.0
4,2020-10-01 00:01:22.806000+00:00,1601510482806,,,,2.0,450.0,0.0,100.0,400.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398034,2021-03-27 07:27:35.592000+00:00,1616830055592,,,,,,,,,...,,,,,,,,,,1.0
398035,2021-03-27 07:28:02.953000+00:00,1616830082953,,,,0.0,450.0,0.0,140.0,450.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
398036,2021-03-27 07:28:05.771000+00:00,1616830085771,,,,,,,,,...,,,,,,,,,,1.0
398037,2021-03-27 07:28:33.937000+00:00,1616830113937,,,,0.0,450.0,0.0,140.0,450.0,...,0.0,16.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [6]:
# Metric columns to impute, selected by name prefix. 'datetime', 'when' and
# 'time_diff' match none of these prefixes and are therefore left untouched.
cols_to_fill = [col for col in dataset.columns if col.startswith(('S', 'P', 'A', 'ComError'))]

# Seconds elapsed since the previous row; the first row has no predecessor, so it is NaN
time_diff_seconds = dataset['datetime'].diff().dt.total_seconds().rename('time_diff')

# NaN-free series used only for the statistics below (the first row stays in `dataset`)
time_differences = time_diff_seconds.dropna()

# Final column order: 'time_diff' first, everything else in its existing order.
# Excluding 'time_diff' here keeps the cell safe to re-run.
cols = ['time_diff'] + [col for col in dataset.columns if col != 'time_diff']

# Build the result in a single concat instead of assigning a list of columns and then
# inserting 'time_diff': the column-by-column assignment fragments the frame and
# triggers pandas' PerformanceWarning (previously worked around with a trailing .copy()).
#
# Gaps are forward-filled from the last earlier reading; back-fill then covers only the
# leading rows that have no earlier reading. This is NOT nearest-neighbour filling.
# NOTE(review): values are also carried across long recording gaps (the summary below
# reports a max gap of ~5.0e6 s) - confirm this is acceptable for the analysis.
columns_axis_name = dataset.columns.name
filled_names = set(cols_to_fill)
passthrough_cols = [
    col for col in dataset.columns
    if col not in filled_names and col != 'time_diff'
]
dataset = pd.concat(
    [time_diff_seconds, dataset[passthrough_cols], dataset[cols_to_fill].ffill().bfill()],
    axis=1,
)[cols]
# concat drops the name of the column axis; restore it so the frame displays as before
dataset.columns.name = columns_axis_name

# Print summary statistics
print("Statistiche delle differenze temporali (in secondi):")
print(time_differences.describe())

# Frequency table: how many times each distinct gap length occurs, sorted by gap length
freq_df = time_differences.value_counts().reset_index()
freq_df.columns = ['differenza_secondi', 'frequenza']
freq_df = freq_df.sort_values('differenza_secondi').reset_index(drop=True)

  dataset['time_diff'] = dataset['datetime'].diff().dt.total_seconds()


Statistiche delle differenze temporali (in secondi):
count    3.980380e+05
mean     3.848802e+01
std      8.665382e+03
min      2.040000e+00
25%      2.880000e+00
50%      2.352050e+01
75%      2.743700e+01
max      5.003774e+06
Name: time_diff, dtype: float64


In [7]:
# Display the frame after gap filling, with 'time_diff' (seconds) as the first column
dataset

metric,time_diff,datetime,when,A5,A9,ComError,P1,P10,P15,P16,...,SA12,SA2,SA3,SA4,SA5,SA6,SA7,SA8,SA9,SW
0,,2020-10-01 00:00:22.405000+00:00,1601510422405,0.0,0.0,1.0,2.0,450.0,0.0,100.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
1,2.941,2020-10-01 00:00:25.346000+00:00,1601510425346,0.0,0.0,1.0,2.0,450.0,0.0,100.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
2,27.180,2020-10-01 00:00:52.526000+00:00,1601510452526,0.0,0.0,1.0,2.0,450.0,0.0,100.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
3,2.959,2020-10-01 00:00:55.485000+00:00,1601510455485,0.0,0.0,1.0,2.0,450.0,0.0,100.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
4,27.321,2020-10-01 00:01:22.806000+00:00,1601510482806,0.0,0.0,1.0,2.0,450.0,0.0,100.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
398034,2.845,2021-03-27 07:27:35.592000+00:00,1616830055592,0.0,0.0,0.0,0.0,450.0,0.0,140.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
398035,27.361,2021-03-27 07:28:02.953000+00:00,1616830082953,0.0,0.0,0.0,0.0,450.0,0.0,140.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
398036,2.818,2021-03-27 07:28:05.771000+00:00,1616830085771,0.0,0.0,0.0,0.0,450.0,0.0,140.0,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
398037,28.166,2021-03-27 07:28:33.937000+00:00,1616830113937,0.0,0.0,0.0,0.0,450.0,0.0,140.0,...,0.0,16.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0


In [17]:
# Rows that arrived more than 2500 s (about 42 minutes) after the previous row.
# The first row's NaN time_diff compares as False and is therefore excluded.
dataset.loc[dataset['time_diff'].gt(2500)]

metric,time_diff,datetime,when,A5,A9,ComError,P1,P10,P15,P16,...,SA12,SA2,SA3,SA4,SA5,SA6,SA7,SA8,SA9,SW
21590,3864.831,2020-10-05 23:46:49.996000+00:00,1601941609996,0.0,0.0,1.0,2.0,450.0,0.0,100.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
22642,3978.27,2020-10-06 06:02:47.587000+00:00,1601964167587,0.0,0.0,1.0,2.0,450.0,0.0,100.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
27932,4011.158,2020-10-07 13:47:50.274000+00:00,1602078470274,0.0,0.0,1.0,2.0,450.0,0.0,100.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
36810,3890.06,2020-10-09 15:23:03.019000+00:00,1602256983019,0.0,0.0,1.0,2.0,450.0,0.0,100.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
82511,48450.573,2020-10-20 05:50:32.029000+00:00,1603173032029,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
145841,4013.006,2020-11-02 13:20:44.128000+00:00,1604323244128,0.0,0.0,1.0,2.0,450.0,0.0,140.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
183135,5003774.307,2021-01-07 13:14:09.411000+00:00,1610025249411,0.0,0.0,1.0,2.0,450.0,0.0,140.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
190951,5733.419,2021-01-09 07:10:06.700000+00:00,1610176206700,0.0,0.0,1.0,2.0,450.0,0.0,140.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
223053,2606.864,2021-01-16 03:09:05.912000+00:00,1610766545912,0.0,0.0,1.0,2.0,450.0,0.0,140.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
223085,8173.097,2021-01-16 05:32:57.730000+00:00,1610775177730,0.0,0.0,1.0,2.0,450.0,0.0,140.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
