In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import json

import warnings
warnings.filterwarnings('ignore')

# Loading data 

In [2]:

d2 = pd.read_parquet(
    r"E:\Learning\TEAI Cup\Data\Parquet Data\d2_2_general_cleaning.parquet"
)
d2

Unnamed: 0,machine_id,variable_name,value,timestamp,lowerLimit,upperLimit,nominalValue
0,S-226,stpStepFrIda,"1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,...",2025-12-05 08:15:13,-0.10,0.10,1.00
1,S-226,teil2Wire,"1.9513,1.9592,1.9489,1.9397,1.962,1.9648,1.944...",2025-12-05 08:15:13,-0.20,0.20,2.00
2,S-226,teil2Iso,"3.4863,3.4962,3.4957,3.4841,3.4901,3.4888,3.48...",2025-12-05 08:15:13,-0.20,0.20,3.50
3,S-226,iTeil2IsostempelDef,"49.1378,48.7538,48.448,48.9035,48.923,48.8579,...",2025-12-05 08:15:13,-10.00,10.00,50.00
4,S-226,folgefehlerTeil12,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2025-12-05 08:15:13,0.00,10.00,0.00
...,...,...,...,...,...,...,...
2272467,S-226,teil2GapX,"0.4968,0.4947,0.5017,0.4991,0.5023,0.5029,0.50...",2025-12-10 06:05:06,-0.06,0.00,0.53
2272468,S-226,teil2GapY,"0.4975,0.4852,0.4824,0.4824,0.4941,0.4863,0.48...",2025-12-10 06:05:06,-0.06,0.00,0.53
2272469,S-226,prfungTeil1KastenSpk,"1.8906,1.8895,1.8887,1.8898,1.882,1.8869,1.880...",2025-12-10 06:05:06,0.00,0.35,1.60
2272470,S-226,iTeil1Rastfederdifferenz,"0.0034,0.0032,0.0033,0.0006,0.0056,0.0023,0.00...",2025-12-10 06:05:06,-0.08,0.08,0.00


In [3]:
d2.columns.tolist()

['machine_id',
 'variable_name',
 'value',
 'timestamp',
 'lowerLimit',
 'upperLimit',
 'nominalValue']

# Adding some feature to dataset to detect the variations in value field

In [4]:
def add_defect_detection_columns(row: pd.Series):
    value_list = np.array(list(map(float, row.value.split(","))))

    min_val = np.min(value_list)
    max_val = np.max(value_list)
    mean = np.mean(value_list)
    range_val = max_val - min_val

    upperLimit, lowerLimit, nominal = row.upperLimit, row.lowerLimit, row.nominalValue

    count_outside_values = sum(
        1 for i in value_list if i > upperLimit or i < lowerLimit
    )
    max_deviation_from_nominal = np.max(np.abs(value_list - nominal))

    return pd.Series(
        {
            "min": min_val,
            "max": max_val,
            "avergae": mean,
            "range": range_val,
            "outside_range_count": count_outside_values,
            "max_deviation": max_deviation_from_nominal,
        }
    )


In [5]:
d2[["min", "max", "average", "range", "outside_range_count", "max_deviation"]] = (
    d2.apply(add_defect_detection_columns, axis=1)
)
d2


Unnamed: 0,machine_id,variable_name,value,timestamp,lowerLimit,upperLimit,nominalValue,min,max,average,range,outside_range_count,max_deviation
0,S-226,stpStepFrIda,"1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,...",2025-12-05 08:15:13,-0.10,0.10,1.00,1.0000,1.0000,1.000000,0.0000,100.0,0.0000
1,S-226,teil2Wire,"1.9513,1.9592,1.9489,1.9397,1.962,1.9648,1.944...",2025-12-05 08:15:13,-0.20,0.20,2.00,1.9277,1.9776,1.956294,0.0499,100.0,0.0723
2,S-226,teil2Iso,"3.4863,3.4962,3.4957,3.4841,3.4901,3.4888,3.48...",2025-12-05 08:15:13,-0.20,0.20,3.50,3.4784,3.5090,3.491423,0.0306,100.0,0.0216
3,S-226,iTeil2IsostempelDef,"49.1378,48.7538,48.448,48.9035,48.923,48.8579,...",2025-12-05 08:15:13,-10.00,10.00,50.00,48.0250,49.3720,48.798399,1.3470,100.0,1.9750
4,S-226,folgefehlerTeil12,"0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...",2025-12-05 08:15:13,0.00,10.00,0.00,0.0000,0.0000,0.000000,0.0000,0.0,0.0000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2272467,S-226,teil2GapX,"0.4968,0.4947,0.5017,0.4991,0.5023,0.5029,0.50...",2025-12-10 06:05:06,-0.06,0.00,0.53,0.4937,0.5131,0.502404,0.0194,100.0,0.0363
2272468,S-226,teil2GapY,"0.4975,0.4852,0.4824,0.4824,0.4941,0.4863,0.48...",2025-12-10 06:05:06,-0.06,0.00,0.53,0.4808,0.5100,0.493880,0.0292,100.0,0.0492
2272469,S-226,prfungTeil1KastenSpk,"1.8906,1.8895,1.8887,1.8898,1.882,1.8869,1.880...",2025-12-10 06:05:06,0.00,0.35,1.60,1.8805,1.8957,1.888458,0.0152,100.0,0.2957
2272470,S-226,iTeil1Rastfederdifferenz,"0.0034,0.0032,0.0033,0.0006,0.0056,0.0023,0.00...",2025-12-10 06:05:06,-0.08,0.08,0.00,0.0001,0.0177,0.005447,0.0176,0.0,0.0177


# Saving the data

In [6]:
d2.to_parquet(f"E:\Learning\TEAI Cup\Data\Parquet Data\d2_3_Value_features_added.parquet")