In [1]:
import pandas as pd

In [2]:
df = pd.read_csv('data.csv')

In [3]:
cols = df.columns[1:]

In [4]:
descs = {
    'torque': 'Torque is the turning force of a one-meter rod required to hold a 1kg mass constant',
    'outlet_pressure_bar': 'The outlet pressure next to the piston valve',
    'air_flow': 'amount of air that an air compressor can deliver',
    'noise_db': 'Level of sound produced by an air compressor during operation',
    'outlet_temp': 'temperature of the compressed air as it exits the compressor',
    'wpump_outlet_press': 'water pump outlet pressure',
    'water_inlet_temp': 'Water inlet temperature occurs according to the radiator size and fan capacity',
    'water_outlet_temp': 'Water outlet temperature is related to how much of the heat generated by the heating of the compressed air is transferred to the water',
    'wpump_power': 'Water pump power',
    'water_flow': 'Water flow',
    'oilpump_power': 'Oil pump power',
    'gaccx': 'Ground acceleration in the x axis from where the compressor is attached to a rigid part or construction',
    'gaccy': 'Ground acceleration in the y axis from where the compressor is attached to a rigid part or construction',
    'gaccz': 'Ground acceleration in the z axis from where the compressor is attached to a rigid part or construction',
    'haccx': 'Head acceleration in the x axis measured from the compressor head bolt or upper cooling fin. Each piece has a natural frequency and they oscillate at a certain frequency as a result of the incoming drive',
    'haccy': 'Head acceleration in the y axis measured from the compressor head bolt or upper cooling fin. Each piece has a natural frequency and they oscillate at a certain frequency as a result of the incoming drive',
    'haccz': 'Head acceleration in the z axis measured from the compressor head bolt or upper cooling fin. Each piece has a natural frequency and they oscillate at a certain frequency as a result of the incoming drive',
    'rpm': 'Number of revolutions per minute of the electric motor',
    'motor_power': 'amount of electrical energy that a motor can convert into mechanical power'
}

In [5]:
targets = ['bearings', 'wpump', 'radiator', 'exvalve', 'acmotor']

In [6]:
prompt_template = 'Select the variables from the list that are most relevant for predicting <target_variable>. ' +\
                  'Provide the variables sorted starting with the one with the highest priority. ' +\
                  'Variables and their descriptions: <all_variables>\n' + \
                  '```json\n{"reasoning": "<your reasoning>", "selected_variables": ["variable 1", "variable 2", ..., "variable n"]}\n```'

In [7]:
str_descs = '\n'.join([f'{k}: {v}' for k, v in descs.items()])

In [8]:
prompt = prompt_template.replace('<all_variables>', '\n' + str_descs)
prompt = prompt.replace('<target_variable>', 'motor failure in air compressors')

In [9]:
print(prompt)

Select the variables from the list that are most relevant for predicting motor failure in air compressors. Provide the variables sorted starting with the one with the highest priority. Variables and their descriptions: 
torque: Torque is the turning force of a one-meter rod required to hold a 1kg mass constant
outlet_pressure_bar: The outlet pressure next to the piston valve
air_flow: amount of air that an air compressor can deliver
noise_db: Level of sound produced by an air compressor during operation
outlet_temp: temperature of the compressed air as it exits the compressor
wpump_outlet_press: water pump outlet pressure
water_inlet_temp: Water inlet temperature occurs according to the radiator size and fan capacity
water_outlet_temp: Water outlet temperature is related to how much of the heat generated by the heating of the compressed air is transferred to the water
wpump_power: Water pump power
water_flow: Water flow
oilpump_power: Oil pump power
gaccx: Ground acceleration in the x 

In [10]:
df.columns

Index(['id', 'rpm', 'motor_power', 'torque', 'outlet_pressure_bar', 'air_flow',
       'noise_db', 'outlet_temp', 'wpump_outlet_press', 'water_inlet_temp',
       'water_outlet_temp', 'wpump_power', 'water_flow', 'oilpump_power',
       'oil_tank_temp', 'gaccx', 'gaccy', 'gaccz', 'haccx', 'haccy', 'haccz',
       'bearings', 'wpump', 'radiator', 'exvalve', 'acmotor'],
      dtype='object')

In [11]:
df['bearings'] = df['bearings'].replace({'Ok': 0, 'Noisy': 1})
df['wpump'] = df['wpump'].replace({'Ok': 0, 'Noisy': 1})
df['radiator'] = df['radiator'].replace({'Clean': 0, 'Dirty': 1})
df['exvalve'] = df['exvalve'].replace({'Clean': 0, 'Dirty': 1})
df['acmotor'] = df['acmotor'].replace({'Clean': 0, 'Dirty': 1})

  df['bearings'] = df['bearings'].replace({'Ok': 0, 'Noisy': 1})
  df['wpump'] = df['wpump'].replace({'Ok': 0, 'Noisy': 1})
  df['radiator'] = df['radiator'].replace({'Clean': 0, 'Dirty': 1})
  df['exvalve'] = df['exvalve'].replace({'Clean': 0, 'Dirty': 1})


In [12]:
def calc_score(preds, corrs, topk=5):
    corrs = (corrs - corrs.min()) / (corrs.max() - corrs.min())
    sum_score = 0
    for i, val in enumerate(preds[:topk]):
        sum_score += corrs[val] / (i+1)
    sum_score /= i
    return sum_score

## Bearing failure

In [13]:
preds_bearing = {
  "reasoning": "Bearing failure in air compressors is typically preceded by increased mechanical vibration, load fluctuations, and noise. The most critical indicators are head acceleration measurements (vibration near the bearing location), followed by torque (indicates mechanical load), rpm (rotational stress), and noise level (can rise with wear). Ground acceleration may indicate structural vibrations but is less localized. Thermal and fluid variables are generally less predictive unless they indirectly signal increased stress or cooling/lubrication issues.",
  "selected_variables": [
    "haccx",
    "haccy",
    "haccz",
    "torque",
    "rpm",
    "motor_power",
    "noise_db",
    "gaccx",
    "gaccy",
    "gaccz",
    "oilpump_power"
  ]
}


In [14]:
col = 'bearings'
corrs = df[list(descs.keys()) + [col]].corr()[col].abs().sort_values(ascending=False)
# calc_score(preds_radiator['selected_variables'], corrs, topk=None)
[round(corrs[item] * 100, 2) for item in preds_bearing['selected_variables']][:5]

[0.07, 1.94, 4.51, 2.75, 0.01]

In [17]:
corrs * 100

bearings               100.000000
water_flow              36.026515
noise_db                34.915246
air_flow                16.941736
outlet_temp             14.553592
water_outlet_temp       14.125291
water_inlet_temp        13.896190
wpump_power             10.111925
wpump_outlet_press       8.847722
gaccz                    4.537326
haccz                    4.511697
outlet_pressure_bar      2.805601
torque                   2.754589
oilpump_power            1.942545
haccy                    1.940429
motor_power              1.227129
gaccy                    0.106807
gaccx                    0.102937
haccx                    0.072432
rpm                      0.011239
Name: bearings, dtype: float64

In [152]:
len(preds_bearings['selected_variables'])

8

## Water pump failure

In [19]:
preds_wpump = {
  "reasoning": "Water pump failure in air compressors is primarily related to mechanical or electrical issues such as motor overload, cavitation, flow restriction, overheating, or wear. Key indicators include reduced water flow, abnormal outlet pressure, and increased power draw. The most directly relevant variables are those that measure the pump’s hydraulic output (flow and pressure), its power consumption, and resulting temperature changes. Vibration and noise may also indicate mechanical wear or imbalance. Thermal variables downstream of the pump can reflect its degraded performance.",
  "selected_variables": [
    "wpump_power",
    "water_flow",
    "wpump_outlet_press",
    "water_outlet_temp",
    "water_inlet_temp",
    "noise_db",
    "torque",
    "motor_power",
    "gaccx",
    "gaccy",
    "gaccz"
  ]
}

In [20]:
col = 'wpump'
corrs = df[list(descs.keys()) + [col]].corr()[col].abs().sort_values(ascending=False)
# calc_score(preds_wpump['selected_variables'], corrs, topk=None)
[round(corrs[item] * 100, 2) for item in preds_wpump['selected_variables']][:5]

[15.28, 21.38, 13.62, 15.87, 14.52]

In [21]:
corrs

wpump                  1.000000
water_flow             0.213761
air_flow               0.166481
water_outlet_temp      0.158693
outlet_temp            0.154489
wpump_power            0.152801
water_inlet_temp       0.145230
wpump_outlet_press     0.136235
noise_db               0.074709
oilpump_power          0.032483
outlet_pressure_bar    0.006053
torque                 0.005940
gaccy                  0.005189
gaccz                  0.004280
haccz                  0.004270
motor_power            0.003835
gaccx                  0.002785
haccy                  0.002301
haccx                  0.000964
rpm                    0.000071
Name: wpump, dtype: float64

## Radiator failure

In [22]:
preds_radiator = {
  "reasoning": "Radiator failure in air compressors typically results in inadequate cooling, which leads to abnormal temperature patterns in both air and water circuits. The most relevant indicators are thermal variables that reflect the radiator’s heat exchange performance, such as water outlet and inlet temperatures, and outlet air temperature. Water flow and pump performance influence radiator cooling effectiveness. A failing radiator often causes rising outlet temps and reduced temperature delta across the radiator. Noise and vibration may also indirectly indicate fan or structural issues affecting the radiator.",
  "selected_variables": [
    "water_outlet_temp",
    "water_inlet_temp",
    "outlet_temp",
    "water_flow",
    "wpump_power",
    "wpump_outlet_press",
    "noise_db",
    "motor_power",
    "torque"
  ]
}

In [23]:
col = 'radiator'
corrs = df[list(descs.keys()) + [col]].corr()[col].abs().sort_values(ascending=False)
[corrs[item] for item in preds_radiator['selected_variables']][:5]
# calc_score(preds_radiator['selected_variables'], corrs, topk=None)
[round(corrs[item] * 100, 2) for item in preds_radiator['selected_variables']][:5]

[31.85, 31.83, 31.78, 86.88, 25.16]

In [24]:
corrs

radiator               1.000000
water_flow             0.868788
water_outlet_temp      0.318526
water_inlet_temp       0.318285
outlet_temp            0.317844
wpump_power            0.251588
wpump_outlet_press     0.177435
noise_db               0.096561
oilpump_power          0.073828
air_flow               0.022791
haccy                  0.020830
haccz                  0.016788
gaccz                  0.016755
gaccy                  0.014711
torque                 0.010613
outlet_pressure_bar    0.009438
motor_power            0.008300
gaccx                  0.002300
haccx                  0.000397
rpm                    0.000230
Name: radiator, dtype: float64

## Outlet valve failure

In [25]:
preds_valve = {
  "reasoning": "Outlet valve failure in air compressors typically results in changes to air flow dynamics and pressure behavior. A stuck or leaking valve can cause reduced outlet pressure or inconsistent air flow, while a blocked valve might lead to pressure buildup and overheating. Therefore, the most relevant indicators are outlet pressure, air flow, and outlet temperature. Torque and motor load may rise due to increased resistance or mechanical stress. Noise and vibration may reflect mechanical irregularities at the valve level, especially if the failure involves physical damage or wear.",
  "selected_variables": [
    "outlet_pressure_bar",
    "air_flow",
    "outlet_temp",
    "torque",
    "motor_power",
    "noise_db",
    "haccx",
    "haccy",
    "haccz",
    "rpm"
  ]
}

In [26]:
col = 'exvalve'
corrs = df[list(descs.keys()) + [col]].corr()[col].abs().sort_values(ascending=False)
[corrs[item] for item in preds_valve['selected_variables']][:5]
[round(corrs[item] * 100, 2) for item in preds_valve['selected_variables']][:5]
# calc_score(preds_radiator['selected_variables'], corrs)

[1.66, 52.64, 14.42, 1.43, 0.3]

In [27]:
corrs

exvalve                1.000000
air_flow               0.526380
water_flow             0.360284
water_outlet_temp      0.151188
water_inlet_temp       0.150898
outlet_temp            0.144152
wpump_power            0.130885
wpump_outlet_press     0.093098
noise_db               0.088678
gaccy                  0.023534
outlet_pressure_bar    0.016574
gaccz                  0.016326
haccz                  0.015680
torque                 0.014258
oilpump_power          0.005163
haccy                  0.004758
motor_power            0.002958
haccx                  0.000886
rpm                    0.000607
gaccx                  0.000328
Name: exvalve, dtype: float64

## Motor failure

In [77]:
preds_motor = {
  "reasoning": "Motor failure in air compressors is commonly associated with electrical overload, mechanical stress, excessive vibration, and thermal issues. The most indicative variables include motor power (to detect overloading or unusual consumption), torque (for mechanical resistance), and rpm (to observe speed fluctuations or stalling). Vibrations at the motor mounting (ground acceleration) and head (if driven directly) help detect mechanical imbalances, while noise can reflect internal wear or misalignment. A drop in air flow or changes in outlet pressure may signal downstream effects of motor underperformance.",
  "selected_variables": [
    "motor_power",
    "torque",
    "rpm",
    "gaccx",
    "gaccy",
    "gaccz",
    "noise_db",
    "haccx",
    "haccy",
    "haccz",
    "air_flow"
  ]
}