<a href="https://colab.research.google.com/github/Demi-greaterme/ML_Bearing-Failure/blob/main/Predictive_Analysis_System.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import kurtosis, skew

# Load the two recordings (upload them to the Colab session first).
normal = pd.read_csv('/content/Full Load Normal Bearing (1).csv')
fault  = pd.read_csv('/content/Full Load Faulty Bearing (1).csv')

# Drop the time column if it exists. Both spellings are listed because the
# previous guard tested for 'Time' while dropping 'time stamp', which raised
# KeyError whenever 'Time' was present and never removed 'time stamp'.
# errors='ignore' makes the drop a no-op for any name that is absent.
normal = normal.drop(columns=['Time', 'time stamp'], errors='ignore')
fault = fault.drop(columns=['Time', 'time stamp'], errors='ignore')

# Feature extraction for one segment (1 second)
def extract_features(df):
    """Compute summary statistics for each accelerometer axis of one segment.

    For every column in ('AcX', 'AcY', 'AcZ') the returned dict holds, in this
    order: mean, standard deviation, RMS, kurtosis, skewness and crest factor
    (peak absolute value divided by RMS), keyed as ``'<stat>_<axis>'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Segment containing the columns 'AcX', 'AcY' and 'AcZ'.

    Returns
    -------
    dict
        18 entries (6 statistics x 3 axes).
    """
    feats = {}
    for axis in ('AcX', 'AcY', 'AcZ'):
        signal = df[axis].values
        rms = np.sqrt(np.mean(np.square(signal)))
        feats.update({
            f'mean_{axis}': np.mean(signal),
            f'std_{axis}': np.std(signal),
            f'rms_{axis}': rms,
            f'kurt_{axis}': kurtosis(signal),
            f'skew_{axis}': skew(signal),
            # An all-zero signal has RMS 0; report NaN instead of dividing by zero.
            f'crest_{axis}': np.max(np.abs(signal)) / rms if rms != 0 else np.nan,
        })
    return feats

# Split each file into 1-second chunks (1000 samples each)
def process_file(df, label, segment_size=1000):
    """Split a recording into fixed-length segments and extract features from each.

    Parameters
    ----------
    df : pandas.DataFrame
        Recording whose consecutive row slices are passed to ``extract_features``.
    label : int
        Value stored under the ``'label'`` key of every generated row.
    segment_size : int, default 1000
        Number of consecutive rows per segment. 1000 rows span one second only
        if the data is sampled once per millisecond — confirm against the
        actual recording rate.

    Returns
    -------
    pandas.DataFrame
        One row of features per complete segment. A trailing partial segment
        is discarded; a recording shorter than ``segment_size`` yields an
        empty frame.

    Raises
    ------
    ValueError
        If ``segment_size`` is smaller than 1.
    """
    if segment_size < 1:
        raise ValueError(f"segment_size must be >= 1, got {segment_size}")

    rows = []
    # The upper bound stops at the last start index that still leaves a full
    # segment, so incomplete tails never reach extract_features.
    for start in range(0, len(df) - segment_size + 1, segment_size):
        seg = df.iloc[start:start + segment_size]
        f = extract_features(seg)
        f['label'] = label
        rows.append(f)
    return pd.DataFrame(rows)

# Turn each recording into a per-segment feature table
# (label 0 for the normal-bearing file, label 1 for the faulty-bearing file).
normal_features, fault_features = (
    process_file(recording, label)
    for recording, label in ((normal, 0), (fault, 1))
)

# Stack both tables under a fresh 0..n-1 index and persist the result.
final_df = pd.concat((normal_features, fault_features), ignore_index=True)
final_df.to_csv('bearing_features.csv', index=False)

# Report how many feature rows were produced and preview the first few.
print(f"✅ Done! {len(final_df)} total rows generated.")
print(final_df.head())


✅ Done! 4 total rows generated.
   mean_AcX   std_AcX   rms_AcX  kurt_AcX  skew_AcX  crest_AcX  mean_AcY  \
0  0.015034  0.002052  0.015173 -1.081250 -0.607381   1.146435  0.006083   
1  0.014828  0.002085  0.014974 -1.314312 -0.418858   1.169839  0.005963   
2  0.014328  0.000791  0.014350 -0.908844  0.062172   1.122884  0.006103   
3  0.014408  0.000720  0.014426 -0.814038  0.030178   1.129639  0.005499   

    std_AcY   rms_AcY  kurt_AcY  skew_AcY  crest_AcY  mean_AcZ   std_AcZ  \
0  0.000218  0.006086 -0.395360 -0.878813   1.052937  0.000267  0.000122   
1  0.000237  0.005967 -0.849299 -0.600621   1.094426  0.000266  0.000140   
2  0.000852  0.006162 -0.985953 -0.431070   1.406413  0.000964  0.000470   
3  0.000828  0.005561 -1.310557 -0.161511   1.317075  0.000978  0.000511   

    rms_AcZ   kurt_AcZ  skew_AcZ  crest_AcZ  label  
0  0.000293  71.463314  5.253646   6.654978      0  
1  0.000301  77.507796  6.708859   6.499152      0  
2  0.001072  -0.443259  0.055874   1.821575    

In [None]:
# Print the feature table's column names, non-null counts and dtypes.
final_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 19 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   mean_AcX   4 non-null      float64
 1   std_AcX    4 non-null      float64
 2   rms_AcX    4 non-null      float64
 3   kurt_AcX   4 non-null      float64
 4   skew_AcX   4 non-null      float64
 5   crest_AcX  4 non-null      float64
 6   mean_AcY   4 non-null      float64
 7   std_AcY    4 non-null      float64
 8   rms_AcY    4 non-null      float64
 9   kurt_AcY   4 non-null      float64
 10  skew_AcY   4 non-null      float64
 11  crest_AcY  4 non-null      float64
 12  mean_AcZ   4 non-null      float64
 13  std_AcZ    4 non-null      float64
 14  rms_AcZ    4 non-null      float64
 15  kurt_AcZ   4 non-null      float64
 16  skew_AcZ   4 non-null      float64
 17  crest_AcZ  4 non-null      float64
 18  label      4 non-null      int64  
dtypes: float64(18), int64(1)
memory usage: 740.0 bytes


In [None]:
# Number of feature rows (one per extracted segment).
len(final_df)

4