In [1]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [3]:
!apt-get install unrar
# Set 1
!unrar x "1st_test.rar" /content/bearing_data/set1/

# Set 2
!unrar x "2nd_test.rar" /content/bearing_data/set2/

# Set 3
!unrar x "3rd_test.rar" /content/bearing_data/set3/


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
unrar is already the newest version (1:6.1.5-1ubuntu0.1).
0 upgraded, 0 newly installed, 0 to remove and 35 not upgraded.

UNRAR 6.11 beta 1 freeware      Copyright (c) 1993-2022 Alexander Roshal


Extracting from 1st_test.rar


Would you like to replace the existing file /content/bearing_data/set1/1st_test/2003.10.22.12.06.24
1148716 bytes, modified on 2004-05-12 20:02
with a new one
1148716 bytes, modified on 2004-05-12 20:02

[Y]es, [N]o, [A]ll, n[E]ver, [R]ename, [Q]uit A

Extracting  /content/bearing_data/set1/1st_test/2003.10.22.12.06.24        0%  OK 
Extracting  /content/bearing_data/set1/1st_test/2003.10.22.12.09.13        0%  OK 
Extracting  /content/bearing_data/set1/1st_test/2003.10.22.12.14.13        0%  OK 
Extracting  /content/bearing_data/set1/1st_test/2003.10.22.12.19.13        0%  OK 
Extracting  /content/bearing_data/set1/1st_test/2003.

In [19]:
set1_dir = "/content/bearing_data/set1/1st_test/"
set2_dir = "/content/bearing_data/set2/2nd_test/"
set3_dir = "/content/bearing_data/set3/4th_test/txt/"


In [20]:
import os

# Check deeply nested paths
for root, dirs, files in os.walk("/content/bearing_data/set1"):
    print("📂", root)
    if files:
        print("    📄", files[:3])  # print first 3 files only

# Check deeply nested paths in set2
for root, dirs, files in os.walk("/content/bearing_data/set2"):
    print("📂", root)
    if files:
        print("    📄", files[:3])  # show first 3 files only

# Check deeply nested paths in set3
for root, dirs, files in os.walk("/content/bearing_data/set3"):
    print("📂", root)
    if files:
        print("    📄", files[:3])  # show first 3 files only

📂 /content/bearing_data/set1
📂 /content/bearing_data/set1/1st_test
    📄 ['2003.11.08.13.31.44', '2003.11.09.13.15.58', '2003.11.14.17.12.17']
📂 /content/bearing_data/set2
📂 /content/bearing_data/set2/2nd_test
    📄 ['2004.02.14.12.22.39', '2004.02.16.10.42.39', '2004.02.16.06.22.39']
📂 /content/bearing_data/set3
📂 /content/bearing_data/set3/4th_test
📂 /content/bearing_data/set3/4th_test/txt
    📄 ['2004.03.09.04.22.46', '2004.03.11.05.52.46', '2004.03.06.16.02.46']


In [28]:
import numpy as np
import pandas as pd
from scipy.stats import kurtosis, skew
import os
from tqdm import tqdm

def extract_features_from_directory(directory, label):
    files = sorted([
        os.path.join(directory, f)
        for f in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, f)) and len(f.split(".")) == 6
    ])

    data = []
    for file in tqdm(files, desc=f"Extracting {label}"):
        try:
            signal = np.loadtxt(file)
            if signal.ndim == 2:  # Multiple channels (20480, 4)
                for ch in range(signal.shape[1]):
                    channel_data = signal[:, ch]
                    features = {
                        'mean': np.mean(channel_data),
                        'std': np.std(channel_data),
                        'min': np.min(channel_data),
                        'max': np.max(channel_data),
                        'kurtosis': kurtosis(channel_data),
                        'skewness': skew(channel_data),
                        'label': label,
                        'filename': os.path.basename(file),
                        'channel': ch + 1
                    }
                    data.append(features)
            else:  # Single channel
                features = {
                    'mean': np.mean(signal),
                    'std': np.std(signal),
                    'min': np.min(signal),
                    'max': np.max(signal),
                    'kurtosis': kurtosis(signal),
                    'skewness': skew(signal),
                    'label': label,
                    'filename': os.path.basename(file),
                    'channel': 1
                }
                data.append(features)
        except Exception as e:
            print(f"⚠️ Error reading {file}: {e}")
    return pd.DataFrame(data)


In [29]:
# Run this after updating function
features_set1 = extract_features_from_directory('/content/bearing_data/set1/1st_test', label='set1')
features_set2 = extract_features_from_directory('/content/bearing_data/set2/2nd_test', label='set2')
features_set3 = extract_features_from_directory('/content/bearing_data/set3/4th_test/txt', label='set3')

# Combine features
all_features = pd.concat([features_set1, features_set2, features_set3], ignore_index=True)

# Show sample
all_features.head()


Extracting set1: 100%|██████████| 937/937 [00:52<00:00, 17.83it/s]
Extracting set2: 100%|██████████| 984/984 [00:25<00:00, 38.15it/s]
Extracting set3: 100%|██████████| 1144/1144 [00:33<00:00, 33.78it/s]


Unnamed: 0,mean,std,min,max,kurtosis,skewness,label,filename,channel
0,-0.094593,0.081122,-0.72,0.388,1.069163,-0.029993,set1,2003.10.22.12.06.24,1
1,-0.09388,0.070648,-0.564,0.701,3.065884,0.220116,set1,2003.10.22.12.06.24,2
2,-0.093817,0.090648,-0.674,0.359,0.209486,-0.092073,set1,2003.10.22.12.06.24,3
3,-0.093752,0.077508,-0.53,0.256,0.292221,-0.053183,set1,2003.10.22.12.06.24,4
4,-0.090812,0.091461,-0.496,0.4,0.405439,0.034372,set1,2003.10.22.12.06.24,5


In [30]:
# Check for missing/null values
print(all_features.isnull().sum())

# Drop or fill if necessary
all_features.dropna(inplace=True)  # or use .fillna() if you want to impute



mean        0
std         0
min         0
max         0
kurtosis    0
skewness    0
label       0
filename    0
channel     0
dtype: int64


In [31]:
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
all_features['label'] = le.fit_transform(all_features['label'])


In [32]:
from sklearn.model_selection import train_test_split

X = all_features.drop(['label'], axis=1)
y = all_features['label']  # or use a regression target like RUL if available

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [34]:
# Drop non-numeric and target columns
X = all_features.drop(columns=['label', 'filename', 'channel'])  # <- Fix here
y = all_features['label']

# Train-Test Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict and Evaluate
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
y_pred = model.predict(X_test)

print("✅ Accuracy:", accuracy_score(y_test, y_pred))
print("\n📊 Classification Report:\n", classification_report(y_test, y_pred))
print("\n🧾 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))


✅ Accuracy: 0.9865708931917552

📊 Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00      1468
           1       0.97      0.98      0.97       850
           2       0.98      0.98      0.98       884

    accuracy                           0.99      3202
   macro avg       0.98      0.98      0.98      3202
weighted avg       0.99      0.99      0.99      3202


🧾 Confusion Matrix:
 [[1468    0    0]
 [   0  829   21]
 [   0   22  862]]
