In [12]:
import pandas as pd
from matplotlib import pyplot
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.utils import shuffle
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score

## **Healthy Data**

Below are the sensor readings of healthy gear with loading conditions equal to 50%

In [13]:
h30hz50 = pd.read_csv("h30hz50.csv")

## **Broken Data**

Below are the sensor readings of broken gear with loading conditions equal to 50%  

In [14]:
b30hz50 = pd.read_csv("b30hz50.csv")

Headview of a healthy gear reading from 50% loading

In [15]:
h30hz50.head()

Unnamed: 0,a1,a2,a3,a4
0,2.14416,-1.95821,-0.190533,-4.58475
1,-9.92015,-7.47519,1.79468,-7.47251
2,-1.33059,0.751472,-3.5574,0.328149
3,7.76171,-1.49846,-1.76463,10.9919
4,-0.714011,-0.164771,9.65056,8.97095


## Add Failure Column for classification

### 0 --> Healhty and 1 --> Broken

In [16]:
failure = 0
failureArray = np.zeros((len(h30hz50.index),1))
h30hz50['failure'] = failureArray


Checking the new data table with healthy condition added

In [17]:
h30hz50.head()

Unnamed: 0,a1,a2,a3,a4,failure
0,2.14416,-1.95821,-0.190533,-4.58475,0.0
1,-9.92015,-7.47519,1.79468,-7.47251,0.0
2,-1.33059,0.751472,-3.5574,0.328149,0.0
3,7.76171,-1.49846,-1.76463,10.9919,0.0
4,-0.714011,-0.164771,9.65056,8.97095,0.0


## Add Load and Failure Column for Broken

In [18]:
failure = 1
failureArray = np.ones((len(b30hz50.index),1))
b30hz50['failure'] = failureArray

Checking the new data table with broken condition added

In [19]:
b30hz50.tail()

Unnamed: 0,a1,a2,a3,a4,failure
94203,-1.46153,2.7847,-1.15167,-4.5378,1.0
94204,-3.87952,0.85365,6.42388,-5.47492,1.0
94205,1.73315,4.06958,5.3645,0.216419,1.0
94206,8.97857,10.0652,1.55272,3.65662,1.0
94207,6.15826,5.58468,2.41233,2.03878,1.0


## Concat Healthy and Broken

In [20]:
gear_data   = pd.concat([b30hz50,h30hz50], axis =0)
gear_data

Unnamed: 0,a1,a2,a3,a4,failure
0,-3.934680,6.552160,-1.237980,20.310300,1.0
1,2.402850,9.994380,-3.242650,8.313200,1.0
2,6.242730,-3.175770,-0.686974,-4.193820,1.0
3,-3.994110,-14.144800,3.845360,-5.658600,1.0
4,1.765510,-6.464920,5.096230,-9.001360,1.0
...,...,...,...,...,...
110843,3.245040,-2.692110,2.714350,-0.639589,0.0
110844,1.347960,-2.303540,2.583290,-1.174450,0.0
110845,0.003272,-3.241480,-1.423700,-0.610475,0.0
110846,-0.031329,-3.361870,-1.493090,-2.147170,0.0


## Modelling SVM

In [21]:
training_features = ['a1','a2','a3','a4']
label = ['failure']
x = gear_data[training_features]
y = gear_data[label]
x.shape , y.shape

((205056, 4), (205056, 1))

In [22]:
x,y = shuffle(x,y)
x1=x.head(60000)
y1=y.head(60000)
x1,y1 = shuffle(x1,y1)


In [23]:
x1_train, x1_test, y1_train, y1_test = train_test_split(x1, y1, test_size=0.33, random_state=42)

In [24]:
classifier = svm.SVC(kernel='rbf', gamma='auto', C=1.5)

In [None]:
classifier.fit(x1_train, y1_train.values.ravel())

In [43]:
y_pred = classifier.predict(x1_test)

Calculate the failure rate

In [44]:
# Count total number of test samples
total_test_samples = len(y1_test)

# Count number of predicted failed gears (where prediction is 1)
predicted_failed_gears = sum(y_pred)

# Calculate the failure rate
predicted_failure_rate = predicted_failed_gears / total_test_samples

print(f"Predicted Failure Rate: {predicted_failure_rate:.2%}")


Predicted Failure Rate: 45.55%
