# Importing Libraries

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Load dataset

In [6]:
data = pd.read_csv('failure.csv') 

# 1. Drop irrelevant columns

In [7]:
data_cleaned = data.drop(columns=['Car ID'])

# 2. Combine failure columns into a single target column

In [8]:
data_cleaned['Failure'] = (data_cleaned[['Failure A', 'Failure B', 'Failure C', 'Failure D', 'Failure E']].sum(axis=1) > 0).astype(int)
data_cleaned = data_cleaned.drop(columns=['Failure A', 'Failure B', 'Failure C', 'Failure D', 'Failure E'])

# 3. Convert Temperature to consistent unit (Celsius)

In [9]:
def fahrenheit_to_celsius(temp):
    if "°F" in temp:
        value = float(temp.replace(" °F", ""))
        return (value - 32) * 5.0 / 9.0
    elif "°C" in temp:
        return float(temp.replace(" °C", ""))
    else:
        return np.nan

data_cleaned['Temperature'] = data_cleaned['Temperature'].apply(fahrenheit_to_celsius)

# 4. Handle missing values

In [10]:
data_cleaned['Membership'] = data_cleaned['Membership'].fillna(data_cleaned['Membership'].mode()[0])

# 5. One-hot encode categorical variables

In [11]:
data_encoded = pd.get_dummies(data_cleaned, columns=['Model', 'Color', 'Factory', 'Usage', 'Membership'], drop_first=True)

# 6. Split data into features (X) and target (y)

In [12]:
X = data_encoded.drop(columns=['Failure'])
y = data_encoded['Failure']

# 7. Split data into training and testing sets

In [13]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# 8. Scale numerical features

In [14]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Check results
print("Training data shape:", X_train_scaled.shape)
print("Testing data shape:", X_test_scaled.shape)

# Define the neural network architecture
model = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_scaled.shape[1],)),
    Dropout(0.3),  # Regularization to prevent overfitting
    Dense(32, activation='relu'),
    Dropout(0.3),
    Dense(1, activation ='sigmoid')  # Output layer for binary classification
])

Training data shape: (8064, 55)
Testing data shape: (2017, 55)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


# Compile the model

In [15]:
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy'])

# Set up a callback to save the best model weights

In [16]:
checkpoint = ModelCheckpoint('best_model.keras', monitor='val_loss', save_best_only=True, verbose=1)

# Train the model

In [17]:
history = model.fit(X_train_scaled, y_train,
                    epochs=50,
                    batch_size=32,
                    validation_split=0.2,
                    callbacks=[checkpoint],
                    verbose=1)

Epoch 1/50
[1m181/202[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 2ms/step - accuracy: 0.8187 - loss: 0.4814
Epoch 1: val_loss improved from inf to 0.35804, saving model to best_model.keras
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 3ms/step - accuracy: 0.8230 - loss: 0.4760 - val_accuracy: 0.8834 - val_loss: 0.3580
Epoch 2/50
[1m172/202[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 1ms/step - accuracy: 0.8807 - loss: 0.3632
Epoch 2: val_loss improved from 0.35804 to 0.33583, saving model to best_model.keras
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.8812 - loss: 0.3622 - val_accuracy: 0.8859 - val_loss: 0.3358
Epoch 3/50
[1m195/202[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.8790 - loss: 0.3564
Epoch 3: val_loss improved from 0.33583 to 0.32697, saving model to best_model.keras
[1m202/202[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 

# Load the best weights

In [18]:
model.load_weights('best_model.keras')

# Evaluate the model on the test set

In [19]:
test_loss, test_accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)

In [20]:
print(f"Test Accuracy: {test_accuracy:.2f}")

Test Accuracy: 0.90
