In [71]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.datasets import load_iris
from sklearn import preprocessing
import pandas as pd
from imblearn.over_sampling import RandomOverSampler
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve
from sklearn.model_selection import cross_val_score
from sklearn.metrics import mean_squared_error
import random



# Load sample data
df = pd.read_csv('/content/smoke_detection_iot.csv')
df



Unnamed: 0.1,Unnamed: 0,UTC,Temperature[C],Humidity[%],TVOC[ppb],eCO2[ppm],Raw H2,Raw Ethanol,Pressure[hPa],PM1.0,PM2.5,NC0.5,NC1.0,NC2.5,CNT,Fire Alarm
0,0,1654733331,20.000,57.36,0,400,12306,18520,939.735,0.00,0.00,0.00,0.000,0.000,0,0
1,1,1654733332,20.015,56.67,0,400,12345,18651,939.744,0.00,0.00,0.00,0.000,0.000,1,0
2,2,1654733333,20.029,55.96,0,400,12374,18764,939.738,0.00,0.00,0.00,0.000,0.000,2,0
3,3,1654733334,20.044,55.28,0,400,12390,18849,939.736,0.00,0.00,0.00,0.000,0.000,3,0
4,4,1654733335,20.059,54.69,0,400,12403,18921,939.744,0.00,0.00,0.00,0.000,0.000,4,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62625,62625,1655130047,18.438,15.79,625,400,13723,20569,936.670,0.63,0.65,4.32,0.673,0.015,5739,0
62626,62626,1655130048,18.653,15.87,612,400,13731,20588,936.678,0.61,0.63,4.18,0.652,0.015,5740,0
62627,62627,1655130049,18.867,15.84,627,400,13725,20582,936.687,0.57,0.60,3.95,0.617,0.014,5741,0
62628,62628,1655130050,19.083,16.04,638,400,13712,20566,936.680,0.57,0.59,3.92,0.611,0.014,5742,0


In [78]:
random.seed(42)
# Encode categorical features
label_encoder = preprocessing.LabelEncoder()
df['Unnamed: 0'] = label_encoder.fit_transform(df['Unnamed: 0'])
df['UTC'] = label_encoder.fit_transform(df['UTC'])
df['Temperature[C]'] = label_encoder.fit_transform(df['Temperature[C]'])
df['Humidity[%]'] = label_encoder.fit_transform(df['Humidity[%]'])
df['TVOC[ppb]'] = label_encoder.fit_transform(df['TVOC[ppb]'])
df['eCO2[ppm]'] = label_encoder.fit_transform(df['eCO2[ppm]'])
df['Raw H2'] = label_encoder.fit_transform(df['Raw H2'])
df['Raw Ethanol'] = label_encoder.fit_transform(df['Raw Ethanol'])
df['Pressure[hPa]'] = label_encoder.fit_transform(df['Pressure[hPa]'])
df['PM1.0'] = label_encoder.fit_transform(df['PM1.0'])
df['PM2.5'] = label_encoder.fit_transform(df['PM2.5'])
df['NC0.5'] = label_encoder.fit_transform(df['NC0.5'])
df['NC1.0'] = label_encoder.fit_transform(df['NC1.0'])
df['NC2.5'] = label_encoder.fit_transform(df['NC2.5'])
df['Fire Alarm'] = label_encoder.fit_transform(df['Fire Alarm'])

X = df.drop(['Unnamed: 0'], axis=1)
y = np.array(df['Fire Alarm'])

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create an MLP classifier
mlp = MLPClassifier(hidden_layer_sizes=(100,),        # Number of hidden layers and units per layer
                    activation='relu',                 # Activation function ('identity', 'logistic', 'tanh', 'relu')
                    solver='adam',                     # Solver for weight optimization ('lbfgs', 'sgd', 'adam')
                    alpha=0.0001,                      # L2 penalty (regularization term) parameter
                    batch_size='auto',                 # Size of minibatches for stochastic optimizers
                    learning_rate='constant',          # Learning rate schedule ('constant', 'invscaling', 'adaptive')
                    learning_rate_init=0.001,          # The initial learning rate
                    power_t=0.5,                       # The exponent for inverse scaling learning rate
                    max_iter=200,                      # Maximum number of iterations
                    shuffle=True,                      # Whether to shuffle samples in each iteration
                    random_state=None,                 # Seed for the random number generator
                    tol=0.0001,                        # Tolerance for the optimization
                    verbose=False,                     # Whether to print progress messages
                    warm_start=False,                  # Reuse the previous solution
                    momentum=0.9,                      # Momentum for gradient descent update
                    nesterovs_momentum=True,           # Whether to use Nesterov's momentum
                    early_stopping=False,              # Terminate training when validation score is not improving
                    validation_fraction=0.1,           # Proportion of training data to set aside as validation set
                    beta_1=0.9,                        # Exponential decay rate for estimates of first moment vector in adam
                    beta_2=0.999,                      # Exponential decay rate for estimates of second moment vector in adam
                    epsilon=1e-8,                      # Value for numerical stability in adam
                    n_iter_no_change=10,               # Maximum number of epochs without any improvement in the loss
                    max_fun=15000)                     # Maximum number of function calls for the solver

# Train the MLP classifier
mlp.fit(X_train, y_train)

# Perform cross-validation
scores = cross_val_score(mlp, X_train, y_train, cv=5)
cv_mean = scores.mean()  # Calculate the mean cross-validation accuracy
cv_std = scores.std()  # Calculate the standard deviation of cross-validation accuracy
print("Cross-Validation Mean Accuracy:", cv_mean)
print("Cross-Validation Standard Deviation:", cv_std)

# Make predictions on the test set
y_pred = mlp.predict(X_test)

# Calculate the training error
train_error = mean_squared_error(y_train, mlp.predict(X_train))

# Calculate the test error
test_error = mean_squared_error(y_test, y_pred)

# Compare the training error and test error to assess overfitting
if train_error < test_error:
    print("The model is overfitting.")
else:
    print("The model is not overfitting.")

# Calculate the accuracy of the model on the test set
accuracy = accuracy_score(y_test, y_pred)

# Calculate the training and test accuracy
train_accuracy = mlp.score(X_train, y_train)
test_accuracy = mlp.score(X_test, y_test)

# Evaluate the model
print("Training Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)
print("Accuracy: {:.2f}%".format(accuracy * 100))
print(confusion_matrix(y_test, y_pred))
print(classification_report(y_test, y_pred))


Cross-Validation Mean Accuracy: 0.9927747160105496
Cross-Validation Standard Deviation: 0.008249279006275085
The model is not overfitting.
Training Accuracy: 0.9994810793549417
Test Accuracy: 0.9996806642184257
Accuracy: 99.97%
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      3594
           1       1.00      1.00      1.00      8932

    accuracy                           1.00     12526
   macro avg       1.00      1.00      1.00     12526
weighted avg       1.00      1.00      1.00     12526

