# **Using Logistic Regression to classify the results of the 'Acoustic Extinguisher Fire Dataset'**

## **Let's import the core libraries**

In [1]:
! pip install openpyxl



In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import re

from scipy.stats import skew
from sklearn.base import BaseEstimator, TransformerMixin
from joblib import dump, load

from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

warnings.filterwarnings('ignore')

## 1. Data retrieval

   ### *Now read data from local file .csv*

In [None]:
DATASETS_DIR = 'datasets/' 
RETRIEVED_DATA = 'raw-data.csv'

def data_retrieval(url):
     
    # Loading data from specific url
    data = pd.read_csv(url)
    
    data.to_csv(DATASETS_DIR + RETRIEVED_DATA, index=False)
    
    return print('Data stored in {}'.format(DATASETS_DIR + RETRIEVED_DATA))

In [None]:
URL = 'C:/Users/rbernal/Documents/GitHub/Proyecto/FAE/data/data_fire.csv'
data_retrieval(URL)
df = pd.read_csv(DATASETS_DIR + RETRIEVED_DATA)

In [9]:
df.head()

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,SIZE,FUEL,DISTANCE,DESIBEL,AIRFLOW,FREQUENCY,STATUS
0,1,gasoline,10,96,0.0,75,0
1,1,gasoline,10,96,0.0,72,1
2,1,gasoline,10,96,2.6,70,1
3,1,gasoline,10,96,3.2,68,1
4,1,gasoline,10,109,4.5,67,1


## 2. Preprocess and transform data

### *Use One hot encoder for categorical var "FUEL"*

In [19]:
pd.concat([df, pd.get_dummies(df.FUEL, drop_first=True)], axis=1)

Unnamed: 0,SIZE,FUEL,DISTANCE,DESIBEL,AIRFLOW,FREQUENCY,STATUS,kerosene,lpg,thinner
0,1,gasoline,10,96,0.0,75,0,0,0,0
1,1,gasoline,10,96,0.0,72,1,0,0,0
2,1,gasoline,10,96,2.6,70,1,0,0,0
3,1,gasoline,10,96,3.2,68,1,0,0,0
4,1,gasoline,10,109,4.5,67,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...
17437,7,lpg,190,86,2.2,5,0,0,1,0
17438,7,lpg,190,84,2.0,4,0,0,1,0
17439,7,lpg,190,80,1.5,3,0,0,1,0
17440,7,lpg,190,76,0.4,2,0,0,1,0


## 2. Train-test & Training model


### Spit and Standar Scaler

In [None]:
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

In [None]:
st_sc = StandardScaler()
X_train = st_sc.fit_transform(X_train)
X_test = st_sc.fit_transform(X_test)

### Training and results of the Logistic Regression model

In [None]:
regressor = LogisticRegression()
regressor.fit(X_train,y_train)
y_pred = regressor.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.87      0.89      0.88      2614
           1       0.89      0.87      0.88      2619

    accuracy                           0.88      5233
   macro avg       0.88      0.88      0.88      5233
weighted avg       0.88      0.88      0.88      5233



### Save & Load model

In [None]:
# Save the model
dump(regressor, 'model.joblib')

# Load the model
regressor = load('model.joblib')

### Matrix confusion & accuracy metric

In [None]:
#Matrix Confusion
confusion_matrix(y_test,y_pred)

array([[2332,  282],
       [ 347, 2272]])

In [None]:
print(f'Model actual accuracy: {accuracy_score(y_test, y_pred)}')

Model actual accuracy: 0.8798012612268298
