In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd

from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

In [None]:
# Load the data
# NOTE(review): absolute path — presumably a Colab upload location; confirm before running elsewhere.
data = pd.read_excel("/content/PrestoFillStab.xlsx")

# View the last rows of the data
data.tail()

Unnamed: 0,TestVolume (litres),Configuration,Test Pressure (mbar) /10,Final Result Code (1=Pass),Fill time (s) / 1000,Stabilisation Time(s)/1000
11071,0.1,0,15500,1,1000,25000
11072,0.1,0,15500,1,1000,25000
11073,0.1,0,15500,1,1000,25000
11074,0.1,0,15500,1,1000,25000
11075,0.1,0,15500,1,1000,25000


In [None]:
# Check the shape of the dataset as (rows, columns)
data.shape

(11076, 6)

In [None]:

# Check the column names and data types of the dataset
data.dtypes

TestVolume (litres)           float64
Configuration                   int64
Test Pressure (mbar) /10        int64
Final Result Code (1=Pass)      int64
Fill time (s) / 1000            int64
Stabilisation Time(s)/1000      int64
dtype: object

In [None]:
# Count the number of observations for each distinct stabilisation-time value
# (this column is used as the prediction target further down)
data['Stabilisation Time(s)/1000'].value_counts()

25000    5727
60000    2528
90000    1202
70000     322
31000      12
         ... 
62250       1
37750       1
13250       1
74800       1
53250       1
Name: Stabilisation Time(s)/1000, Length: 443, dtype: int64

In [None]:
# Split the data into features (X) and target (y)
TARGET_COLUMN = 'Stabilisation Time(s)/1000'

# Features: everything except the fill time, the configuration and the target itself
X = data.drop(columns=['Fill time (s) / 1000', 'Configuration', TARGET_COLUMN])
# Target: kept as a single-column DataFrame
y = data[[TARGET_COLUMN]]

In [None]:
# Split the data for training and testing at a ratio of 80/20.
# A fixed random_state makes the shuffle deterministic, so the split (and every
# score computed from it) is reproducible after a kernel restart.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Inspect the training target (plain-text dump of the single-column frame)
print(y_train)

       Stabilisation Time(s)/1000
285                         25000
9839                        25000
8060                        25000
10770                       25000
669                         25000
...                           ...
10023                       25000
2825                        60000
3727                        60000
1496                        60000
10114                       25000

[8860 rows x 1 columns]


In [None]:
# Inspect the training features (plain-text dump of the frame)
print(X_train)

       TestVolume (litres)  ...  Final Result Code (1=Pass)
285                    0.1  ...                           1
9839                   0.1  ...                           1
8060                   0.1  ...                           4
10770                  0.1  ...                           1
669                    0.1  ...                           1
...                    ...  ...                         ...
10023                  0.1  ...                           1
2825                   0.4  ...                           1
3727                   0.4  ...                          13
1496                   0.2  ...                           1
10114                  0.1  ...                           1

[8860 rows x 3 columns]


In [None]:
# Report, per column, whether any value is missing.
# The element-wise isna() must run *before* the any() reduction: reducing first
# (data.any()) yields plain booleans, for which a NaN test is always False.
data.isna().any()

TestVolume (litres)           False
Configuration                 False
Test Pressure (mbar) /10      False
Final Result Code (1=Pass)    False
Fill time (s) / 1000          False
Stabilisation Time(s)/1000    False
dtype: bool

In [None]:
# Report, per column, whether every value is finite (neither NaN nor +/-inf).
# The element-wise isfinite() must run *before* the all() reduction: reducing
# first (data.all()) yields plain booleans, which are always finite.
np.isfinite(data).all()

TestVolume (litres)           True
Configuration                 True
Test Pressure (mbar) /10      True
Final Result Code (1=Pass)    True
Fill time (s) / 1000          True
Stabilisation Time(s)/1000    True
dtype: bool

In [None]:
# Cast every training feature column to float so they all share one dtype
X_train = X_train.astype(float)

In [None]:
# Preview X_train with any NaN-containing columns removed.
# NOTE: drop() returns a new frame and the result is not assigned here,
# so X_train itself is left unchanged by this cell.
X_train.drop(X_train.columns[np.isnan(X_train).any()], axis=1)

Unnamed: 0,TestVolume (litres),Test Pressure (mbar) /10,Final Result Code (1=Pass)
285,0.1,15500.0,1.0
9839,0.1,15500.0,1.0
8060,0.1,22600.0,4.0
10770,0.1,15500.0,1.0
669,0.1,15500.0,1.0
...,...,...,...
10023,0.1,15500.0,1.0
2825,0.4,15500.0,1.0
3727,0.4,15500.0,13.0
1496,0.2,15500.0,1.0


In [None]:
# Check the dtypes of the training features after the float cast
X_train.dtypes

TestVolume (litres)           float64
Test Pressure (mbar) /10      float64
Final Result Code (1=Pass)    float64
dtype: object

In [None]:
# Check the dtype of the training target
y_train.dtypes

Stabilisation Time(s)/1000    int64
dtype: object

In [None]:
# Summary statistics of the training features (count, mean, spread, quartiles)
X_train.describe()

Unnamed: 0,TestVolume (litres),Test Pressure (mbar) /10,Final Result Code (1=Pass)
count,8860.0,8860.0,8860.0
mean,1.723363,16590.090293,2.662528
std,3.287559,4243.286231,3.736691
min,0.1,10000.0,1.0
25%,0.1,15500.0,1.0
50%,0.2,15500.0,1.0
75%,0.8,15500.0,1.0
max,10.0,49800.0,13.0


In [None]:
# Locate any entries at or above the largest finite float64 value.
# Empty index arrays in the result mean no such entries exist.
np.where(X_train.values >= np.finfo(np.float64).max)

(array([], dtype=int64), array([], dtype=int64))

In [None]:
# Replace any missing training values with the mean of their column
column_means = X_train.mean()
X_train = X_train.fillna(column_means)

In [None]:
# Print the shape of each partition, in the order: train features, train target,
# test features, test target
for partition in (X_train, y_train, X_test, y_test):
    print(partition.shape)

(8860, 3)
(8860, 1)
(2216, 3)
(2216, 1)


In [None]:
# Train a logistic regression model.
# Only the non-default settings are spelled out: the remaining arguments of the
# original call were library defaults, and some of them (e.g. multi_class) are
# deprecated in newer scikit-learn releases.
# max_iter is raised well above the default so lbfgs has room to converge.
# NOTE(review): the target is a numeric time with several hundred distinct values,
# each treated as a separate class here — confirm classification (rather than
# regression) is intended.
model = LogisticRegression(solver='lbfgs', max_iter=5000)

# y_train is a single-column DataFrame; flatten it to 1-D, which is the shape
# fit() expects, to avoid the column_or_1d DataConversionWarning.
model.fit(X_train, np.ravel(y_train))


  y = column_or_1d(y, warn=True)


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=5000,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [None]:
# Run predictions on the held-out test set and print the accuracy score
# (fraction of test rows whose predicted value exactly matches the true value)
y_pred = model.predict(X_test)
print(accuracy_score(y_test, y_pred))

0.5717509025270758


In [None]:
# Run a test prediction on a single hand-written sample.
# Values must follow the column order of X:
# [TestVolume (litres), Test Pressure (mbar) /10, Final Result Code (1=Pass)]
model.predict(np.array([[0.1,15500,1]]))

array([25000])

In [None]:
# Save the model (serialize)
import pickle

# The context manager guarantees the file is flushed and closed even if
# pickling raises, instead of leaving the handle open until garbage collection.
with open("PrestoStab.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

In [None]:
# Reload the model (deserialize)
# Read back the file written by the save step ("PrestoStab.pkl"); the previous
# name "PrestoFil.pkl" did not match what this notebook saves.
# SECURITY: pickle.load executes arbitrary code — only load files you created or trust.
with open("PrestoStab.pkl", "rb") as model_file:
    model_pk = pickle.load(model_file)

In [None]:
# Rerun a prediction with the reloaded model to check that it works after deserialization.
# NOTE(review): presumably the same feature order as the training columns
# (volume, pressure, result code) — confirm; a pressure of 0 looks out of range.
model_pk.predict(np.array([[0.8,0,2]]))

array([25000])