In [49]:
# Import warnings and add a filter to ignore them
import warnings
warnings.simplefilter('ignore')

In [50]:
#Import libraries for plant optimization using machine learning and deep learning
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
import seaborn as sns

# classify the blending products based on the input features
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_wine
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Cross validation
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

In [51]:
# Load data and prepare


#df = pd.read_csv(r"C:\Users\Suresh.Tripathi\Downloads\Geomet\data.csv")
df = pd.read_csv("C:/Users/Suresh.Tripathi/Downloads/Geomet/data.csv")
df.head(5)

Unnamed: 0,Seams,Inherant_Moist(%),Ash(%),Vol_Matter(%),F.Carbon(%),CV_kcal/kg,SiO2 %,Yields (%),coal_recoveries,specifc_gravity,UCS_Mpa,Throughput_rates_tph,Products
0,8,5.3,55.7,17.9,21.1,2431,62.6,90,40,1.9,19.0,105,LowCV_Product
1,18,5.3,55.7,17.9,21.1,2431,62.6,90,40,1.9,1.8,105,LowCV_Product
2,28,5.3,55.7,17.9,21.1,2431,62.6,90,40,1.9,17.0,105,LowCV_Product
3,38,5.3,55.7,17.9,21.1,2431,62.6,90,40,1.9,17.0,105,LowCV_Product
4,48,5.3,55.7,17.9,21.1,2431,62.6,90,40,1.9,17.0,105,LowCV_Product


In [74]:
#check each column format and data type
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 13 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Seams                 60 non-null     int64  
 1    Inherant_Moist(%)    60 non-null     float64
 2   Ash(%)                60 non-null     float64
 3   Vol_Matter(%)         60 non-null     float64
 4   F.Carbon(%)           60 non-null     float64
 5    CV_kcal/kg           60 non-null     int64  
 6    SiO2 %               60 non-null     float64
 7   Yields (%)            60 non-null     int64  
 8   coal_recoveries       60 non-null     int64  
 9   specifc_gravity       60 non-null     float64
 10  UCS_Mpa               60 non-null     float64
 11  Throughput_rates_tph  60 non-null     int64  
 12  Products              60 non-null     int64  
dtypes: float64(7), int64(6)
memory usage: 6.2 KB


In [75]:
 #Declare feature vector and target variable
X = df.drop('Products', axis=1)

y = df['Products']

In [76]:
X.head()

Unnamed: 0,Seams,Inherant_Moist(%),Ash(%),Vol_Matter(%),F.Carbon(%),CV_kcal/kg,SiO2 %,Yields (%),coal_recoveries,specifc_gravity,UCS_Mpa,Throughput_rates_tph
0,8,5.3,55.7,17.9,21.1,2431,62.6,90,40,1.9,19.0,105
1,18,5.3,55.7,17.9,21.1,2431,62.6,90,40,1.9,1.8,105
2,28,5.3,55.7,17.9,21.1,2431,62.6,90,40,1.9,17.0,105
3,38,5.3,55.7,17.9,21.1,2431,62.6,90,40,1.9,17.0,105
4,48,5.3,55.7,17.9,21.1,2431,62.6,90,40,1.9,17.0,105


In [77]:
y.head()

0    2
1    2
2    2
3    2
4    2
Name: Products, dtype: int64

In [78]:
# the products is non-balance and need correction
df['Products'].value_counts()

0    30
1    18
2    12
Name: Products, dtype: int64

In [79]:
# convert labels into multi-class values (0,1,2 etc)

y[y == 'HighCV_Product'] = 0
y[y == 'MediumCV_Product'] = 1
y[y =='LowCV_Product'] = 2

In [80]:
df['Products'] = pd.to_numeric(df['Products'])

In [81]:
# full correlation table
df.corr().style.background_gradient(cmap='viridis')

Unnamed: 0,Seams,Inherant_Moist(%),Ash(%),Vol_Matter(%),F.Carbon(%),CV_kcal/kg,SiO2 %,Yields (%),coal_recoveries,specifc_gravity,UCS_Mpa,Throughput_rates_tph,Products
Seams,1.0,-0.115283,0.126731,-0.142757,-0.105832,-0.12024,-0.017813,0.077142,0.01042,0.106284,0.086186,0.062509,0.144168
Inherant_Moist(%),-0.115283,1.0,-0.775286,0.77554,0.591515,0.696183,0.153882,-0.85549,-0.57203,-0.830234,-0.433692,-0.784008,-0.74946
Ash(%),0.126731,-0.775286,1.0,-0.974444,-0.962613,-0.991745,0.038072,0.845621,0.469086,0.988104,0.576859,0.961452,0.943077
Vol_Matter(%),-0.142757,0.77554,-0.974444,1.0,0.904562,0.95567,-0.077292,-0.770116,-0.581378,-0.961996,-0.603892,-0.90799,-0.963365
F.Carbon(%),-0.105832,0.591515,-0.962613,0.904562,1.0,0.986546,-0.082158,-0.769638,-0.311643,-0.928383,-0.53851,-0.926529,-0.878155
CV_kcal/kg,-0.12024,0.696183,-0.991745,0.95567,0.986546,1.0,-0.02948,-0.805565,-0.387689,-0.968506,-0.561527,-0.95762,-0.932347
SiO2 %,-0.017813,0.153882,0.038072,-0.077292,-0.082158,-0.02948,1.0,0.043941,0.483592,0.004995,0.15242,-0.090604,-0.088315
Yields (%),0.077142,-0.85549,0.845621,-0.770116,-0.769638,-0.805565,0.043941,1.0,0.426077,0.873342,0.42712,0.858736,0.700113
coal_recoveries,0.01042,-0.57203,0.469086,-0.581378,-0.311643,-0.387689,0.483592,0.426077,1.0,0.507878,0.475597,0.333736,0.495689
specifc_gravity,0.106284,-0.830234,0.988104,-0.961996,-0.928383,-0.968506,0.004995,0.873342,0.507878,1.0,0.581188,0.96454,0.926903


In [82]:
 #Split data into separate training and test set
# split X and y into training and testing sets

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [83]:
# Logistic Regression
model = LogisticRegression()
model.fit(X_train, y_train)
y_train_hat = model.predict(X_train)
y_test_hat = model.predict(X_test)

print(model)
print('Train performance')
print('-------------------------------------------------------')
print(classification_report(y_train, y_train_hat))

print('Test performance')
print('-------------------------------------------------------')
print(classification_report(y_test, y_test_hat))

print('Confusion matrix')
print('-------------------------------------------------------')
print(confusion_matrix(y_test, y_test_hat))

LogisticRegression()
Train performance
-------------------------------------------------------
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        15
           2       1.00      1.00      1.00         7

    accuracy                           1.00        42
   macro avg       1.00      1.00      1.00        42
weighted avg       1.00      1.00      1.00        42

Test performance
-------------------------------------------------------
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         5

    accuracy                           1.00        18
   macro avg       1.00      1.00      1.00        18
weighted avg       1.00      1.00      1.00        18

Confusion matrix
-----------------------------------------------------

In [84]:
# Support Vector Machines
model = SVC()
model.fit(X_train, y_train)
y_train_hat = model.predict(X_train)
y_test_hat = model.predict(X_test)

print(model)
print('Train performance')
print('-------------------------------------------------------')
print(classification_report(y_train, y_train_hat))

print('Test performance')
print('-------------------------------------------------------')
print(classification_report(y_test, y_test_hat))

print('Confusion matrix')
print('-------------------------------------------------------')
print(confusion_matrix(y_test, y_test_hat))

SVC()
Train performance
-------------------------------------------------------
              precision    recall  f1-score   support

           0       0.83      1.00      0.91        20
           1       1.00      0.73      0.85        15
           2       1.00      1.00      1.00         7

    accuracy                           0.90        42
   macro avg       0.94      0.91      0.92        42
weighted avg       0.92      0.90      0.90        42

Test performance
-------------------------------------------------------
              precision    recall  f1-score   support

           0       0.83      1.00      0.91        10
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         5

    accuracy                           0.89        18
   macro avg       0.94      0.78      0.80        18
weighted avg       0.91      0.89      0.87        18

Confusion matrix
-------------------------------------------------------
[[10  0  0]


In [85]:
# Random Forest
model = RandomForestClassifier(n_jobs=-1,random_state=123)
model.fit(X_train, y_train)
y_train_hat = model.predict(X_train)
y_test_hat = model.predict(X_test)

print(model)
print('Train performance')
print('-------------------------------------------------------')
print(classification_report(y_train, y_train_hat))

print('Test performance')
print('-------------------------------------------------------')
print(classification_report(y_test, y_test_hat))

print('Confusion matrix')
print('-------------------------------------------------------')
print(confusion_matrix(y_test, y_test_hat))

RandomForestClassifier(n_jobs=-1, random_state=123)
Train performance
-------------------------------------------------------
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        20
           1       1.00      1.00      1.00        15
           2       1.00      1.00      1.00         7

    accuracy                           1.00        42
   macro avg       1.00      1.00      1.00        42
weighted avg       1.00      1.00      1.00        42

Test performance
-------------------------------------------------------
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         5

    accuracy                           1.00        18
   macro avg       1.00      1.00      1.00        18
weighted avg       1.00      1.00      1.00        18

Confusion matrix
----------------------

In [88]:
# Neural nets
model = MLPClassifier()
model.fit(X_train, y_train)
y_train_hat = model.predict(X_train)
y_test_hat = model.predict(X_test)

print(model)
print('Train performance')
print('-------------------------------------------------------')
print(classification_report(y_train, y_train_hat))

print('Test performance')
print('-------------------------------------------------------')
print(classification_report(y_test, y_test_hat))

print('Confusion matrix')
print('-------------------------------------------------------')
print(confusion_matrix(y_test, y_test_hat))

MLPClassifier()
Train performance
-------------------------------------------------------
              precision    recall  f1-score   support

           0       0.57      1.00      0.73        20
           1       0.00      0.00      0.00        15
           2       1.00      1.00      1.00         7

    accuracy                           0.64        42
   macro avg       0.52      0.67      0.58        42
weighted avg       0.44      0.64      0.51        42

Test performance
-------------------------------------------------------
              precision    recall  f1-score   support

           0       0.77      1.00      0.87        10
           1       0.00      0.00      0.00         3
           2       1.00      1.00      1.00         5

    accuracy                           0.83        18
   macro avg       0.59      0.67      0.62        18
weighted avg       0.71      0.83      0.76        18

Confusion matrix
-------------------------------------------------------
[[

In [None]:
from xgboost import XGBClassifier

model = XGBClassifier(random_state=123)
model.fit(X_train, y_train)
y_train_hat = model.predict(X_train)
y_test_hat = model.predict(X_test)

print(model)
print('Train performance')
print('-------------------------------------------------------')
print(classification_report(y_train, y_train_hat))

print('Test performance')
print('-------------------------------------------------------')
print(classification_report(y_test, y_test_hat))

print('Roc_auc score')
print('-------------------------------------------------------')
print(roc_auc_score(y_test, y_test_hat))
print('')

print('Confusion matrix')
print('-------------------------------------------------------')
print(confusion_matrix(y_test, y_test_hat))