In [423]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import statsmodels.api as sm
from sklearn.metrics import r2_score
from sklearn.metrics import classification_report 
from sklearn.metrics import accuracy_score 
from scipy import stats
from sklearn.model_selection import KFold
import pickle
import sys

In [219]:
def accuracy_classification_report(y_test, preds):
  confusion_mtx = {
      'y_Actual': np.array(y_test),
      'y_Predicted': preds
  }

  confusion_df = pd.DataFrame(confusion_mtx, columns=['y_Actual','y_Predicted'])    

  score = (stats.spearmanr(confusion_df['y_Actual'], confusion_df['y_Predicted']))[0]

  print('This is the soldiers score {}'.format(score))
  print('Accuracy Score :',accuracy_score(confusion_df['y_Actual'], confusion_df['y_Predicted']))
  print(classification_report(confusion_df['y_Actual'], confusion_df['y_Predicted']))

In [11]:
df = pd.read_excel('spalling.xlsx', index_col=0)

In [16]:
df.head()

Unnamed: 0_level_0,spalling,time,weightloss,firecurve,load,stress,Age,moisture,Compressive strength
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1.1,20,14.8,8,std,624,6.24,176,4.5,63
1.4,21,14.8,9,std,617,6.17,177,4.5,63
1.5,17,12.7,3,std,634,6.34,400,4.5,63
2.1,32,9.0,19,std,622,6.22,180,4.1,61
2.4,19,12.8,10,std,616,6.16,180,4.1,61


In [14]:
# df_spall = df.loc[df['spalling']!=0]
df_spall = df.loc[df['firecurve']=='std']
df_spall = df_spall.drop('firecurve', axis=1)
df_spall = df_spall.drop('weightloss', axis=1)
df_spall = df_spall.drop('time', axis=1)
df_spall = df_spall.drop('Age', axis=1)
df_use = df_spall

In [72]:
df_use.head(5)

Unnamed: 0_level_0,spalling,load,stress,moisture,Compressive strength
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1.1,1,624,6.24,4.5,63
1.4,1,617,6.17,4.5,63
1.5,1,634,6.34,4.5,63
2.1,1,622,6.22,4.1,61
2.4,1,616,6.16,4.1,61


In [82]:
def convert(number):
    if number > 0:
        return 1
    else:
        return 0

In [119]:
df_use.drop(df_use[df_use.moisture == '-'].index, inplace=True)
df_use.head(5)

Unnamed: 0_level_0,spalling,load,stress,moisture,Compressive strength
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1.1,1,624,6.24,4.5,63
1.4,1,617,6.17,4.5,63
1.5,1,634,6.34,4.5,63
2.1,1,622,6.22,4.1,61
2.4,1,616,6.16,4.1,61


In [88]:
df_use['spalling'] = df_use['spalling'].apply(lambda x:convert(x))

In [108]:
data = df_use.drop('spalling',axis=1)
target = df_use['spalling']

In [313]:
# data.to_numpy()

In [408]:
def concrete_classifier(data, target):
    X, y = data, target
    
    kf = KFold(n_splits=5,shuffle=True)
    kf.get_n_splits(X)
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
    
    GBC_model = GradientBoostingClassifier(n_estimators=50)    
    GBC_model.fit(X_train,y_train) 
    pred = GBC_model.predict(X_test)
    accuracy_classification_report(y_test, pred)
    
    return GBC_model, pred

In [409]:
classifier_model, pred = concrete_classifier(data.to_numpy(),target.to_numpy())

This is the soldiers score 0.6846531968814578
Accuracy Score : 0.8235294117647058
              precision    recall  f1-score   support

           0       1.00      0.62      0.77         8
           1       0.75      1.00      0.86         9

    accuracy                           0.82        17
   macro avg       0.88      0.81      0.81        17
weighted avg       0.87      0.82      0.82        17



In [413]:
classifier_model.predict_proba([data.iloc[54,:]])[0][1]

0.8123343519223368

In [412]:
int(classifier_model.predict([data.iloc[54,:]]))

1

In [338]:
d = {'load': [601], 'stress': [6], 'moisture': [5.1], 'Compressive strength': [60]}
d_test = pd.DataFrame(data=d)

print('Your slab will with a 90 percent certainty not spall more than: ')  
print(str(int(classifier_model.predict(d_test))) + ' mm')

Your slab will with a 90 percent certainty not spall more than: 
1 mm


In [427]:
with open("classification_model.pickle", "wb") as file:
    pickle.dump(classifier_model, file)
print ("Model saved.")

Model saved.
