In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import GradientBoostingClassifier
import statsmodels.api as sm
from sklearn.metrics import r2_score
from sklearn.metrics import classification_report 
from sklearn.metrics import accuracy_score 
from scipy import stats
from sklearn.model_selection import KFold
import pickle
import sys

In [2]:
def accuracy_classification_report(y_test, preds):
  confusion_mtx = {
      'y_Actual': np.array(y_test),
      'y_Predicted': preds
  }

  confusion_df = pd.DataFrame(confusion_mtx, columns=['y_Actual','y_Predicted'])    

  score = (stats.spearmanr(confusion_df['y_Actual'], confusion_df['y_Predicted']))[0]

  print('This is the soldiers score {}'.format(score))
  print('Accuracy Score :',accuracy_score(confusion_df['y_Actual'], confusion_df['y_Predicted']))
  print(classification_report(confusion_df['y_Actual'], confusion_df['y_Predicted']))

In [3]:
df = pd.read_excel('spalling.xlsx', index_col=0)

In [4]:
df.tail()

Unnamed: 0_level_0,spalling,time,weightloss,firecurve,load,stress,Age,moisture,Compressive strength
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
52.4,0,-,2.5,hc,697,6.97,203,5.1,70
53.1,4,-,2.7,hc,520,5.2,196,4.3,52
53.2,0,-,2.2,hc,532,5.32,196,4.3,52
54.3,0,-,1.7,hc,471,4.71,197,4.5,47
54.4,0,-,1.4,hc,487,4.87,200,4.5,47


In [5]:
# df_spall = df.loc[df['spalling']!=0]
df_spall = df.loc[df['firecurve']=='std']
df_spall = df_spall.drop('firecurve', axis=1)
df_spall = df_spall.drop('weightloss', axis=1)
df_spall = df_spall.drop('time', axis=1)
df_spall = df_spall.drop('Age', axis=1)
df_spall = df_spall.drop('stress', axis=1)
df_use = df_spall

In [6]:
df_use.tail(5)

Unnamed: 0_level_0,spalling,load,moisture,Compressive strength
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
57.2,0,467,4.6,40
39.33,59,436,4.9,92
39.34,41,442,-,-
39.35,55,924,4.9,92
39.4,67,915,4.9,92


In [7]:
def convert(number):
    if number > 0:
        return 1
    else:
        return 0

In [8]:
df_use.drop(df_use[df_use.moisture == '-'].index, inplace=True)
df_use.head(5)

Unnamed: 0_level_0,spalling,load,moisture,Compressive strength
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.1,20,624,4.5,63
1.4,21,617,4.5,63
1.5,17,634,4.5,63
2.1,32,622,4.1,61
2.4,19,616,4.1,61


In [9]:
df_use['spalling'] = df_use['spalling'].apply(lambda x:convert(x))

In [10]:
df_use.head(300)

Unnamed: 0_level_0,spalling,load,moisture,Compressive strength
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.10,1,624,4.5,63
1.40,1,617,4.5,63
1.50,1,634,4.5,63
2.10,1,622,4.1,61
2.40,1,616,4.1,61
...,...,...,...,...
57.10,0,398,4.6,40
57.20,0,467,4.6,40
39.33,1,436,4.9,92
39.35,1,924,4.9,92


In [11]:
data = df_use.drop('spalling',axis=1)
target = df_use['spalling']

In [12]:
# data.to_numpy()

In [13]:
def concrete_classifier(data, target):
    X, y = data, target
    
    kf = KFold(n_splits=5,shuffle=True)
    kf.get_n_splits(X)
    for train_index, test_index in kf.split(X):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
    
    GBC_model = GradientBoostingClassifier(n_estimators=50)    
    GBC_model.fit(X_train,y_train) 
    pred = GBC_model.predict(X_test)
    accuracy_classification_report(y_test, pred)
    
    return GBC_model, pred

In [14]:
classifier_model, pred = concrete_classifier(data.to_numpy(),target.to_numpy())

This is the soldiers score 0.8864052604279182
Accuracy Score : 0.9444444444444444
              precision    recall  f1-score   support

           0       0.86      1.00      0.92         6
           1       1.00      0.92      0.96        12

    accuracy                           0.94        18
   macro avg       0.93      0.96      0.94        18
weighted avg       0.95      0.94      0.95        18



In [15]:
classifier_model.predict_proba([data.iloc[54,:]])[0][1]

0.5089267854409595

In [16]:
int(classifier_model.predict([data.iloc[54,:]]))

1

In [18]:
d = {'load': [601], 'moisture': [5.1], 'Compressive strength': [60]}
d_test = pd.DataFrame(data=d)

print('Your slab will with a 90 percent certainty not spall more than: ')  
print(str(int(classifier_model.predict(d_test))) + ' mm')

Your slab will with a 90 percent certainty not spall more than: 
1 mm


In [19]:
with open("classification_model.pickle", "wb") as file:
    pickle.dump(classifier_model, file)
print ("Model saved.")

Model saved.
