### Importing 

In [93]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

CoilData = pd.read_csv("data/Constriction.csv")

In [94]:
CoilData

Unnamed: 0.1,Unnamed: 0,coil,furnace Number,analyse,Hardness_1,Hardness_2,Width,Temperature before finishing mill,Temperature after finishing mill,Thickness,...,s,al,ma,b,n,ti,cr,va,mo,constriction
0,0,396378,1,K371,10003,101,1302.1,1147,921,4.36,...,143,304,291,1,34,6,302,0,25,0
1,1,396376,3,K371,10123,101,1282.3,1150,920,4.37,...,90,395,384,1,33,12,189,25,7,0
2,2,396377,4,K321,10040,102,1297.4,1183,933,4.43,...,115,476,463,1,20,11,288,0,40,0
3,3,396379,3,K371,10243,102,1295.2,1165,910,4.44,...,98,306,296,1,21,9,253,0,9,0
4,4,396380,4,K321,10012,100,1293.3,1192,909,3.95,...,121,340,329,1,28,8,297,0,23,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
57089,57089,20814,1,K371,9781,98,1010.0,1193,883,1.84,...,129,303,296,1,30,8,238,1,27,-1
57090,57090,20815,3,K371,9799,98,1010.6,1176,900,1.84,...,127,315,308,1,32,8,236,1,25,-1
57091,57091,20816,1,K371,9864,99,1008.6,1187,909,1.84,...,120,355,347,1,37,9,230,0,21,-1
57092,57092,20817,3,K371,10039,100,1012.7,1161,893,1.84,...,129,306,296,1,27,7,280,0,25,-1


### Cleaning Data

In [95]:
def clean_data(CoilData):
    #Replace *** with nan & drop them
    CoilData = CoilData.replace('^\*+$', np.NaN, regex=True)
    CoilData = CoilData.dropna()

    #Also drop where we couldnt calculate a constriction due to incorrect/missing csv files
    CoilData = CoilData.drop(CoilData[CoilData['constriction'] == -1].index)
    CoilData = CoilData.reset_index(drop=True)
    CoilData = CoilData.drop('Unnamed: 0',1)
    return CoilData

CoilData = clean_data(CoilData)
CoilData

  CoilData = CoilData.drop('Unnamed: 0',1)


Unnamed: 0,coil,furnace Number,analyse,Hardness_1,Hardness_2,Width,Temperature before finishing mill,Temperature after finishing mill,Thickness,Thickness profile,...,s,al,ma,b,n,ti,cr,va,mo,constriction
0,396378,1,K371,10003,101,1302.1,1147,921,4.36,31,...,143,304,291,1,34,6,302,0,25,0
1,396376,3,K371,10123,101,1282.3,1150,920,4.37,35,...,90,395,384,1,33,12,189,25,7,0
2,396377,4,K321,10040,102,1297.4,1183,933,4.43,25,...,115,476,463,1,20,11,288,0,40,0
3,396379,3,K371,10243,102,1295.2,1165,910,4.44,28,...,98,306,296,1,21,9,253,0,9,0
4,396380,4,K321,10012,100,1293.3,1192,909,3.95,26,...,121,340,329,1,28,8,297,0,23,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55322,15618,1,K371,9948,99,1257.3,1183,913,3.27,11,...,160,389,339,1,26,7,199,0,30,0
55323,15620,1,K371,9951,100,1256.6,1180,922,2.68,12,...,160,405,379,1,24,7,194,0,30,1
55324,15621,3,K371,9885,99,1257.2,1182,919,2.68,13,...,159,334,324,1,26,8,248,0,36,0
55325,15622,1,K371,9885,99,1257.1,1194,923,2.68,11,...,159,334,324,1,26,8,248,0,36,0


In [96]:
CoilData.to_csv("data/CleanCoilData.csv")

 ### Exploring Data 

In [97]:
print(CoilData[CoilData['constriction'] == 1].shape)
print(CoilData[CoilData['constriction'] == 0].shape)
CoilData.shape

(3725, 25)
(51602, 25)


(55327, 25)

In [98]:
columns = CoilData.columns.to_list()
columns

['coil',
 'furnace Number',
 'analyse',
 'Hardness_1',
 'Hardness_2',
 'Width',
 'Temperature before finishing mill',
 'Temperature after finishing mill',
 'Thickness',
 'Thickness profile',
 'c',
 'mn',
 'si',
 'nb',
 'p',
 's',
 'al',
 'ma',
 'b',
 'n',
 'ti',
 'cr',
 'va',
 'mo',
 'constriction']

##### Draw graphs for each feature

In [None]:
for col in columns:
    plt.figure(figsize=(10,9),dpi=100)
    plt.title(CoilData[col].name)
    plt.ylabel("constriction")
    plt.xlabel(CoilData[col].name)
    plt.scatter(CoilData[col],CoilData.constriction)
    #plt.close()

plt.show() 

In [None]:
columns

### Classification

In [99]:
#Create datasets

everything = ['Hardness_1', 'Hardness_2', 'Width', 'Temperature before finishing mill','Temperature after finishing mill', 'Thickness', 'c', 'mn', 'si', 'nb', 'p', 's', 'al', 'ma', 'b', 'n', 'ti', 'cr', 'va', 'mo']
hand_picked = ['c','mn','p','s','al','ma','b','ti','va','mo']
X = CoilData[hand_picked]
y = CoilData['constriction']

from sklearn.model_selection import train_test_split

#Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)


#### Models

In [67]:
from sklearn.neighbors import KNeighborsClassifier
number_of_neighbours = 200

# Declare an instance of it
clf = KNeighborsClassifier(number_of_neighbours,weights='uniform')
# Fit the model
clf.fit(X_train,y_train)


KNeighborsClassifier(n_neighbors=200)

In [None]:
# Evaluate your model
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

clf.score(X_test, y_test)
y_pred = clf.predict(X_test)


print(classification_report(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))

##### RandomForest

In [83]:
from sklearn.ensemble import RandomForestClassifier
clf = RandomForestClassifier(max_depth=30, random_state=42)
clf.fit(X_train,y_train)

y_pred

In [85]:
# Evaluate your model
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

#clf.score(X_train, y_train)


print(classification_report(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))

              precision    recall  f1-score   support

          -1       1.00      1.00      1.00      5023
           1       1.00      1.00      1.00     33705

    accuracy                           1.00     38728
   macro avg       1.00      1.00      1.00     38728
weighted avg       1.00      1.00      1.00     38728

[[ 5023     0]
 [    0 33705]]
