In [1]:
# Imports . . . .
import pandas as pd
# For DataFrames
from pandas import DataFrame

#Logistic Regression for Damage detection (classification)
from sklearn.linear_model import LogisticRegression

#importing ElasticNEt
from sklearn.linear_model import ElasticNet

#Importing 'train_test_split'
from sklearn.model_selection import train_test_split
#importing plots
import matplotlib.pyplot as plt


In [2]:
#To convert a XLS or XLSX file to csv use this code
#the 'File' has to be in same folder as this code file

#Data_xls = pd.read_excel("Data.xlsx", index_col=None)
#Data_xls.to_csv("Data.csv", encoding = "utf-8")

In [3]:
Data_csv = pd.read_csv("Data.csv")
Data_csv.head(5)

Unnamed: 0.1,Unnamed: 0,ID,kHz,G,B,Damage,PerCD,Distance,PerCd
0,0,1,30000,5.1e-05,0.001085,0,0.0,0.0,0
1,1,1,30100,5.2e-05,0.001082,0,0.0,0.0,0
2,2,1,30200,5.2e-05,0.001082,0,0.0,0.0,0
3,3,1,30300,5.1e-05,0.001089,0,0.0,0.0,0
4,4,1,30400,5e-05,0.001094,0,0.0,0.0,0


In [4]:
# Taking DataFrames to Be used (removing 'Unnamed: 0')
data = DataFrame(Data_csv, columns = ['ID','kHz','G','B','Damage','PerCD','Distance'])
print(data.shape)
print(data.head(5))
data.describe().transpose()

(194020, 7)
   ID    kHz         G         B  Damage  PerCD  Distance
0   1  30000  0.000051  0.001085       0    0.0       0.0
1   1  30100  0.000052  0.001082       0    0.0       0.0
2   1  30200  0.000052  0.001082       0    0.0       0.0
3   1  30300  0.000051  0.001089       0    0.0       0.0
4   1  30400  0.000050  0.001094       0    0.0       0.0


Unnamed: 0,count,mean,std,min,25%,50%,75%,max
ID,194020.0,2.5,1.118037,1.0,1.75,2.5,3.25,4.0
kHz,194020.0,515000.0,280044.468273,30000.0,272500.0,515000.0,757500.0,1000000.0
G,194020.0,0.001733,0.000785,-6e-06,0.001413,0.001992,0.002269,0.003372
B,194020.0,0.014791,0.007461,0.000989,0.008808,0.014561,0.021075,0.030182
Damage,194020.0,0.8,0.400001,0.0,1.0,1.0,1.0,1.0
PerCD,194020.0,31.414,22.214524,0.0,15.7,31.42,47.12,62.83
Distance,194020.0,9.0,6.363977,0.0,4.5,9.0,13.5,18.0


In [5]:
# Taking Independent Variables
# For Damage Detection Model i.e. damaged or not
X_Damage = DataFrame(Data_csv, columns = ['ID','kHz','G','B'])
print(X_Damage.shape)
print(X_Damage.head(5))

# For amount of damage
X_Amount = DataFrame(Data_csv, columns = ['ID','kHz','G','B','PerCD'])
print(X_Amount.shape)
print(X_Amount.head(5))

#For Location of Damage
X_Location = DataFrame(Data_csv, columns = ['ID','kHz','G','B','PerCD','Distance'])
print(X_Location.shape)
print(X_Location.head(5))


(194020, 4)
   ID    kHz         G         B
0   1  30000  0.000051  0.001085
1   1  30100  0.000052  0.001082
2   1  30200  0.000052  0.001082
3   1  30300  0.000051  0.001089
4   1  30400  0.000050  0.001094
(194020, 5)
   ID    kHz         G         B  PerCD
0   1  30000  0.000051  0.001085    0.0
1   1  30100  0.000052  0.001082    0.0
2   1  30200  0.000052  0.001082    0.0
3   1  30300  0.000051  0.001089    0.0
4   1  30400  0.000050  0.001094    0.0
(194020, 6)
   ID    kHz         G         B  PerCD  Distance
0   1  30000  0.000051  0.001085    0.0       0.0
1   1  30100  0.000052  0.001082    0.0       0.0
2   1  30200  0.000052  0.001082    0.0       0.0
3   1  30300  0.000051  0.001089    0.0       0.0
4   1  30400  0.000050  0.001094    0.0       0.0


In [6]:
# Taking dependent Variable

# For Damage Detection Model i.e. damaged or not
Y_Damage = data.Damage
print(Y_Damage.shape)
print(Y_Damage.tail(5))

# For amount of damage
Y_Amount = data.PerCD
print(Y_Amount.shape)
print(Y_Amount.tail(5))

#For Location of Damage
Y_Location = data.Distance
print(Y_Location.shape)
print(Y_Location.tail(5))

(194020,)
194015    1
194016    1
194017    1
194018    1
194019    1
Name: Damage, dtype: int64
(194020,)
194015    62.83
194016    62.83
194017    62.83
194018    62.83
194019    62.83
Name: PerCD, dtype: float64
(194020,)
194015    18.0
194016    18.0
194017    18.0
194018    18.0
194019    18.0
Name: Distance, dtype: float64


In [7]:
# creating training and testing set

# For Damage Detection Model i.e. damaged or not
X_Damage_train, X_Damage_test, Y_Damage_train, Y_Damage_test = train_test_split(X_Damage, Y_Damage, test_size = 0.2)
print (X_Damage_train.shape, Y_Damage_train.shape)
print (X_Damage_test.shape, Y_Damage_test.shape)

# For amount of damage
X_Amount_train, X_Amount_test, Y_Amount_train, Y_Amount_test = train_test_split(X_Amount, Y_Amount, test_size = 0.2)
print (X_Amount_train.shape, Y_Amount_train.shape)
print (X_Amount_test.shape, Y_Amount_test.shape)

#For Location of Damage
X_Location_train, X_Location_test, Y_Location_train, Y_Location_test = train_test_split(X_Location, Y_Location, test_size = 0.2)
print (X_Location_train.shape, Y_Location_train.shape)
print (X_Location_test.shape, Y_Location_test.shape)

(155216, 4) (155216,)
(38804, 4) (38804,)
(155216, 5) (155216,)
(38804, 5) (38804,)
(155216, 6) (155216,)
(38804, 6) (38804,)


In [8]:
# Detecting if there is a damage using logistic regression

logreg = LogisticRegression(C=1e5, solver='lbfgs', multi_class='multinomial')
logreg.fit(X_Damage_train,Y_Damage_train)

print('Intercept: \n', logreg.intercept_)
print('Coefficients: \n', logreg.coef_)
print(logreg)

Acc_logreg = logreg.score(X_Damage_test, Y_Damage_test)
print("Accuracy for Damage Detection model is: ", Acc_logreg)

#Detecting if there is damage using MLPClassifier

#mlp = MLPClassifier(hidden_layer_sizes=(7,7,7))
#mlp.fit(X_Damage_train,Y_Damage_train)
#Acc_mlp = mlp.score(X_Damage_test, Y_Damage_test)
#print("Accuracy for Damage Detection model is: ", Acc_mlp)

Intercept: 
 [  4.78265050e-12]
Coefficients: 
 [[  1.19736874e-11   1.09790450e-06   4.52666370e-15   3.34627439e-14]]
LogisticRegression(C=100000.0, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='multinomial', n_jobs=1, penalty='l2',
          random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
          warm_start=False)
Accuracy for Damage Detection model is:  0.797314709824


In [10]:
# Detecting the amount of damage

enet_D = ElasticNet()
enet_D.fit(X_Amount_train, Y_Amount_train)

predict_amount = enet_D.predict(X_Amount_test)

print(predict_amount)
print(Y_Amount_test)
Acc_enet_D = enet_D.score(X_Amount_test, Y_Amount_test) 
print("Accuracy for Damage Detection model is: ", Acc_enet_D)


[  0.06359079   0.0636055   62.76630598 ...,   0.06361281  62.76631057
  15.73178825]
97488      0.00
6472       0.00
92889     62.83
186991    62.83
136037    62.83
85040     47.12
34766     47.12
94779     62.83
911        0.00
65844     15.70
113392    15.70
147217     0.00
72364     31.42
119985    31.42
107722    15.70
165900    31.42
41432     62.83
139053    62.83
140265    62.83
119289    31.42
125631    31.42
66138     15.70
163362    15.70
135991    62.83
22522     31.42
59869     15.70
104647     0.00
86780     47.12
21927     31.42
57963      0.00
          ...  
166478    31.42
140494    62.83
43193     62.83
188625    62.83
149862     0.00
16618     15.70
9141       0.00
126509    47.12
97271      0.00
43335     62.83
79959     47.12
73450     31.42
100510     0.00
111100    15.70
132074    47.12
159005    15.70
131440    47.12
78529     47.12
20361     31.42
188067    62.83
178911    47.12
64755     15.70
98882      0.00
163370    15.70
109309    15.70
29504     47.12
80

In [11]:
# Damage Location Prediction

enet_L = ElasticNet()
enet_L.fit(X_Location_train, Y_Location_train)

predict_Location = enet_L.predict(X_Location_test)

print(predict_Location)
print(Y_Location_test)
Acc_enet_L = enet_L.score(X_Location_test, Y_Location_test) 
print("Accuracy for Damage Detection model is: ", Acc_enet_L)

[  9.00010341   4.63555052   9.05243323 ...,  13.31227475  17.83310235
  13.46873801]
70018      9.0
24348      4.5
78223      9.0
2680       0.0
93130      9.0
71289      9.0
5929       0.0
12257      4.5
131491    13.5
109424    13.5
35592      4.5
121607    13.5
133839    13.5
110272    13.5
146899     0.0
175428    18.0
43825      4.5
25547      4.5
11531      4.5
119245    13.5
130764    13.5
37028      4.5
144488    13.5
92564      9.0
119528    13.5
133209    13.5
1437       0.0
126745    13.5
26926      4.5
168964    18.0
          ... 
143704    13.5
102309     0.0
130897    13.5
16440      4.5
130482    13.5
102450     0.0
55512      0.0
179434    18.0
55855      0.0
70795      9.0
126347    13.5
97766      0.0
167092    18.0
191359    18.0
106462     0.0
30680      4.5
175576    18.0
159453    18.0
18294      4.5
131190    13.5
60108      9.0
16120      4.5
171777    18.0
89719      9.0
94067      9.0
70049      9.0
41294      4.5
107200    13.5
193029    18.0
143761    13.5