#### Creating a Machine Learning Model to predcit the event based on Φ-OTDR readings

We import the required libraries wherever and whenever required as per usage

In [1]:
import pandas as pd
df = pd.read_csv("images2.csv", names=["file_path","target"])
print(df.describe)


<bound method NDFrame.describe of                           file_path  target
0        ../trainData/drop/5491.jpg       0
1        ../trainData/drop/2712.jpg       0
2        ../trainData/drop/1075.jpg       0
3        ../trainData/drop/1237.jpg       0
4        ../trainData/drop/2854.jpg       0
...                             ...     ...
20995  ../trainData/qiaoji/2754.jpg       4
20996  ../trainData/qiaoji/3925.jpg       4
20997  ../trainData/qiaoji/4501.jpg       4
20998   ../trainData/qiaoji/252.jpg       4
20999  ../trainData/qiaoji/2526.jpg       4

[21000 rows x 2 columns]>


In [2]:
import numpy as np
from PIL import Image
from PIL import ImageFilter

from skimage.filters import prewitt_h,prewitt_v

Extracting basic features such as mean, std, corr and zero crossings

In [3]:
def img_mean(npa):  return np.mean(npa)

def img_std(npa):   return np.std(npa)

def img_corr(image):
    img_med = image.filter(ImageFilter.MedianFilter(size = 3))
    return np.corrcoef(image,img_med).mean()

def img_zcrs(npa):
    zcr = np.mean(npa, axis = 0) - npa.mean().mean()
    zero_crossings = np.where(np.diff(np.signbit(zcr)))[0]
    return len(zero_crossings)

def img_skew(npa):  return pd.DataFrame(npa).skew().mean()

def img_kurt(npa):  return pd.DataFrame(npa).kurtosis().mean()

def img_ver_edge(npa): return prewitt_v(npa).mean()

def img_hor_edge(npa): return prewitt_h(npa).mean()

Extracting the features such as number of data points in certain frequency ranges

In [4]:
def freq_ranges(npa):
    col = np.mean(npa, axis = 0)
    bt_0_50 = ((50>col) & (col>=0)).sum()
    bt_50_100 = ((100>col) & (col>=50)).sum()
    bt_100_150 = ((150>col) & (col>=100)).sum()
    ab_150 = (col>=150).sum()
    return bt_0_50, bt_50_100, bt_100_150, ab_150

def stren(npa): 
    temperory=freq_ranges(npa)
    strn=temperory[2]+temperory[3]
    return strn
    
def event(npa):
    zcr = np.mean(npa, axis = 0) - 100
    ev = len(np.where(np.diff(np.signbit(zcr)))[0])
    return ev

def img_stren(npa):

    if event(npa)==0:
        return 0
    else:
        val=2*stren(npa)/event(npa)
        return val

def spectral_energy(npa):
  mat=npa
  total=npa.sum()
  x=[]
  for i in range(4):
    count=0
    for j in range((len(mat)//4)*i,(len(mat)//4)*(i+1)):
      count+=sum(mat[j])
    x.append(count/total)
  return x[0],x[1],x[2],x[3]

In [5]:
for imgs in df.file_path:
    image = Image.open(imgs)
    npa = np.array(image)

Overall Extraction of features from the Images

In [6]:
dfm = pd.DataFrame( columns = ["mean", "std", "corr", "zcrs"] )

temp_df = pd.DataFrame( columns = ["skew", "kurtosis", "event_strength"] )

dup_df = pd.DataFrame( columns = ["edge_v", "edge_h"])

se_df = pd.DataFrame( columns = ["x[0]", "x[1]", "x[2]", "x[3]" ])

for imgs in df.file_path:
    image = Image.open(imgs)
    npa = np.array(image)
    
    dfm.loc[len(dfm)] = [ img_mean(npa), img_std(npa),img_corr(image), img_zcrs(npa) ]
    temp_df.loc[len(temp_df)] = [ img_skew(npa), img_kurt(npa), img_stren(npa)]
    dup_df.loc[len(dup_df)] = [img_ver_edge(npa), img_hor_edge(npa)]
    list = spectral_energy(npa)
    se_df.loc[len(se_df)] = list

dfm = pd.concat([dfm, temp_df], axis=1)
dfm = pd.concat([dfm, dup_df], axis =1)
dfm = pd.concat([dfm, se_df], axis=1)
dfm['target'] = df.target
dfm

Unnamed: 0,mean,std,corr,zcrs,skew,kurtosis,event_strength,edge_v,edge_h,x[0],x[1],x[2],x[3],target
0,25.765820,37.987719,0.641946,38.0,1.693545,4.235893,6.000000,-0.000297,0.001941,0.123381,0.288142,0.289867,0.298609,0
1,24.484570,40.607841,0.704699,18.0,1.683911,4.448071,4.000000,-0.000099,0.002135,0.124027,0.312687,0.314106,0.249180,0
2,25.393633,38.616103,0.636090,26.0,1.581144,3.511482,4.500000,-0.000152,0.003218,0.109798,0.266535,0.300964,0.322703,0
3,38.179883,52.298636,0.582311,36.0,1.662758,4.098637,5.142857,0.004713,0.004995,0.093242,0.236650,0.300809,0.369299,0
4,32.750898,47.866475,0.686309,24.0,1.601055,4.052781,3.000000,0.000144,0.003870,0.118435,0.258129,0.284491,0.338945,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20995,41.145313,49.929015,0.486868,36.0,1.436900,2.693499,2.857143,-0.002108,0.005676,0.094429,0.239033,0.285022,0.381517,4
20996,22.244258,40.136412,0.669736,10.0,1.632976,3.840018,2.000000,-0.000881,0.000666,0.141502,0.277087,0.334228,0.247183,4
20997,23.887070,32.084761,0.480300,18.0,1.674582,4.021697,1.200000,-0.000515,0.003082,0.115755,0.269571,0.313459,0.301216,4
20998,24.216875,36.220532,0.556774,6.0,1.662630,4.064199,2.500000,0.000119,0.002145,0.120955,0.290392,0.315316,0.273337,4


In [7]:
dfm.to_csv("ML_Features.csv")