# Notebook for analysing data to find potential signals within
# 

### How does this script work?
First, data saved in .txt form is loaded and the dataset is extracted. The dataset is then run through a pipeline that looks for potential signals within the data. Finally, DataFrames are created containing the characteristics of the full dataset and of any signals within it.
# 
### How does the pipeline work?
The data is first analysed by a machine learning model. This model is unsupervised and looks for regions in the data where there is high volatility, indicating a potential region of interest where a signal may be.

The predictions made by the model are then examined, and any regions of interest are saved and looked into in more detail. This involves using a cross correlation function to determine whether the signal has the characteristic curve. If the extracted signal is extremely long, the cross correlation function is instead used to see whether there is a signal within the region of interest. If the cross correlation function shows that a signal is present, the signal is analysed by looking at its length, the number and height of its peaks, and the depth of the dip. If the cross correlation function shows that no signal is present, the region of interest is discarded.

In [None]:
import MLDetectionFunctionsPythonV6 as EEGanalysis
import pandas as pd

# Importing data to analyse

### Each dataset should be saved as a .txt file

In [None]:
# Datasets available for analysis; each one is stored as a .txt file.
# The four long recordings share a naming pattern, so their names are built together.
realdatafile1, realdatafile2, realdatafile3, realdatafile4 = (
    f"Long_Data_{index}.txt" for index in range(1, 5)
)

realdatafile5 = "Short_Data_1.txt"

realdatafile6 = "22.07.22_Example_Data_Long.txt"
realdatafile7 = "22.07.22_Example_Data_Long2.txt"

# Curve file that is formatted later on for use in cross correlation.
curvedata = "SpikeWaveDischarge.txt"

# Choosing which dataset to analyse

In [None]:
#realdata1 = EEGanalysis.getdata(realdatafile1)

In [None]:
#realdata2 = EEGanalysis.getdata(realdatafile2)

In [None]:
#realdata3 = EEGanalysis.getdata(realdatafile3)

In [None]:
#realdata4 = EEGanalysis.getdata(realdatafile4)

In [None]:
#realdata5 = EEGanalysis.getdata(realdatafile5)

In [None]:
# Load the dataset chosen for this run (realdatafile6). This is the only loader
# cell left active; the others are commented out. To analyse a different file,
# enable its loader instead and pass that variable to CreateDataframe.
realdata6 = EEGanalysis.getdata(realdatafile6)

In [None]:
#realdata7 = EEGanalysis.getdata(realdatafile7)

In [None]:
# Build the two objects the rest of the pipeline works on: DFnorm and signal.
# Both are passed to PredictSignal and AnalyseSignalsFailsafeVer further down,
# and signal alone is passed to AnalyseSignalFullData.
# NOTE(review): realdata6 is hard-coded here, so it must match whichever
# loader cell was actually run.
DFnorm, signal = EEGanalysis.CreateDataframe(realdata6)

# Formatting the curve data to use in cross correlation to determine validity of potential signals

In [None]:
# Load the curve file whose name is currently held in curvedata.
# NOTE(review): this overwrites the filename with the loaded data, so running
# this cell a second time passes the data (not the filename) to getdata.
# Re-run the filename cell first if this cell needs to be repeated.
curvedata = EEGanalysis.getdata(curvedata)

In [None]:
# Split the loaded curve data into timecurve and signalcurve.
# Only signalcurve is used afterwards (its column 1 is smoothed below);
# timecurve is not referenced again in the visible notebook.
timecurve, signalcurve = EEGanalysis.timesignal(curvedata)

In [None]:
# Apply EEGanalysis.movingaverage to column 1 of signalcurve. The result is
# the curve handed to AnalyseSignalsFailsafeVer later in the notebook.
# NOTE(review): 10000 is presumably the averaging window length - confirm
# against the movingaverage definition.
signalcurveave = EEGanalysis.movingaverage(signalcurve[:,1],10000)

# Predicting the locations of potential signals within the dataset

In [None]:
# Run the prediction step on DFnorm and signal. The three results
# (StartVals, EndVals, signalCURVES) are all passed on to
# AnalyseSignalsFailsafeVer.
# NOTE(review): StartVals/EndVals are presumably the start and end positions
# of each predicted region, and the meaning of 350000 is not visible from
# this notebook - confirm both against PredictSignal.
StartVals, EndVals, signalCURVES = EEGanalysis.PredictSignal(DFnorm, signal,350000)

# Analysing the whole dataset before any potential signals have been removed

In [None]:
# Characterise the full, unmodified signal. The same function is applied
# later to the data left over after signal removal, so the two results can
# be compared.
MainSignalDFtestfull = EEGanalysis.AnalyseSignalFullData(signal)

# Extracting potential individual signals, determining their validity and analysing them where necessary

In [None]:
# Extract and analyse each predicted region. Three results come back:
#   MainSignalDFraw        - characteristics of the initially extracted signals
#   MainSignalDFccextracts - characteristics of the second tier extractions
#   redsig                 - the data passed to AnalyseSignalFullData below as
#                            the remainder after valid signals are removed
# NOTE(review): the meaning of 300000 is not visible from this notebook, and
# it differs from the 350000 passed to PredictSignal - confirm this is
# intentional.
MainSignalDFraw, MainSignalDFccextracts,redsig = EEGanalysis.AnalyseSignalsFailsafeVer(StartVals,EndVals,signal,signalCURVES,DFnorm,signalcurveave,300000)


# Analysing the remaining data after valid signals have been removed

In [None]:
# Characterise redsig with the same function that was applied to the full
# signal, giving a like-for-like comparison with MainSignalDFtestfull.
MainSignalDFtestfullthird = EEGanalysis.AnalyseSignalFullData(redsig)

# Dataframe containing characteristics of full original data

In [None]:
# Bare expression: the notebook renders this as the cell's output.
MainSignalDFtestfull

# Dataframe containing characteristics of initially extracted signals, not including any second tier extractions

In [None]:
# Bare expression: the notebook renders this as the cell's output.
MainSignalDFraw

# Dataframe containing characteristics of only second tier extractions

### This is where a signal was found within an initial extraction, but the original extraction contained more data than just the signal

### The signal is extracted using cross correlation to trim out the 'excess' data from the original extraction

In [None]:
# Bare expression: the notebook renders this as the cell's output.
MainSignalDFccextracts

# Dataframe containing the characteristics of the full data with signals removed

In [None]:
# Bare expression: the notebook renders this as the cell's output.
MainSignalDFtestfullthird

# Combining the dataframes containing original extracted signals and second tier extractions

### Original extractions are labelled with .0, e.g. 5.0 or 8.0

### Second tier extractions are labelled with .1, .2, .3, etc., depending on how many second tier extractions there are within one original signal, e.g. original signal 6 may have two signals within it, which would be labelled 6.1 and 6.2, while original signal 9 may have one signal within it, which would be labelled 9.1

In [None]:
# Stack the original extractions and the second tier extractions into one
# DataFrame. DataFrame.append was deprecated in pandas 1.4 and removed in
# pandas 2.0, so pd.concat is used; ignore_index=True renumbers the rows
# 0..n-1 exactly as the old append(..., ignore_index=True) call did.
MainSignalDFtot = pd.concat(
    [MainSignalDFraw, MainSignalDFccextracts],
    ignore_index=True,
)
# Bare expression: the notebook renders this as the cell's output.
MainSignalDFtot

In [None]:
# Drop the literal text 'Signal' from every label and convert what is left to
# a number, so that rows sort numerically (e.g. 6.0, 6.1, 6.2, 9.0, 9.1)
# instead of as text.
MainSignalDFtot['SignalNumber'] = pd.to_numeric(
    MainSignalDFtot['SignalNumber'].str.replace('Signal', '', regex=False)
)

# Order the combined table by signal number, then display it as the cell output.
MainSignalDFtot = MainSignalDFtot.sort_values(by='SignalNumber')
MainSignalDFtot