In [3]:
from pymatreader import read_mat
import pandas as pd
import numpy as np
import scipy.io as sio
from tqdm.auto import tqdm

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# def standardise(array):
#     standardise = lambda x, y, z: (x-y)/z
#     csistd = np.frompyfunc(standardise, 3, 1)
#     mean = np.mean(array)
#     std = np.std(array)
#     return csistd(array,mean,std)

def standardise(data, ms):
    """Z-score each element of one sample with its own (mean, std) pair.

    Parameters
    ----------
    data : array-like
        A single sample; it is flattened before use.
    ms : iterable of (mean, std) pairs
        Statistics for each flattened position, in the same order as the
        flattened ``data`` (for example the list produced by ``meanstd``).

    Returns
    -------
    list
        ``(data[i] - mean_i) / std_i`` for every flattened position, as plain
        Python numbers. Like ``zip``, the result is truncated to the shorter
        of ``data`` and ``ms``.

    Raises
    ------
    ZeroDivisionError
        If any standard deviation that would be used is zero.
    """
    values = np.asarray(data).ravel()
    pairs = list(ms)
    # Mirror zip(): only positions present in both inputs are standardised.
    count = min(values.size, len(pairs))
    if count == 0:
        return []

    stats = np.asarray(pairs[:count])
    means = stats[:, 0]
    stds = stats[:, 1]
    # Fail loudly rather than letting numpy emit inf/nan for a zero std.
    if np.any(stds == 0):
        raise ZeroDivisionError('standard deviation of zero in ms')

    # One vectorised expression replaces building a ufunc per element.
    return ((values[:count] - means) / stds).tolist()

def meanstd(df):
    """Compute the (mean, std) of every flattened CSI position across samples.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'CSI'`` column whose non-null entries are arrays that
        all flatten to the same length. ``df`` itself is not modified.

    Returns
    -------
    list of tuple
        One ``(mean, std)`` pair per flattened position, in flattened order.
        Null ``'CSI'`` rows are ignored.
    """
    # Iterate over values, not labels: after dropna() the index has gaps, so
    # looking rows up with 0..len-1 would raise KeyError or hit the wrong row.
    flattened = [np.asarray(sample).flatten() for sample in df['CSI'].dropna()]
    # column_stack turns each sample into a column, so every row of the result
    # holds one flattened position observed across all samples.
    per_position = np.column_stack(flattened)
    return [(np.mean(series), np.std(series)) for series in per_position]

def segmentation(df, window=256, stride=128):
    """Cut the ``'Standardised'`` column into overlapping fixed-size windows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'Standardised'`` column. Rows are addressed by
        position, so the index labels do not matter.
    window : int, optional
        Number of consecutive rows per window (default 256).
    stride : int, optional
        Offset between the starts of consecutive windows (default 128).

    Returns
    -------
    pandas.DataFrame
        Columns ``'Range'`` (the string ``'<start>-<stop>'`` with ``stop``
        exclusive) and ``'Sample'`` (a numpy array holding the window's rows).
        Empty when ``df`` has fewer than ``window`` rows.

    Raises
    ------
    ValueError
        If ``window`` or ``stride`` is not positive.
    """
    if window <= 0 or stride <= 0:
        raise ValueError('window and stride must be positive integers')

    standardised = df['Standardised']
    total = df.shape[0]

    ranges = []
    windows = []
    # The "+ 1" keeps the last full window, whose stop equals the row count;
    # a strict "stop < total" test silently discards it.
    for start in range(0, total - window + 1, stride):
        stop = start + window
        ranges.append('{}-{}'.format(start, stop))
        windows.append(standardised.iloc[start:stop].to_numpy())

    # Build the frame once instead of concatenating one row per iteration.
    # dtype=object keeps each window as a single cell rather than a 2-D block.
    return pd.DataFrame({
        'Range': pd.Series(ranges, dtype=object),
        'Sample': pd.Series(windows, dtype=object),
    })
    
def preprocess(df, active, steady):
    """Turn one recording into labelled, windowed, standardised samples.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``'CSI'`` column and a ``'label'`` column. NOTE: the frame is
        modified in place - ``'CSI'`` is replaced by its absolute value and
        then dropped, and a ``'Standardised'`` column is added.
    active : str
        Label value identifying the rows of the active state.
    steady : str
        Label value identifying the rows of the steady state.

    Returns
    -------
    pandas.DataFrame
        Columns ``'Range'``, ``'Sample'`` and ``'Label'``; the active-state
        windows come first, followed by the steady-state windows.
    """
    # Keep only the amplitude of the CSI values.
    df['CSI'] = np.abs(df['CSI'])
    stats = meanstd(df)
    # Standardise every sample with the per-position statistics (the original
    # author attributes the equation to CSITime).
    df['Standardised'] = [standardise(csi, stats) for csi in df['CSI']]
    df.drop(columns='CSI', inplace=True)

    result = pd.DataFrame({'Range': '', 'Sample': [], 'Label': ''})
    # Window each state separately, tag it, and append it to the result.
    for state in (active, steady):
        state_rows = df.loc[df['label'] == '{}'.format(state)]
        state_windows = segmentation(state_rows)
        state_windows['Label'] = [state] * len(state_windows['Range'])
        result = pd.concat([result, state_windows], ignore_index=True)

    return result

In [8]:
# Root folder with one sub-folder per subject pair; adjust to the local copy
# of the dataset.
DATA_DIR = 'C:\\Users\\Karlj\\dissertation\\wifimod'
INTERACTIONS = range(1, 8)   # interaction indices I1..I7
TRIALS = range(1, 10)        # trial indices T1..T9
STEADY_LABEL = 'I13'         # label passed to preprocess() as the steady state
UNUSED_COLUMNS = ['Nrx', 'Ntx', 'noise', 'agc', 'RSSI_a', 'RSSI_b', 'RSSI_c', 'timestamp_low']
OUTPUT_PATH = 'csi-dataset-40-7-9.pkl'

subjectPair = ['S1_S47', 'S2_S22', 'S3_S44', 'S4_S15','S6_S24', 'S7_S12','S8_S31','S13_S21', 'S14_S5', 'S16_S41','S18_S57','S19_S11','S20_S61',
               'S25_S9','S26_S60', 'S27_S40','S28_S43','S32_S64', 'S33_S3','S34_S30','S35_S52','S36_S16','S37_S54','S38_S35','S41_S36',
               'S42_S14','S44_S33','S46_S28','S48_S45','S49_S10','S50_S17','S51_S23','S52_S62','S53_S12','S55_S66','S56_S63','S58_S39',
               'S59_S29','S62_S38','S65_S42']

# Smaller subset for quicker runs:
# subjectPair = ['S1_S47', 'S2_S22', 'S3_S44', 'S4_S15','S6_S24', 'S7_S12','S8_S31','S13_S21', 'S14_S5', 'S16_S41','S18_S57','S19_S11','S20_S61',
#                'S25_S9','S26_S60', 'S27_S40','S28_S43','S32_S64', 'S33_S3','S34_S30']

# Seed with an empty frame so the column layout is fixed even if no trial
# produces any window; the per-trial frames are concatenated once at the end
# instead of re-copying the growing result on every iteration.
frames = [pd.DataFrame({'Range': 'string', 'Sample': [], 'Label': ''})]

for pair in tqdm(subjectPair, desc='subjectPair', leave=True): #Iterate through each pair
    for intera in tqdm(INTERACTIONS, desc='Interaction', leave=False): #Index of interaction
        for trial in tqdm(TRIALS, desc='Trial', leave=False): #Index of trial
            data = read_mat('{0}\\{1}\\I{2}\\{1}_I{2}_T{3}.mat'.format(DATA_DIR, pair, intera, trial))
            df = pd.DataFrame(data['Raw_Cell_Matrix'])
            df = df.drop(columns=UNUSED_COLUMNS)
            active = 'I{}'.format(intera)
            preprocessed = preprocess(df, active, STEADY_LABEL)
            # A bare `df.dropna(axis=0)` used to follow here; its result was
            # discarded, so it had no effect and has been removed.
            frames.append(preprocessed)

final = pd.concat(frames, ignore_index=True)
final.to_pickle(OUTPUT_PATH)
                      

subjectPair:   0%|          | 0/40 [00:00<?, ?it/s]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
subjectPair:   2%|▎         | 1/40 [02:27<1:36:03, 147.79s/it]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
subjectPair:   5%|▌         | 2/40 [04:44<1:29:34, 141.42s/it]
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[A
[

In [6]:
# Sanity check: container type of the first stored window.
type(final.loc[0, 'Sample'])

numpy.ndarray

In [None]:
# Split the dataset into model inputs (windows) and targets (labels).
X, Y = final['Sample'], final['Label']