Repository Link to Processed Data Files: https://gitfront.io/r/user-3027905/4e8c7576428c4d688ee6f1f785958d83ad775178/ProjectB-Group1/

## Reading EPOT Data and Data Augmentation

In [4]:
import os
import sys
import glob
import re
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.io
from scipy import signal

---
### Function Definitions
**Function takes File Path and Returns a Numpy Array**

In [None]:
def read_data(file_path):
    """Load the ``num_data`` matrix of a MATLAB file with one channel per row.

    Parameters
    ----------
    file_path : str
        Path to a ``.mat`` file that contains a 2-D variable ``num_data``.

    Returns
    -------
    numpy.ndarray
        Float array holding the transpose of ``num_data``: row ``i`` of the
        result is column ``i`` of the stored matrix.

    Raises
    ------
    AssertionError
        If ``file_path`` is not an existing file.
    """
    if not os.path.isfile(file_path):
        raise AssertionError(file_path, 'Not Found!')

    matdata = scipy.io.loadmat(file_path)
    num_data = matdata['num_data']
    r, c = num_data.shape

    # Copy every column (not just the first five) so that files with a
    # different channel count neither crash nor leave rows filled with zeros.
    P = np.zeros((c, r))
    P[:] = num_data.T

    return P

**Data Augmentation: Reflects the Position Vector about the (250, 250) Centre and Reassigns the Sensor Data to Match the New Quadrant**

In [1]:
def aug_data(x, y, P_data, n, centre=250):
    """Mirror a measurement position and swap the matching pair of sensor rows.

    The position is expressed relative to ``centre`` (so the reflection is a
    plain sign flip), reflected, and translated back.  The data rows are
    copied into a new array in which the two rows affected by that reflection
    trade places.

    Parameters
    ----------
    x, y : int or str
        Coordinates of the measurement; converted with ``int``.
    P_data : numpy.ndarray
        Array whose first five rows are used.  Per the original author's
        comments these are T, P1, P2, P3, P4 -- row 0 is copied unchanged.
    n : int
        Selects the reflection:
        ``2`` or ``4`` negate the x offset and swap rows 2 and 4,
        ``3`` negates the y offset and swaps rows 1 and 3.
    centre : int, optional
        Coordinate of the reflection centre on both axes (default 250).

    Returns
    -------
    tuple
        ``(new_x, new_y, augmented)`` where ``augmented`` is a new float
        array with the shape of ``P_data``; ``P_data`` is not modified.

    Raises
    ------
    ValueError
        If ``n`` is not 2, 3 or 4 (previously this surfaced as an unbound
        ``rot_mat`` error further down).
    """
    # target quadrant -> (reflection matrix, pair of rows that trade places)
    reflections = {
        2: ([[-1, 0], [0, 1]], (2, 4)),
        3: ([[1, 0], [0, -1]], (1, 3)),
        4: ([[-1, 0], [0, 1]], (2, 4)),
    }
    if n not in reflections:
        raise ValueError("n must be 2, 3 or 4, got %r" % (n,))
    rot_mat, (first, second) = reflections[n]

    # source row for each of the five destination rows, with one pair swapped
    source_rows = [0, 1, 2, 3, 4]
    source_rows[first], source_rows[second] = second, first

    # rows beyond the fifth (if any) stay zero, exactly as before
    augmented = np.zeros(P_data.shape)
    augmented[:5, :] = P_data[source_rows, :]

    # shift the origin to the centre, reflect, and shift back
    pos_orig = [[int(x) - centre], [int(y) - centre]]
    pos_vector = np.matmul(rot_mat, pos_orig) + [[centre], [centre]]

    # pos_vector is a 2x1 column vector
    return pos_vector[0][0], pos_vector[1][0], augmented

**Function to Write Array into a CSV File, or a Global .npy File**

In [6]:
#####UNCOMMENT TO PREPARE DATA IN SEPARATE FILES
def write_csv(write_ar, x, y, folder_name=r"<INSERT PATH>"):
    """Append one measurement array to ``<x>_<y>.csv`` inside ``folder_name``.

    Parameters
    ----------
    write_ar : numpy.ndarray
        Seven-column array (X, Y, T, P1, P2, P3, P4) as built by
        ``getFinalArray``.
    x, y : number or str
        Coordinates of the data; their integer values form the file name.
    folder_name : str, optional
        SPECIFY FOLDER TO SAVE FILES HERE (edit the default) or pass it in.

    Notes
    -----
    The file is opened in append mode, so calling this twice for the same
    coordinates adds a second header + data section to the same file.
    ``np.savetxt`` writes the header as a ``# ``-prefixed comment line.
    """
    # create the file name according to the x and y coordinates of the data
    individual_name = str(int(x))+"_"+str(int(y))+".csv"
    name = os.path.join(folder_name, individual_name)

    print("\nWRITING: (" + str(x)+","+str(y) +")\n")

    with open(name, 'a') as csvfile:
        # The header must name all seven columns: the cutting step later
        # looks the time column up by its header name ' T'.
        np.savetxt(csvfile, write_ar, delimiter=",",
                   header='X, Y, T, P1, P2, P3, P4')

#_____________________________________________________________________________________________________
#####UNCOMMENT TO PREPARE DATA IN SINGLE FILE

#def write_csv(write_ar, x, y):
#folder_name = r"C:\Users\dell\OneDrive\Desktop\CIE_B\CutDataSets\Augmented_Data"
#    name = os.path.join(folder_name, 'Augmented_Data.csv')
#    with open(name,'a') as csvfile:
#        if csvfile.tell() == 0:
#            np.savetxt(csvfile, write_ar, delimiter = ",", header='X, Y, P1, P2, P3, P4')
#        else:
#            np.savetxt(csvfile, write_ar, delimiter = ",")
#    
#    name = os.path.join(folder_name, 'NP_TestData.npy')
#    with open(name, 'ab') as f:
#        np.save(f, write_ar)

**Function to Get Final Array (X, Y, T, P1, P2, P3, P4)**

In [None]:
#this function takes the (x,y) and the data array and combines everything into one and returns the final data array
def getFinalArray(x, y, P):
    pos = np.array([int(x), int(y)])
    [r, c] = P.shape
    print("Reading Coordinates: (" + str(x) + "," + str(y) + ")")

    #confirm if array shape is (20000,5) and in case not, everything is flipped
    if r<c:
        P = np.transpose(P)
        t = r
        r = c
        c = t

    write_ar = np.zeros((r, c+2))

    write_ar[:, 0] = x
    write_ar[:, 1] = y

    for i in range (0,5):
        write_ar[:, i+2] = P[:, i]

    return write_ar


**Function to Plot Arrays**

In [2]:
def plot_ar(ar):
    """Plot columns 2 to 5 of ``ar`` as the four traces P1..P4 and show the figure."""
    rows = np.transpose(ar)

    # (row of the transposed array, line colour, legend label)
    traces = (
        (2, 'r', "P1"),
        (3, 'g', "P2"),
        (4, 'b', "P3"),
        (5, 'y', "P4"),
    )
    for row_idx, colour, tag in traces:
        plt.plot(rows[row_idx, :], colour, label=tag)

    plt.legend(loc='best')
    plt.xlabel("t")
    plt.ylabel("V")
    plt.show()

---
### Main File: Data Augmentation

In [None]:
########################################################
# INSERT PATH TO EPOT DATA
mfolder = r"<INSERT PATH>"
# glob pattern that matches every entry directly inside mfolder
folder = os.path.join(mfolder, '*')

# empty, string-typed table that will receive one (X, Y, Path) row per data file
file_location = pd.DataFrame(dtype=str, columns=['X', 'Y', 'Path'])

In [None]:
##################################################
# READS THE FILES STORED AT THE PATH IN FOLDER
# SAVES FILE PATH AND COORDINATES IN A DATAFRAME
# File names are expected to look like EPOT_250_250.mat.
i = 0
for name in glob.glob(folder):
    fpath = name

    # Take the file name without its directory part.  os.path.basename uses
    # the separator rules of the running platform, so this no longer depends
    # on the path being written with "\".
    file_name = os.path.basename(name)

    # split the file name at "_" and "." -> "EPOT", "250", "250", "mat";
    # items 1 and 2 are the coordinates (raw string: "\." is a regex escape)
    coords = re.split(r"_|\.", file_name)

    x_coord = coords[1]
    y_coord = coords[2]

    file_location.loc[i, 'X'] = str(x_coord)
    file_location.loc[i, 'Y'] = str(y_coord)
    file_location.loc[i, 'Path'] = fpath
    i += 1

#____________________________________________________________________
file_location.to_csv("DataList.csv", sep = ",") #THIS FILE IS LATER USED TO CUT THE DATA

In [None]:
###########################################
for idx, row in file_location.iterrows():
    x, y = row.X, row.Y
    P = read_data(row.Path)

    # only rows whose index is below 250 are augmented and written
    if not idx < 250:
        continue

    print(idx)

    # original data plus the three successive reflections of it
    quad1 = getFinalArray(x, y, P)
    x2, y2, Q2 = aug_data(x, y, P, 2)
    quad2 = getFinalArray(x2, y2, Q2)
    x3, y3, Q3 = aug_data(x2, y2, Q2, 3)
    quad3 = getFinalArray(x3, y3, Q3)
    x4, y4, Q4 = aug_data(x3, y3, Q3, 4)
    quad4 = getFinalArray(x4, y4, Q4)

    # integer positions of the four variants, used to detect coincident points
    p1 = (int(x), int(y))
    p2 = (int(x2), int(y2))
    p3 = (int(x3), int(y3))
    p4 = (int(x4), int(y4))

    # the point equals its double reflection: it sits on the reflection
    # centre, so a single copy is enough
    if p1 == p3:
        write_csv(quad1, x, y)
        continue

    on_axis = False

    # point lies on the y-axis: variants 1/2 coincide and variants 3/4 coincide
    if p1 == p2:
        write_csv(quad1, x, y)
        on_axis = True
    if p3 == p4:
        write_csv(quad3, x3, y3)
        on_axis = True

    # point lies on the x-axis: variants 1/4 coincide and variants 2/3 coincide
    if p1 == p4:
        write_csv(quad1, x, y)
        on_axis = True
    if p2 == p3:
        write_csv(quad2, x2, y2)
        on_axis = True

    # off both axes: all four variants are distinct, save every one of them
    if not on_axis:
        for quad, qx, qy in ((quad1, x, y), (quad2, x2, y2),
                             (quad3, x3, y3), (quad4, x4, y4)):
            write_csv(quad, qx, qy)

---
## Cutting the Augmented Data

In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import cm
import os
import glob
import re

%matplotlib notebook

In [None]:
# Code to cut the numerical data to where the values become non zero

def num_data_prepro(file_locations, output_folder=r'<INSERT PATH>', window=10000):
    """Cut every listed CSV to a fixed window starting at the first non-zero sample.

    For each row of ``file_locations`` the CSV named in column 3 is read, the
    first row in which any signal column (column index 3 onwards) is non-zero
    is located, and ``window`` rows starting there are written to
    ``<output_folder>/<X>_<Y>.csv``.  The ``' T'`` column of the cut data is
    shifted so that the segment starts at the file's first time value.

    Parameters
    ----------
    file_locations : pandas.DataFrame
        Table in the layout ``DataList.csv`` has when read back with its index
        column: column 1 = X, column 2 = Y, column 3 = path of the CSV.
    output_folder : str, optional
        IMPORTANT: folder where the cut files are saved (edit the default or
        pass it in).
    window : int, optional
        Number of rows to keep.  The default of 10000 is, per the original
        author's note, half of the recorded samples (0.1 ms).
    """
    for idx in range(len(file_locations)):

        data_file = pd.read_csv(os.path.abspath(file_locations.iloc[idx, 3]))

        # True for every row in which at least one signal column is non-zero.
        # All columns from index 3 on are checked, so the last sensor is no
        # longer left out of the onset search.
        active = (data_file.iloc[:, 3:] != 0).any(axis=1).to_numpy()

        # A file without any non-zero sample has no onset; keep it from the
        # start instead of failing on a missing cut index.
        cut_at_index = int(active.argmax()) if active.any() else 0

        # .copy() so the column assignment below acts on an independent frame
        data_file_cut = data_file.iloc[cut_at_index:cut_at_index + window, :].copy()
        # index-aligned: row k of the cut receives the time value of row k - cut_at_index
        data_file_cut[' T'] = data_file[' T'].shift(cut_at_index)

        name = str(file_locations.iloc[idx, 1]) + "_" + str(file_locations.iloc[idx, 2])
        path = os.path.join(output_folder, name + '.csv')

        data_file_cut.to_csv(path, sep=',', index=False)

In [None]:
# INSERT THE PATH OF THE DataList.csv FILE WRITTEN BY THE AUGMENTATION CODE
file_locations = pd.read_csv(filepath_or_buffer='<INSERT PATH>/DataList.csv')

# cut every file listed in the table
num_data_prepro(file_locations=file_locations)

---
## Resampling and Normalisation of Cut Data

In [None]:
#########################################################
#this function takes the (x,y) and the data array and combines everything into one and returns the final data array
def getResampledArray(x, y, P, n_samples=100):
    """Resample each signal column of ``P`` and rescale it to the range [0.1, 0.9].

    Parameters
    ----------
    x, y : number
        Coordinates written into the first two output columns.
    P : numpy.ndarray
        2-D array whose columns are (x, y, T, signals...); it is transposed
        first when it has fewer rows than columns.  Columns 0-2 are not
        resampled.
    n_samples : int, optional
        Number of samples per signal after resampling (default 100).

    Returns
    -------
    numpy.ndarray
        Array of shape (n_samples, 2 + number of signal columns): x, y and
        one resampled, min-max scaled column per signal.  A signal whose
        resampled values are all equal is set to 0.1 throughout, because the
        min-max formula would otherwise divide zero by zero.
    """
    print("Reading Cordinates: (" + str(x) + "," + str(y) + ")")

    r, c = P.shape

    # make sure samples run along the rows; flip the array when they do not
    if r < c:
        P = np.transpose(P)
        r, c = c, r

    # two coordinate rows plus one row per signal column (c - 3 of them)
    write_ar = np.zeros((c - 1, n_samples))
    write_ar[0, :] = x
    write_ar[1, :] = y

    for k in range(c - 3):
        channel = np.array(P[:, k + 3], dtype=float)
        resampled = signal.resample(channel, n_samples)

        lowest = np.min(resampled)
        span = np.max(resampled) - lowest
        if span == 0:
            write_ar[k + 2, :] = 0.1
        else:
            write_ar[k + 2, :] = 0.1 + 0.8 * (resampled - lowest) / span

    # back to one sample per row
    write_ar = np.transpose(write_ar)

    return write_ar

In [None]:
def writeFinalCSV(x, y, write_ar, folder_name=r"<INSERT PATH>"):
    """Append a resampled array to ``<x>_<y>.csv`` inside ``folder_name``.

    Parameters
    ----------
    x, y : number or str
        Coordinates of the data; their integer values form the file name.
    write_ar : numpy.ndarray
        Six-column array (X, Y, P1, P2, P3, P4).
    folder_name : str, optional
        SPECIFY FOLDER PATH TO SAVE FILES HERE (edit the default) or pass it in.

    Notes
    -----
    The file is opened in append mode, so repeated calls for the same
    coordinates add further header + data sections to the same file.
    """
    # create the file name according to the x and y coordinates of the data
    individual_name = str(int(x))+"_"+str(int(y))+".csv"
    name = os.path.join(folder_name, individual_name)

    print("\nWRITING: (" + str(x)+","+str(y) +")\n")

    with open(name, 'a') as csvfile:
        np.savetxt(csvfile, write_ar, delimiter=",", header='X, Y, P1, P2, P3, P4')

In [None]:
########################################################
########################################################
# INSERT PATH TO FILES WHERE CUT DATA IS SAVED
mfolder = r"<INSERT PATH>"
# glob pattern that matches every entry directly inside mfolder
folder = os.path.join(mfolder, '*')

# empty, string-typed table that will receive one (X, Y, Path) row per data file
file_location = pd.DataFrame(dtype=str, columns=['X', 'Y', 'Path'])

In [None]:
##################################################
# READS THE FILES STORED AT THE PATH IN FOLDER
# SAVES FILE PATH AND COORDINATES IN A DATAFRAME
# File names are expected to start with "<x>_<y>." (e.g. 250_250.csv).
i = 0
for name in glob.glob(folder):
    fpath = name

    # Take the file name without its directory part.  os.path.basename uses
    # the separator rules of the running platform, so this no longer depends
    # on the path being written with "\".
    file_name = os.path.basename(name)

    # split the file name at "_" and "."; the first two items are the
    # coordinates (raw string: "\." is a regex escape)
    coords = re.split(r"_|\.", file_name)

    x_coord = coords[0]
    y_coord = coords[1]

    file_location.loc[i, 'X'] = str(x_coord)
    file_location.loc[i, 'Y'] = str(y_coord)
    file_location.loc[i, 'Path'] = fpath
    i += 1

In [None]:
###########################################
# NOTE: readCSV is defined in the "Generating Flipped Data" section further
# down; that cell has to be run before this loop.
for idx, row in file_location.iterrows():
    check = 0  # not read in this loop
    x, y = float(row.X), float(row.Y)
    path = row.Path

    # only rows whose index is below 950 are processed
    if not idx < 950:
        continue

    # read the cut data, resample + normalise it, and store the result
    P = readCSV(path)
    finalP = getResampledArray(x, y, P)
    writeFinalCSV(x, y, finalP)

---
# Generating Flipped Data

In [None]:
import os
import sys
import glob
import re
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import scipy.io
from scipy import signal

In [None]:
def readCSV(file_path):
    """Read a comma-separated numeric file into an array, skipping its first line.

    Raises ``AssertionError`` when ``file_path`` is not an existing file.
    """
    if os.path.isfile(file_path):
        return np.genfromtxt(file_path, skip_header=1, delimiter=',')

    raise AssertionError(file_path, 'not found')

In [None]:
def flipArray(x, y, P):
    """Return a copy of ``P`` with the sign of columns 2 to 5 inverted.

    Columns 0 and 1 are copied unchanged; ``x`` and ``y`` are accepted but
    not used.  Any column beyond index 5 is left at zero in the result.
    """
    flipped = np.zeros(np.shape(P))

    # coordinate columns are carried over as they are
    for col in (0, 1):
        flipped[:, col] = P[:, col]

    # the four signal columns change sign
    for col in (2, 3, 4, 5):
        flipped[:, col] = -P[:, col]

    return flipped

In [None]:
def write_csv(x, y, write_ar, folder_name=r"<INSERT PATH HERE>"):
    """Append a flipped array to ``<x>_<y>.csv`` inside ``folder_name``.

    Parameters
    ----------
    x, y : number or str
        Coordinates of the data; their integer values form the file name.
    write_ar : numpy.ndarray
        Six-column array (X, Y, P1, P2, P3, P4).
    folder_name : str, optional
        Folder to save into (edit the default or pass it in).

    Notes
    -----
    The file is opened in append mode, so repeated calls for the same
    coordinates add further header + data sections to the same file.
    """
    # create the file name according to the x and y coordinates of the data
    individual_name = str(int(x))+"_"+str(int(y))+".csv"
    name = os.path.join(folder_name, individual_name)

    print("\nWRITING: (" + str(x)+","+str(y) +")\n")

    with open(name, 'a') as csvfile:
        np.savetxt(csvfile, write_ar, delimiter=",", header='X, Y, P1, P2, P3, P4')

In [None]:
# NOTE: `folder` and `file_location` are not defined in this section; they
# must already exist from an earlier cell and point at the data to be flipped.
# File names are expected to start with "<x>_<y>." (e.g. 250_250.csv).
i = 0
for name in glob.glob(folder):
    fpath = name

    # Take the file name without its directory part.  os.path.basename uses
    # the separator rules of the running platform, so this no longer depends
    # on the path being written with "\".
    file_name = os.path.basename(name)

    # split the file name at "_" and "."; the first two items are the
    # coordinates (raw string: "\." is a regex escape)
    coords = re.split(r"_|\.", file_name)

    x_coord = coords[0]
    y_coord = coords[1]

    file_location.loc[i, 'X'] = str(x_coord)
    file_location.loc[i, 'Y'] = str(y_coord)
    file_location.loc[i, 'Path'] = fpath
    i += 1

In [None]:
###########################################
for idx, row in file_location.iterrows():
    check = 0  # not read in this loop
    x, y = float(row.X), float(row.Y)
    path = row.Path

    # only rows whose index is below 950 are processed
    if not idx < 950:
        continue

    # read the data, invert the sign of its signal columns, and store the result
    P = readCSV(path)
    finalP = flipArray(x, y, P)
    write_csv(x, y, finalP)
        