From a9a8b7ae21e32e966ac58f56f5d6b2fa354bef01 Mon Sep 17 00:00:00 2001 From: RosCraddock <109593931+RosCraddock@users.noreply.github.com> Date: Mon, 21 Oct 2024 13:40:44 +0100 Subject: [PATCH 1/2] Allow AAP to be non integer Changed integer to a float to allow decimal places on input, e.g. 0.5, 0.45, etc. Previously, the input was forced to be an integer (and then later turned into a float). --- MultiThreadIO.py | 11 +++++++---- Pedigree.py | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/MultiThreadIO.py b/MultiThreadIO.py index 1088f10..c88b166 100644 --- a/MultiThreadIO.py +++ b/MultiThreadIO.py @@ -56,7 +56,7 @@ def split_by(array, step): -def process_input_line(line, startsnp, stopsnp, dtype): +def process_input_line(line, startsnp, stopsnp, dtype, nonInteger = False): parts = line.split(); idx = parts[0] parts = parts[1:] @@ -64,7 +64,10 @@ def process_input_line(line, startsnp, stopsnp, dtype): if startsnp is not None : parts = parts[startsnp : stopsnp + 1] #Offset 1 for id and 2 for id + include stopsnp - data=np.array([int(val) for val in parts], dtype = dtype) + if nonInteger : + data = np.array([val for val in parts], dtype=dtype) + else : + data = np.array([int(val) for val in parts], dtype = dtype) return (idx, data) @@ -91,7 +94,7 @@ def process_input_line_plink(line, startsnp, stopsnp, dtype): return (idx, data) -def readLines(fileName, startsnp, stopsnp, dtype, processor=process_input_line): +def readLines(fileName, startsnp, stopsnp, dtype, processor=process_input_line, nonInteger = False): # print(f"Reading in file: {fileName}") try: @@ -117,7 +120,7 @@ def readLines(fileName, startsnp, stopsnp, dtype, processor=process_input_line): if iothreads <= 1: for line in f: - output.append(processor(line, startsnp = startsnp, stopsnp = stopsnp, dtype = dtype)) + output.append(processor(line, startsnp = startsnp, stopsnp = stopsnp, dtype = dtype, nonInteger = nonInteger)) return output diff --git a/Pedigree.py b/Pedigree.py index 090f248..f42a852 
100644 --- a/Pedigree.py +++ b/Pedigree.py @@ -942,7 +942,7 @@ def readInAAP(self, fileName): :param fileName: The file path :type fileName: str """ - data_list = MultiThreadIO.readLines(fileName, startsnp=None, stopsnp=None, dtype = np.int64) + data_list = MultiThreadIO.readLines(fileName, startsnp=None, stopsnp=None, dtype = np.float32, nonInteger = True) # flag of whether adding a default alternative allele probability default_aap = False From e1fc6cf2b56572f391b70c5d56a827d0219d43f4 Mon Sep 17 00:00:00 2001 From: RosCraddock <109593931+RosCraddock@users.noreply.github.com> Date: Tue, 22 Oct 2024 13:44:46 +0100 Subject: [PATCH 2/2] Removed nonInteger argument --- MultiThreadIO.py | 8 ++++---- Pedigree.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/MultiThreadIO.py b/MultiThreadIO.py index c88b166..2577944 100644 --- a/MultiThreadIO.py +++ b/MultiThreadIO.py @@ -56,7 +56,7 @@ def split_by(array, step): -def process_input_line(line, startsnp, stopsnp, dtype, nonInteger = False): +def process_input_line(line, startsnp, stopsnp, dtype): parts = line.split(); idx = parts[0] parts = parts[1:] @@ -64,7 +64,7 @@ def process_input_line(line, startsnp, stopsnp, dtype, nonInteger = False): if startsnp is not None : parts = parts[startsnp : stopsnp + 1] #Offset 1 for id and 2 for id + include stopsnp - if nonInteger : + if dtype in [np.float16, np.float32, np.float64] : data = np.array([val for val in parts], dtype=dtype) else : data = np.array([int(val) for val in parts], dtype = dtype) @@ -94,7 +94,7 @@ def process_input_line_plink(line, startsnp, stopsnp, dtype): return (idx, data) -def readLines(fileName, startsnp, stopsnp, dtype, processor=process_input_line, nonInteger = False): +def readLines(fileName, startsnp, stopsnp, dtype, processor=process_input_line): # print(f"Reading in file: {fileName}") try: @@ -120,7 +120,7 @@ def readLines(fileName, startsnp, stopsnp, dtype, processor=process_input_line, if iothreads <= 1: for line in f: - 
output.append(processor(line, startsnp = startsnp, stopsnp = stopsnp, dtype = dtype, nonInteger = nonInteger)) + output.append(processor(line, startsnp = startsnp, stopsnp = stopsnp, dtype = dtype)) return output diff --git a/Pedigree.py b/Pedigree.py index f42a852..339f97e 100644 --- a/Pedigree.py +++ b/Pedigree.py @@ -942,7 +942,7 @@ def readInAAP(self, fileName): :param fileName: The file path :type fileName: str """ - data_list = MultiThreadIO.readLines(fileName, startsnp=None, stopsnp=None, dtype = np.float32, nonInteger = True) + data_list = MultiThreadIO.readLines(fileName, startsnp=None, stopsnp=None, dtype = np.float32) # flag of whether adding a default alternative allele probability default_aap = False