In [1]:
from pathlib import Path
import multiprocessing as mp
import itertools
import pydicom
import pandas as pd
import numpy as np

# Use every core but one, and never fewer than one worker: on a single-core
# machine `cpu_count() - 1` is 0, which `mp.Pool(processes=0)` rejects.
num_workers = max(1, mp.cpu_count() - 1)

In [2]:
p = Path("/nfs/masi/khanms/massion/test")

dcm_list = list(p.glob("**/*.dcm"))

In [3]:
dcm_list[0]

PosixPath('/nfs/masi/khanms/massion/test/10291207324/54922574/402/1168.dcm')

# Simple example

In [82]:
test = [('sami', 'blue', 'a', 3), ('sami', 'blue', 'a', 50), ('bob', 'blue', 'a', 10), ('sami', 'green', 'a', 1)]

In [17]:
key, mx, mn, ct = [], [], [], []
def inst_info(l):
    '''
    Summarise runs of consecutive records that share their first three fields.

    Parameters
    ----------
    l : iterable of tuples
        Records whose first three items form the grouping key and whose last
        item is the value of interest. Records are grouped only while they are
        adjacent (`itertools.groupby` semantics); the input is not sorted here.

    Returns
    -------
    zip
        Iterator of `(key, max_value, min_value, count)`, one per run.
    '''
    key, mx, mn, ct = [], [], [], []
    # One pass only: the original ran four separate groupby passes over `l`,
    # which silently produced nothing when `l` was a one-shot iterator.
    for k, v in itertools.groupby(l, key=lambda x: (x[0], x[1], x[2])):
        rows = list(v)
        key.append(k)
        # rows in a run share the key fields, so whole-tuple comparison is
        # decided by the fields after the key
        mx.append(max(rows)[-1])
        mn.append(min(rows)[-1])
        ct.append(len(rows))
    return zip(key, mx, mn, ct)

In [22]:
list( inst_info(test) )

[(('sami', 'blue', 'a'), 50, 3, 2),
 (('bob', 'blue', 'a'), 10, 10, 1),
 (('sami', 'green', 'a'), 1, 1, 1)]

In [17]:
max([1, 3, np.nan])

3

# Generate info for each instance

In [18]:
def dcm_instance(dcm_file):
    '''
    For each dcm file -> (Subject, Session, Instance, InstanceNumber)

    Subject, Session and Instance are the names of the three directories
    directly above the file (path components -4, -3 and -2). InstanceNumber is
    DICOM element (0020,0013) converted to `int`, or `np.nan` when the element
    is missing or cannot be converted.
    '''
    ds = pydicom.dcmread(str(dcm_file))
    parts = Path(dcm_file).parts
    location = (parts[-4], parts[-3], parts[-2])
    try:
        return location + (int(ds[0x20, 0x13].value),)
    except Exception:
        # Best-effort fallback, but unlike a bare `except:` this lets
        # KeyboardInterrupt / SystemExit propagate (matters inside pool workers).
        return location + (np.nan,)

In [19]:
%%time

res = list( map(dcm_instance, dcm_list) )

CPU times: user 22.3 s, sys: 3.03 s, total: 25.3 s
Wall time: 32.6 s


In [20]:
%%time

# `pool.map` blocks until every result is back, so the pool can be shut down
# as soon as the `with` block exits instead of leaking worker processes.
with mp.Pool(processes=num_workers) as pool:
    results = pool.map(dcm_instance, dcm_list)

CPU times: user 43.8 ms, sys: 58.4 ms, total: 102 ms
Wall time: 4.22 s


In [21]:
assert res == results #proof we generate the same output

Get the instance number, DICOM count, and diff for each.

In [22]:
def inst_info(l):
    '''
    Per-run DICOM count and instance-number span as a DataFrame.

    Parameters
    ----------
    l : iterable of tuples
        Records `(subject, session, inst, instance_number)`. Records are
        grouped on the first three fields while they are adjacent; the input
        is not sorted here.

    Returns
    -------
    pandas.DataFrame
        Columns `subject`, `session`, `dcmN` (records in the run),
        `instanceN` (max - min + 1 of the instance numbers) and
        `delta_dcmN_instN` (`instanceN - dcmN`).
    '''
    records = []
    # Single pass: the original iterated `l` four times, which breaks for
    # one-shot iterators and repeats the grouping work.
    for (subject, session, inst), v in itertools.groupby(l, key=lambda x: (x[0], x[1], x[2])):
        rows = list(v)
        records.append((subject, session, len(rows), max(rows)[-1], min(rows)[-1]))

    df = pd.DataFrame(records, columns=['subject', 'session', 'dcmN', 'max_instN', 'min_instN'])
    span = df['max_instN'] - df['min_instN'] + 1
    df = df.assign(instanceN=span, delta_dcmN_instN=span - df['dcmN'])

    return df[['subject', 'session', 'dcmN', 'instanceN', 'delta_dcmN_instN']]

In [23]:
%%timeit

inst_info(res)

19.5 ms ± 122 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


# Putting it together

> This is the code to do instance check!

In [60]:
def dcm_instance(dcm_file):
    '''
    For each dcm file -> (Subject, Session, Instance, InstanceNumber)

    The first three values are path components -4, -3 and -2 of `dcm_file`
    (the three directories directly above the file). InstanceNumber is DICOM
    element (0020,0013) as an `int`; `np.nan` is returned in its place when
    the element is absent or not convertible.
    '''
    ds = pydicom.dcmread(str(dcm_file))
    path_parts = Path(dcm_file).parts
    subject, session, instance = path_parts[-4], path_parts[-3], path_parts[-2]
    try:
        instance_number = int(ds[0x20, 0x13].value)
    except Exception:
        # `except Exception` keeps the best-effort NaN fallback without also
        # swallowing KeyboardInterrupt / SystemExit as a bare `except:` does.
        instance_number = np.nan
    return (subject, session, instance, instance_number)


def instance_check(dcm_root_folder):
    '''
    Compare the number of DICOM files with the instance-number span per session.

    Every `*.dcm` file below `dcm_root_folder` is read with `dcm_instance`
    (in parallel), the records are grouped by (subject, session, instance
    folder), and for each (subject, session) only the instance folder holding
    the most DICOM files is kept.

    Returns
    -------
    pandas.DataFrame
        Columns `subject`, `session`, `dcmN` (number of files), `instanceN`
        (max - min + 1 of the instance numbers), `delta_dcmN_instN`
        (`instanceN - dcmN`) and `dupe_inst` (1 when `instanceN / dcmN` is
        exactly 0.5, else 0). An empty frame with these columns is returned
        when no DICOM files are found.
    '''
    summary_columns = ['dcmN', 'instanceN', 'delta_dcmN_instN']

    # get a list of all of the DICOM files in the root folder
    dcm_list = list(Path(dcm_root_folder).glob("**/*.dcm"))
    # run `dcm_instance` on each DICOM file in the list (parallelized); the
    # context manager shuts the workers down instead of leaking them, and
    # max(1, ...) guards against a zero worker count on single-core hosts
    with mp.Pool(processes=max(1, num_workers)) as pool:
        l = pool.map(dcm_instance, dcm_list)

    # groupby subject_id, session and instance in a single pass, collecting the
    # file count and the max / min instance number of each group
    records = []
    for (subject, session, inst), group in itertools.groupby(l, key=lambda x: (x[0], x[1], x[2])):
        rows = list(group)
        records.append((subject, session, inst, len(rows), max(rows)[-1], min(rows)[-1]))

    if not records:
        return pd.DataFrame(columns=['subject', 'session'] + summary_columns + ['dupe_inst'])

    #generate dataframe
    df = pd.DataFrame(records, columns=['subject', 'session', 'inst', 'dcmN', 'max_instN', 'min_instN'])
    span = df['max_instN'] - df['min_instN'] + 1
    df = df.assign(instanceN=span, delta_dcmN_instN=span - df['dcmN'])
    # keep, per (subject, session), the instance folder with the most files
    df = df.groupby(['subject', 'session']).apply(lambda x: x.loc[x.dcmN.idxmax(), summary_columns]).reset_index()
    # indicate duplicate if instanceN is 0.5 of dcmN; `np.where` is optimized so negligible time to run
    df['dupe_inst'] = np.where(df.instanceN / df.dcmN == 0.5, 1, 0)

    return df

In [61]:
test = instance_check("/nfs/masi/khanms/massion/test")

In [62]:
test

Unnamed: 0,subject,session,dcmN,instanceN,delta_dcmN_instN,dupe_inst
0,10291207324,54741763,1408,704,-704,1
1,10291207324,54922574,554,554,0,0
2,10291207324,56876118,391,391,0,0


In [63]:
%%timeit

instance_check("/nfs/masi/khanms/massion/test")

4.4 s ± 15 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Original Implementation

In [35]:
def dcm_instance(dcm_root):
    """
    Check instance numbers in a DICOM folder
    Does the instance number span match the number of DICOMs found under `dcm_root`?

    Every `*.dcm` file below `dcm_root` (searched recursively) is read and the
    value of element (0020,0013) is collected.

    Returns (<num1>, <num2>, <num3>)
        <num1>: number of DICOM files read
        <num2>: max - min + 1 of the collected instance numbers
        <num3>: <num2> - <num1>
    Valid: if <num1> == <num2> & <num3> == 0 (<num3> is the diff b/w num1 and num2)
    Invalid: if <num1> != <num2>, i.e. <num3> != 0

    Raises ValueError when no DICOM file is found (including when the folder
    does not exist). Previously this surfaced as the less helpful
    "max() arg is an empty sequence" ValueError.
    """
    root = Path(dcm_root)
    if not root.exists():
        print("This folder does not exist. Please input an existing folder path.")
    dcm_list = list(root.glob("**/*.dcm"))
    if len(dcm_list) == 0:
        print(
            "We were unable to find DICOM files in this root directory. Please review path and try again."
        )
        raise ValueError(f"no DICOM files found under {root}")

    instanceN = [pydicom.dcmread(str(dcm_path))[0x20, 0x13].value for dcm_path in dcm_list]
    highest, lowest = max(instanceN), min(instanceN)
    print("max and min of instanceN ", highest, lowest)
    span = highest - lowest + 1
    return (
        len(instanceN),
        span,
        span - len(instanceN),
    )


def instanceN_fold(fold_root, save_csv_path="instance_num_check.csv"):  # instanceN_fold
    """
    Run `dcm_instance` on the largest instance folder of every session.

    Expected layout: `fold_root/<subject>/<session>/<instance>/...*.dcm`.
    For each session the instance folder containing the most `*.dcm` files
    (searched recursively) is RENAMED to `new_max` on disk and then checked.

    Arguments:
        - fold_root: root folder
        - save_csv_path: currently unused; this function writes no file
    Returns a DataFrame with one row per session:
        - sess: session folder name
        - single_folder: 1 if the session holds exactly one instance folder, else 0
        - instanceN, dicomN, dicomN-instanceN: the three values returned by
          `dcm_instance`, in that order, or "" when the check failed or the
          session has no instance folder
    NOTE(review): `dcm_instance` returns the file count first, so the
    `instanceN` / `dicomN` labels look swapped relative to their content - confirm.
    """
    sess, single_folder, instanceN, dicomN, diff = [], [], [], [], []

    def _record_failure():
        # keep all result columns the same length when a session cannot be checked
        instanceN.append("")
        dicomN.append("")
        diff.append("")
        print("dicom error")

    # Iterate the directory paths themselves. Rebuilding them from `Path.stem`
    # dropped everything after the last dot in names such as "1.2.840".
    for subj_path in [x for x in Path(fold_root).iterdir() if x.is_dir()]:
        for sess_path in [x for x in subj_path.iterdir() if x.is_dir()]:
            sess.append(sess_path.name)
            instance_paths = [x for x in sess_path.iterdir() if x.is_dir()]
            single_folder.append(1 if len(instance_paths) == 1 else 0)
            if not instance_paths:
                # nothing to rank; previously this crashed on max([])
                _record_failure()
                continue

            size_list = [len(list(p.rglob("*.dcm"))) for p in instance_paths]
            max_index = size_list.index(max(size_list))

            # Renames the dir with the greatest # of dcm image files
            new_max = sess_path / "new_max"
            instance_paths[max_index].rename(new_max)
            try:
                inst_n, dicom_n, same = dcm_instance(new_max)
            except Exception:
                # best-effort per session, without swallowing KeyboardInterrupt
                _record_failure()
            else:
                instanceN.append(inst_n)
                dicomN.append(dicom_n)
                diff.append(same)

    return pd.DataFrame(
        {
            "sess": sess,
            "single_folder": single_folder,
            "instanceN": instanceN,
            "dicomN": dicomN,
            "dicomN-instanceN": diff,
        }
    )

In [39]:
%%timeit

instanceN_fold("/nfs/masi/khanms/massion/test")

max and min of instanceN  554 1
max and min of instanceN  391 1
max and min of instanceN  704 1
max and min of instanceN  554 1
max and min of instanceN  391 1
max and min of instanceN  704 1
max and min of instanceN  554 1
max and min of instanceN  391 1
max and min of instanceN  704 1
max and min of instanceN  554 1
max and min of instanceN  391 1
max and min of instanceN  704 1
max and min of instanceN  554 1
max and min of instanceN  391 1
max and min of instanceN  704 1
max and min of instanceN  554 1
max and min of instanceN  391 1
max and min of instanceN  704 1
max and min of instanceN  554 1
max and min of instanceN  391 1
max and min of instanceN  704 1
max and min of instanceN  554 1
max and min of instanceN  391 1
max and min of instanceN  704 1
7.81 s ± 13.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


# Slice Distance Check

In [4]:
def dcm_slicedist(dcm_file):
    '''
    For each dcm file -> (Subject, Session, Instance, SliceLocation)

    Subject, Session and Instance are path components -4, -3 and -2 of
    `dcm_file`. SliceLocation is the dataset's `SliceLocation` attribute as a
    `float`, or `np.nan` when it is missing or cannot be converted.
    '''
    ds = pydicom.dcmread(str(dcm_file))
    parts = Path(dcm_file).parts
    location = (parts[-4], parts[-3], parts[-2])
    try:
        return location + (float(ds.SliceLocation),)
    except Exception:
        # Best-effort NaN fallback; `except Exception` (not a bare `except:`)
        # so KeyboardInterrupt / SystemExit still propagate.
        return location + (np.nan,)

In [5]:
dcm_slicedist( dcm_list[0] )

('10291207324', '54922574', '402', 138.63)

In [6]:
list( map(dcm_slicedist, dcm_list[0:6] ) )

[('10291207324', '54922574', '402', 138.63),
 ('10291207324', '54922574', '402', 54.63),
 ('10291207324', '54922574', '402', 12.63),
 ('10291207324', '54922574', '402', 75.63),
 ('10291207324', '54922574', '402', 51.63),
 ('10291207324', '54922574', '402', 9.63)]

In [288]:
z = [1, 2, 3, 4, 10]
#z = [1,3,50]

In [289]:
import operator

z_sort = sorted(z, reverse=True)
tmp = list(map(operator.sub, z[1:], z[:-1]))
list(map(operator.sub, tmp[1:], tmp[:-1]))

[0, 0, 5]

In [290]:
tmp

[1, 1, 1, 6]

In [337]:
def slice_loc_delta(ds_list):
    """
    Return 1 when the spacing of `ds_list` is regular, otherwise 0.

    The values are sorted from largest to smallest. For every run of three
    neighbours the absolute change between the two adjacent gaps must be
    strictly smaller than the gap between the two largest values. Inputs with
    fewer than three values always yield 1.
    """
    ordered = sorted(ds_list, reverse=True)
    if len(ordered) < 3:
        return 1
    leading_gap = ordered[0] - ordered[1]
    # gap between each value and the next smaller one
    gaps = [upper - lower for upper, lower in zip(ordered, ordered[1:])]
    regular = [abs(first - second) < leading_gap for first, second in zip(gaps, gaps[1:])]
    return 1 if all(regular) else 0

In [338]:
slice_loc_delta(sorted(z, reverse=True))

1

In [331]:
slice_loc_delta(sorted([9,10,10], reverse=True))

[2, 2]

In [332]:
np.where( slice_loc_delta(sorted(z, reverse=True)), 0, 1 )

array([0, 0, 0])

In [294]:
z_sort[0] - z_sort[1]

6

In [300]:
# inspired by https://www.geeksforgeeks.org/python-generate-successive-element-difference-list/

def slice_analyzer(slice_list):
    """
    Debug variant: expose the intermediate values of the spacing check.

    Returns `(initial_delta, eval_list, check_bool)` where, for the values
    sorted from largest to smallest, `initial_delta` is the gap between the
    two largest values, `eval_list` holds the signed second differences and
    `check_bool` tells whether every entry of `eval_list` is below
    `initial_delta`. Needs at least two values.
    """
    descending = sorted(slice_list, reverse=True)
    initial_delta = descending[0] - descending[1]
    # successive differences (next - current), hence non-positive for sorted input
    steps = [following - current for current, following in zip(descending, descending[1:])]
    eval_list = [later - earlier for earlier, later in zip(steps, steps[1:])]
    check_bool = all([change < initial_delta for change in eval_list])
    return initial_delta, eval_list, check_bool

In [301]:
slice_analyzer(z)

(6, [5, 0, 0], True)

In [76]:
np.where(all(x < 6 for x in [5, 0, 0]), 1, 0)

array(1)

In [81]:
from itertools import groupby

things = [("animal", "bear"), ("animal", "duck"), ("plant", "cactus"), ("vehicle", "speed boat"), ("vehicle", "school bus")]

for key, group in groupby(things, lambda x: x[0]):
    for thing in group:
        print("A %s is a %s." % (thing[1], key))
    print(" ")

A bear is a animal.
A duck is a animal.
 
A cactus is a plant.
 
A speed boat is a vehicle.
A school bus is a vehicle.
 


In [83]:
test

[('sami', 'blue', 'a', 3),
 ('sami', 'blue', 'a', 50),
 ('bob', 'blue', 'a', 10),
 ('sami', 'green', 'a', 1)]

In [None]:
key, mx, mn, ct = [], [], [], []
def inst_info(l):
    """
    Collapse adjacent records with the same first three fields.

    `l` is an iterable of tuples; the first three items are the grouping key
    and the last item is the value. Grouping follows `itertools.groupby`, so
    only neighbouring records are merged (no sorting happens here).

    Returns a `zip` iterator of `(key, max_value, min_value, count)`.
    """
    key, mx, mn, ct = [], [], [], []
    # Walk the groups once. Four separate groupby passes re-did the same work
    # and returned nothing at all for a generator or other one-shot iterable.
    for group_key, members in itertools.groupby(l, key=lambda x: (x[0], x[1], x[2])):
        members = list(members)
        key.append(group_key)
        mx.append(max(members)[-1])
        mn.append(min(members)[-1])
        ct.append(len(members))
    return zip(key, mx, mn, ct)

In [84]:
[slice_analyzer(list(g)) for _, g in itertools.groupby(test, key=lambda x:(x[0], x[1], x[2]))]

TypeError: unsupported operand type(s) for -: 'tuple' and 'tuple'

In [137]:
import operator

tmp = [list(v) for k,v in itertools.groupby(test, key=lambda x:(x[0], x[1], x[2]))]
[(i4) for i1, i2, i3, i4 in t]

[3, 50]

In [214]:
import itertools

for key, group in itertools.groupby(sorted(test), key=lambda x:(x[0], x[1], x[2])):
    print(list(group))

[('bob', 'blue', 'a', 9), ('bob', 'blue', 'a', 10), ('bob', 'blue', 'a', 10)]
[('sami', 'blue', 'a', 1), ('sami', 'blue', 'a', 3), ('sami', 'blue', 'a', 50)]


In [132]:
[x[0] for x, in test]

ValueError: too many values to unpack (expected 1)

In [130]:
servers = [('server1', 80 , 1, 2), ('server2', 443, 3, 4)]

[(server, port) for server, port, *_ in servers]

[('server1', 80), ('server2', 443)]

In [143]:
t = [('sami', 'blue', 'a', 3), ('sami', 'blue', 'a', 50)]
t2 = [('bob', 'blue', 'a', 10)]

In [146]:
print( [(i4) for i1, i2, i3, i4 in t] )
print( [(i4) for i1, i2, i3, i4 in t2] )

[3, 50]
[10]


In [147]:
def tuple_extractor(tuple_list):
    """Return the fourth item of every 4-tuple in `tuple_list`, in order."""
    values = []
    for _first, _second, _third, value in tuple_list:
        values.append(value)
    return values

In [148]:
tmp

[[('sami', 'blue', 'a', 3), ('sami', 'blue', 'a', 50)],
 [('bob', 'blue', 'a', 10)],
 [('sami', 'green', 'a', 1)]]

In [150]:
list( map(tuple_extractor, tmp) )

[[3, 50], [10], [1]]

In [276]:
# This works

def slicedist_info(l):
    """
    Run `slice_analyzer` on the values of every (field0, field1, field2) group.

    `l` is an iterable of 4-tuples. The records are sorted so equal keys are
    adjacent, grouped on their first three fields, and the fourth field of
    each group is collected with `tuple_extractor`.

    Returns a `zip` iterator of `(key, values, slice_analyzer(values))`.
    """
    key, val_list, slice_bool = [], [], []
    # Sort and group once. The original did both twice, which doubled the work
    # and produced an empty result for one-shot iterables.
    for k, v in itertools.groupby(sorted(l), key=lambda x: (x[0], x[1], x[2])):
        values = tuple_extractor(list(v))
        key.append(k)
        val_list.append(values)
        slice_bool.append(slice_analyzer(values))
    return zip(key, val_list, slice_bool)

In [277]:
test = [('sami', 'blue', 'a', 3), ('sami', 'blue', 'a', 50), ('bob', 'blue', 'a', 10), ('bob', 'blue', 'a', 10), ('bob', 'blue', 'a', 9), ('sami', 'blue', 'a', 1)]

t = list( slicedist_info(test) )

[(('bob', 'blue', 'a'), [9, 10, 10], True),
 (('sami', 'blue', 'a'), [1, 3, 50], True)]

In [187]:
[slice_analyzer(i[1]) for i in t]

[True, True]

In [183]:
z = (('bob', 'blue', 'a'), [9, 10, 10])

In [185]:
z[1]

[9, 10, 10]

In [302]:
# inspired by https://www.geeksforgeeks.org/python-generate-successive-element-difference-list/

def slice_analyzer(slice_list):
    """
    Tell whether the spacing of `slice_list` is regular.

    The values are sorted from biggest to smallest; the check passes when
    every absolute change between two adjacent gaps is strictly smaller than
    the gap between the two biggest values.

    Returns True/False. Inputs with fewer than two values return True (there
    is no gap to violate) instead of raising IndexError.
    """
    # sort from biggest to smallest
    l_sort = sorted(slice_list, reverse=True)
    if len(l_sort) < 2:
        return True
    # calculate dist b/w 0th and 1st element of sorted list
    initial_delta = l_sort[0] - l_sort[1]
    # gap b/w i-th and i+1-th element (plain zip, so no `operator` import is needed)
    gaps = [upper - lower for upper, lower in zip(l_sort, l_sort[1:])]
    # absolute change b/w each gap and the following one
    eval_list = [abs(current - following) for current, following in zip(gaps, gaps[1:])]
    #boolean to make sure all deltas are less than delta b/w 0th and 1st elements
    return all(x < initial_delta for x in eval_list)

## Putting it all together

In [440]:
# Alternative to `slice_analyzer`
def slice_loc_delta(ds_list):
    """
    Return True when the spacing of `ds_list` is regular.

    After sorting from largest to smallest, each triple of neighbours must
    satisfy: |gap(i, i+1) - gap(i+1, i+2)| < gap(0, 1). Fewer than three
    values always pass.
    """
    ordered = sorted(ds_list, reverse=True)
    verdicts = [
        abs((ordered[pos] - ordered[pos + 1]) - (ordered[pos + 1] - ordered[pos + 2]))
        < (ordered[0] - ordered[1])
        for pos in range(len(ordered) - 2)
    ]
    return all(verdicts)

def slice_analyzer(slice_list):
    """
    Check that consecutive gaps in `slice_list` do not jump.

    # inspired by https://www.geeksforgeeks.org/python-generate-successive-element-difference-list/

    Returns True when, for the values sorted from biggest to smallest, every
    absolute difference between neighbouring gaps is strictly below the gap
    between the two biggest values; False otherwise. With fewer than two
    values there is nothing to compare and True is returned (this used to
    raise IndexError).
    """
    # sort from biggest to smallest
    l_sort = sorted(slice_list, reverse=True)
    if len(l_sort) < 2:
        return True
    # calculate dist b/w 0th and 1st element of sorted list
    initial_delta = l_sort[0] - l_sort[1]
    # calc gap b/w i-th and i+1-th element; zip keeps the function independent
    # of an `operator` import made in another cell
    gaps = [bigger - smaller for bigger, smaller in zip(l_sort, l_sort[1:])]
    # calc absolute diff b/w each gap and the next one
    eval_list = [abs(gap - next_gap) for gap, next_gap in zip(gaps, gaps[1:])]
    #boolean to make sure all deltas are less than delta b/w 0th and 1st elements
    check_bool = all(x < initial_delta for x in eval_list)
    return check_bool

def tuple_extractor(tuple_list):
    """Collect the last field of each 4-field record in `tuple_list`."""
    return [measurement for _subject, _session, _inst, measurement in tuple_list]

def slicedist_check(l):  #note elsewhere this is called `slicedist_info`
    """
    Slice-distance check for every (subject, session, inst) group.

    `l` is an iterable of `(subject, session, inst, value)` tuples. Records
    are sorted, grouped on their first three fields, and the values of each
    group are passed to `slice_loc_delta`.

    Returns a DataFrame with columns `subject`, `session`, `inst` and
    `distance_check` (the result of `slice_loc_delta` for that group).
    """
    records = []
    # One sort + one grouping pass; building the rows directly also avoids the
    # unpacking failure the intermediate `key` column caused on empty input.
    for (subject, session, inst), v in itertools.groupby(sorted(l), key=lambda x: (x[0], x[1], x[2])):
        values = tuple_extractor(list(v))
        records.append((subject, session, inst, slice_loc_delta(values)))    # can replace slice_loc_delta with slice_analyzer

    return pd.DataFrame(records, columns=['subject', 'session', 'inst', 'distance_check'])

In [441]:
key = [('bob', 'blue', 'a'), ('sami', 'blue', 'a')]
slice_bool = [False, True]

In [442]:
pd.DataFrame( zip( key, slice_bool ) )

Unnamed: 0,0,1
0,"(bob, blue, a)",False
1,"(sami, blue, a)",True


In [443]:
slicedist_info(test)

Unnamed: 0,subject,session,inst,distance_check
0,bob,blue,a,False
1,sami,blue,a,True


In [418]:
test = [('sami', 'blue', 'a', 3), ('sami', 'blue', 'a', 50), ('bob', 'blue', 'a', 10), ('bob', 'blue', 'a', 10), ('bob', 'blue', 'a', 9), ('sami', 'blue', 'a', 1)]

df = pd.DataFrame( list( slicedist_info(test) ), columns=['key', 'distance_check'])
df['subject'], df['session'], df['inst'] = zip(*df['key'])
df[['subject', 'session', 'inst', 'distance_check']]

Unnamed: 0,subject,session,inst,distance_check
0,bob,blue,a,False
1,sami,blue,a,True


In [350]:
# Alternative to `slice_analyzer`
def slice_loc_delta(ds_list):
    """
    Spacing regularity check; returns a bool.

    Values are ranked from largest to smallest and every window of three
    neighbours is inspected: the absolute difference between its two gaps has
    to be strictly smaller than the gap between the two largest values.
    Lists shorter than three values pass.
    """
    ranked = sorted(ds_list, reverse=True)
    uniform = True
    for upper, middle, lower in zip(ranked, ranked[1:], ranked[2:]):
        gap_change = abs((upper - middle) - (middle - lower))
        if not gap_change < (ranked[0] - ranked[1]):
            uniform = False
    return uniform

def slice_analyzer(slice_list):
    """
    Spacing regularity check returning 1 (pass) or 0 (fail).

    # inspired by https://www.geeksforgeeks.org/python-generate-successive-element-difference-list/

    The values are sorted from biggest to smallest. The check passes when
    every absolute change between two adjacent gaps is strictly smaller than
    the gap between the two biggest values. Fewer than two values count as a
    pass (previously an IndexError).
    """
    # sort from biggest to smallest
    l_sort = sorted(slice_list, reverse=True)
    if len(l_sort) < 2:
        return 1
    # calculate dist b/w 0th and 1st element of sorted list
    initial_delta = l_sort[0] - l_sort[1]
    # calc gap b/w i-th and i+1-th element (zip-based, no `operator` import required)
    gaps = [high - low for high, low in zip(l_sort, l_sort[1:])]
    # calc absolute diff b/w each gap and the one after it
    eval_list = [abs(gap - next_gap) for gap, next_gap in zip(gaps, gaps[1:])]
    #boolean to make sure all deltas are less than delta b/w 0th and 1st elements
    check_bool = all(x < initial_delta for x in eval_list)
    return int(check_bool)

def tuple_extractor(tuple_list):
    """Given 4-item tuples, return a list holding the fourth item of each."""
    fourth_items = (item for _a, _b, _c, item in tuple_list)
    return list(fourth_items)

def slicedist_info(l):
    """
    Pair every (field0, field1, field2) key with its `slice_analyzer` result.

    `l` is an iterable of 4-tuples; it is sorted, grouped on the first three
    fields, and the fourth field of each group (via `tuple_extractor`) is
    handed to `slice_analyzer`.

    Returns a `zip` iterator of `(key, slice_analyzer_result)`.
    """
    key, slice_bool = [], []
    # Sorting and grouping once halves the work and keeps one-shot iterables
    # from yielding an empty result.
    for k, v in itertools.groupby(sorted(l), key=lambda x: (x[0], x[1], x[2])):
        key.append(k)
        slice_bool.append(slice_analyzer(tuple_extractor(list(v))))    # can replace slice_analyzer with slice_loc_delta
    return zip(key, slice_bool)

In [355]:
test = [('sami', 'blue', 'a', 3), ('sami', 'blue', 'a', 50), ('bob', 'blue', 'a', 10), ('bob', 'blue', 'a', 10), ('bob', 'blue', 'a', 9), ('sami', 'blue', 'a', 1)]

assert list( slicedist_info(test) ) == [(('bob', 'blue', 'a'), False), (('sami', 'blue', 'a'), True)]

AssertionError: 

# Test statements

In [314]:
# Make sure slice_analyzer function works well


def test_slice_analyzer(slice_list):
    # unsorted example
    l_sort = slice_list
    initial_delta = l_sort[0] - l_sort[1]
    tmp = list(map(operator.sub, l_sort[1:], l_sort[:-1]))
    eval_list = [abs(i) for i in list(map(operator.sub, tmp[1:], tmp[:-1])) ]
    #boolean to make sure all deltas are less than delta b/w 0th and 1st elements
    check_bool = all(x < initial_delta for x in eval_list)
    return eval_list

test_list = [1, 4, 5, 3, 6] 
# diff b/w ith and i+1th element: 1 and 4 -> 3, 4 and 5 -> 1, etc. --->  [3, 1, -2, 3]
# diff b/w consecutive entries of that list: 1 - 3 -> -2, -2 - 1 -> -3, 3 - (-2) -> 5  ---> [-2, -3, 5]; absolute values ---> [2, 3, 5]

print(f"Result from unsorted test list: {test_slice_analyzer(test_list)}." )

assert test_slice_analyzer(test_list) == [2,3, 5]

# For our purposes, we want a sorted list though so let's rewrite this test

def test_slice_analyzer_sorted(slice_list):
    # sort from biggest to smallest
    l_sort = sorted(slice_list, reverse=True)
    initial_delta = l_sort[0] - l_sort[1]
    tmp = list(map(operator.sub, l_sort[1:], l_sort[:-1]))
    eval_list = [abs(i) for i in list(map(operator.sub, tmp[1:], tmp[:-1])) ]
    #boolean to make sure all deltas are less than delta b/w 0th and 1st elements
    check_bool = all(x < initial_delta for x in eval_list)
    return eval_list

test_list = [1, 4, 5, 3, 6] 
# sorted list = [6,5,4,3,1]
# diff b/w ith and i+1th element: 5 - 6 -> -1, 4 - 5 -> -1, etc. --->  [-1, -1, -1, -2]
# diff b/w consecutive entries of that list: -1 - (-1) -> 0, -1 - (-1) -> 0, -2 - (-1) -> -1; absolute values ---> [0, 0, 1]

print(f"Result from unsorted test list: {test_slice_analyzer_sorted(test_list)}." )

assert test_slice_analyzer_sorted(test_list) == [0,0,1]

Result from unsorted test list: [2, 3, 5].
Result from unsorted test list: [0, 0, 1].
