From 33e45804a38c81232ce492a38f2a347519e09e75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Fri, 31 May 2019 13:59:38 -0400 Subject: [PATCH 1/2] modify paired_with check to work with multiple aliases for cases with 2 different aliases in filefastq aliases, create a symlink key for the secondary aliases, and in the check, look for symlinks and connect to the main item (first alias) --- wranglertools/import_data.py | 50 +++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 9 deletions(-) diff --git a/wranglertools/import_data.py b/wranglertools/import_data.py index 83c75638..b945cf66 100755 --- a/wranglertools/import_data.py +++ b/wranglertools/import_data.py @@ -909,9 +909,12 @@ def _add_e_to_edict(alias, err, errors): def _pairing_consistency_check(files, errors): """checks the datastructure for consistency""" - file_list = sorted([f for f in files]) + file_list = sorted([f for f in files if not files[f].get('symlink')]) pair_list = [] for f, info in files.items(): + # skip links for secondary aliases + if info.get('symlink'): + continue pair = info.get('pair') if not pair: err = 'no paired file but paired_end = ' + info.get('end') @@ -919,6 +922,7 @@ def _pairing_consistency_check(files, errors): else: pair_list.append(pair) paircnts = Counter(pair_list) + # filelist without symlinks should have the same size as paircnts if len(file_list) != len(paircnts): err = str(len(file_list)) + " FILES paired with " + str(len(paircnts)) errors = _add_e_to_edict('MISMATCH', err, errors) @@ -929,8 +933,10 @@ def check_file_pairing(fastq_row): """checks consistency between file pair info within sheet""" fields = next(fastq_row) fields.pop(0) + # make sure we have the aliases field if 'aliases' not in fields: return {'NO GO': 'Can only check file pairing by aliases'} + # find alias and paired_end column indexes alias_idx = fields.index("aliases") pair_idx = None if 'paired_end' in fields: @@ -946,36 +952,62 @@ def check_file_pairing(fastq_row): err = "alias missing - can't check file pairing" errors = _add_e_to_edict('unaliased', err, errors) continue + # look for multiple aliases, treat first alias as the main one, and others as secondary + aliases = [x.strip() for x in alias.split(",")] + aliases = list(filter(None, aliases)) paired_end = row[pair_idx] if pair_idx else None saw_pair = False for i, fld in enumerate(row): if fld.strip() == 'paired with': if saw_pair: err = 'single row with multiple paired_with values' - errors = _add_e_to_edict(alias, err, errors) + errors = _add_e_to_edict(aliases[0], err, errors) continue else: pfile = row[i + 1] saw_pair = True if not paired_end: err = 'missing paired_end number' - errors = _add_e_to_edict(alias, err, errors) - files[alias] = {'end': paired_end, 'pair': pfile} + errors = _add_e_to_edict(aliases[0], err, errors) + main = True + # if there are multiple aliases, create symlinks with secondary aliases in the files dictionary + for an_alias in aliases: + # if this is the first alias, put all info in the dict + if main: + files[an_alias] = {'end': paired_end, 'pair': pfile} + main = False + else: + files[an_alias] = {'symlink': aliases[0]} + if not saw_pair and paired_end: files[alias] = {'end': paired_end} for f, info in sorted(files.items()): # sorted purely for testing + # skip the aliases that are secondary + if info.get('symlink'): + continue if info.get('pair'): fp = info.get('pair') if fp not in files: err = "paired with not found %s" % fp errors = _add_e_to_edict(f, err, errors) else: - if files[fp].get('pair') and files[fp]['pair'] != f: - err = 'attempting to alter existing pair %s\t%s' % (fp, files[fp]['pair']) - errors = _add_e_to_edict(f, err, errors) - else: + # if the linked one is an symlink, go the the main one + if files[fp].get('symlink'): + fp = files[fp]['symlink'] + # Paired file might not have the mirroring pair info, FF creates that automatically + if not files[fp].get('pair'): files[fp]['pair'] = f - + # if there is pairing info, check that if linking is mutual + else: + mirrored_pair = files[fp]['pair'] + # convert the symlink to the main id + if files[mirrored_pair].get('symlink'): + mirrored_pair = files[mirrored_pair]['symlink'] + # correct the record in files + files[fp]['pair'] = mirrored_pair + if mirrored_pair != f: + err = 'attempting to alter existing pair %s\t%s' % (fp, files[fp]['pair']) + errors = _add_e_to_edict(f, err, errors) return _pairing_consistency_check(files, errors) From 565ec464c7cc797efbc8be5ed6d3e524aeb4b081 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Koray=20K=C4=B1rl=C4=B1?= Date: Fri, 31 May 2019 13:59:47 -0400 Subject: [PATCH 2/2] add test --- tests/data_files/FileFastq_pairing.xlsx | Bin 0 -> 24750 bytes tests/test_import_data.py | 9 +++++++++ 2 files changed, 9 insertions(+) create mode 100644 tests/data_files/FileFastq_pairing.xlsx diff --git a/tests/data_files/FileFastq_pairing.xlsx b/tests/data_files/FileFastq_pairing.xlsx new file mode 100644 index 0000000000000000000000000000000000000000..9be2870a4fb138ea70366f0d123cc7aef5cfabe6 GIT binary patch literal 24750 zcmeFX2Ut`~(;$3ch$0yQL4pKPNh(Ps3`hnE0un?(au{F`1{h&r5QHHJs3<5%lB@*D zNR+H1l0iVi3`!0&N*G~a_TYW*eee5y-@m*6xBouR?!S9@=u@Y-d=()sVL881C${B z|9$=s)CU(w|IL%I?&OK?B`-#br&Xf7T*4{$N?(}Qo|~A$oojVDsqK(I@cHV8+Hf!V=B8&7 zwfq-yz2Ze-PFq1QoJJ#9tu8aC2-PrXYwj^J?!LSkM?>6pM|6n034eLkV-#s9#%vP* zG;v}+x;9AU$>+-b>9?O>Ik$(Do@b?h5_Ekgol9+A4LA7_(m+Zt$^ImgSo#CGUDtuZ zEWA^HURKQ^_=>CHbGC@HKP#^4Lg#0NG<3roRFIGL@zgPmrInjE1Z`_BAz;*dz*R>E zmISGxo7_2TnRnko`Lr`r2+!3U_8#L|6LnQ6l|#u%51kXa9lv_Mld)`MS6TQP2mFwu zNh_#=kjD6N*)7j$7atW!y<4t(BakGev@1e&`aJRE!xq>5J9&>(rt#TRIU(#K@A=`G zGug{rpnZ|a5WwVLoHX>4c_t(1q|=}eGJsBM@8{y>FD-Rc{$J<(KiDJxCVI(Z!z1U6 z+SUEqmKHi+EqLD~!-vH*h%O3pZQa9>jr>0FbhApdmPQ`ijfi}e`Nhq?^ljG*Dj|g| zHC2=;TJ#S}5c*Tv?_dYj^|YMqH=~-XMCNo^r6Okpl+lDDU(LRJzsvP)mQuSiud;R;9?f}Wgnxc`Ll-x zT<%znGYooe(k0ykCT!U+P@l&Mjk)hf-t|MK?U=spgh(zAeuzAs{u#L-D^eLZqRbWy z)u|>KzdZNza#qfwfD}S$u|Y9q^t(iBc{{hIC$p>D^6Wa(_A#1-R3Jp7wwCQ}Vdqv*W_? z?#;;N$idJ%LkSnPGwiyv$0b7mC-FP(j=To@$#q#DvJG<(ske@2-EZ9#%UR!WJJrc` zW+l?n?^GZX1v@GP0U%j7-9i=tof z*X-vK5+TTbqtMJX!<5V!b3JGM_MM2iF`r@NKCAeBUFuxQci4r|v3V;+Kzxix(vKu`szrRJwU@@JyubSv zE8w0w-@Wgn6EmM+@~QNin5?s>(}|yRlMHtCdZTtA6PLB54Wa(R)sj_J8S2m^No`60LvuapSEmZ+($q;O-h}Sv zq)Iw3pULA$#y$PstW|cg!`goLCNFfn_hL*}Fq}!7LT+(GbXVn60aa7R$lWK7zUbR$ zxDt#_EnR_{4nsN?X$5E^n=eLjUqpQN+vQk=)$uPnr==4x8@P4Oh)fmqa~_II0EGHO zdR)~fle@LL{X9(L1 zJNkaG7sUs0yjm$09Sy!h+ENSa6H-%V&YoY_CE=fS%xl*uSNfA9;|B(|vXq4DLM6k% znf~`SAkVOA>H*IG1PuUi{9yzB?hbw~&SnTd4<9%GqgCVnjW*9IHI`ettV3~_O$#)Jm zcAdvwJG`LU^!fCay}kNoA_v7?hrEHYxhpFVU#9o%2Z;+YE5qFRMrZ1JMRF>;wN1Qi zWWN@d!0s;&t+RRZ9n+J02}eA(wX+jTFgX)3w2@%1Gj7ScmJmESKTTnkH|VP6S@k2m zs`RQPHs0O$tv<>1=@v0ZteWjUN4tP{{zWJ^T}Y`3lS`s@G#}?}|_cRfNf%yHQx z4EH4Vo+Si3=(ekyZ&teKrCqltaV(qD;6^bU{{|M)-N@}+{P9-2;{@Vvjznz1=(Eh8 zoUOW>Pvy3lsamv`grg|$ZU$1#wA0>TG;HB&<#)ev8QbvFr0*SQ+xrZ zerE6QxLD7&3Z`K%@F)g5^Y+wmb7QUD>P%m$(&u2?%mX>zXSnREeK|J1$?1FSHZu8M zYf+8~Tf$=fKwq1cpkz_v?6<(d#oG7X4%4F+Pj!1YtMnhD(+=pG4@C2-6hBp-qgl5> zi0bEMzM`|Epw}9AV#^3#U@-qIAKJ&|IN$UR^O9UJr}Zls^-;^+3(p(jL3gTUSY5`; z<@DSm(wo~i(hB{be=VIsy)*u=stU}ZYYDbA6`)S50|G4%rP)lS^${gJ36%eUY-!{F5WKE zzsrk9mbqqa4CmG2XcZ>#X@|g_!~%JA0$zQouyG&OuCN)IN2=@?$uw4_ofIQ5*64_c zXsX|OSbs-NE#CrXs%n@`7dKM=F8%rRwvn>W3pO=1fA{99*4>5X*?k%G&PvVvvm6Z} zk?(pkMRsLWF(Ri0hMd11-`HtPm8TRrVHEw0a9=j^+@rRd(xBP5aSAsy9V66KAEnoy z)YQ*U7Z<;D-$6q9=MxdUPp8&{6UuRfk0x`%1H4sFNA+DoMZ0C`O(L93Lfm9uo>mVn zz4Ybd(;#l;Gx{s{9>!)`7@ne!;Y|8-l$LM9hbrZIB>0R{z`^XqnYH^Nq|WM zKQd`LX1(fTZu9mt+xx>8&}BI-=m! z$)+sFqL}9`mi?`v+oW^B+oTU4I^4vV(U4 z;9o=;s9g#HiryVGhK8-sUZw7n2qyeWw$D_#OX%up4g2{8GRuiHiQe_NCemP-IG~!; zrl9cvrg$>ur{0$EH9q=N_k5C865~$It)FvMy%k^oZJK>R8TD>8nKrSsS@#;p_9v7~ z9N(POVo6t2YIu5Z8C%DJ-lZL5sWUELRl6}QZ>KKbxjOYcZ$R#69-SldT8`l`H@J{(VJRyB_u^r>u{viPq2YHdb1y!Xaj?DR=ma1&5s4FRvVreWLnY ziIOBqeQ+i^>^tMWRC>3R>Dl19;kyj}cG?^G$iVyaBbfC;k@HTo$+Qn&amMUzo|a%=NBUI)j9q909v>Z{dl;Ycb= z0h1HkLND4|nt9wsot_u-8$9u&U(}LW8!aWg*|prMwjN((nNk$7XhgYV#oh4;U{ zNLWFJKFhb~b>T?XkH6h&d*IBXY-=cSZk*F#;JS?ah~+B9u{)E;wS9-8vhEyHPY#xj zncyyqzbfiAmT4VdpzFr3K(f9O)f-PuJ^$p+`Mk-tOO+R8%PUPPH2a;`?MufIm9`Hc znXZ*DPQZ8{3Xz|kVDVI_c9s>&IZxXt`|@p7lC{`3b}db59{wDP^qA1&SX!eRsK$A0 zdpk#USooA&U+vxRJ7XooPd2NMe@=8t(Z0NzvKM%!{4#^WLEf7-<)@z@vrVeQS9;cNSv{37bH4P-x$Ii$1jJ9* z%6|7kQ}XeXs4RAKJ?H!o7)0TvfKFlI6$T2r${o;+cW_E}U~iH(^R+FM3{Yf4v-oYwEdR zwdQ78PkVJwz=~W%_x!*vhigQ27Bdq!_cQW4v$}=-kS*gST?O5J7_iT;=3W6^!*`%C!CP&CK2 zFKm_ex<4c<)nOMi+@t2wHV5X8xgsS!Eqkvbx4p1e+dmzbY-Ew=@>He9hf2V{?zz(< zr<_a28|wWEtbVwiwP{B}U1pN|E1yh6;$V<%nkU8c<7O!rslWA!@uRRD+=+H7!)S-W ziswG2fvZR1`@awFw0LL6_rc(9juikH{}J3F?s~cWj_TH|4B=B3jyE%ZL0>??PZWC8 zQPsw8u#{jD#lpZi}3IGLzB=@{a`VE4vcJl8LT$=B+;qQP?QQzzx_@F$vYACFNL zzsgG(q|2V>J!5Rz{JuDZipk4D^wBSNWp*YR$o*tpHg(u9ZJG9IjzD+0g%C^2?3UQ9 z^JR$JZ9m@-4(xt$>Jl5KXHbwGna|^@CmYr~G}+X>Bu4#?T^)Gf_L}WM@wK2;*~7Nu0BXU>y&5m*q0k; zk$du8py|h7kp}CtcQK*#bG^zBpFTSllHNsU6dP}+bU#D3gXayauJw;mThRq`uE&5~D z@GsB!-r~a-EB>LcGd9Y%^Jl(p3Iw3Bs_eyg^^zXnA_)jvFUZOLsyj^T=F6cEvj0*V zY^W#yoUv&16Aftr=_`CUb6DRy!{a-DzenMoa*xbFLdYw4;rwl-n9n+2mglKiSFXP| zqrt7WB*AAu}Rvyk`xCQu^ z8u6i@8~qbb!Hrj3g@Wy-N4?KdPgJf79hj}E8WIovY)=8gxr)X08KnZ(9tGoNm=TwLkie)S0cq|sRBgZ+Q#CD+<$1m?htfez4#xc~5yqhqkYyNe6LU;6j! zcd+>&)z*LVGDq}gkSjT|hbx53%ZZYn`brzq@aO9EYs22)y|#uGmVquy7%~LoY)j?lPmXkPZMjZ z`3o;-M1`J~%Y2&H{FdFw_rX`c$|+VnC;7rHL5l64km9@PrJusYY-7LttZ4b@Zmkg~ zc`gQdLoc-a(}KY62^V*p@C|=ib?bDS1MWO&z3#JxN*5y#_oNU-zvSx~1&2Lz#;}jt z{CZEyduFiR{I02|XYz=zrX*j0(kSVr;E8AolGW9Ta@ehbCogQk9Ur2YCVAVni6FX;bg;08D`QDSc=l_MNMa+A~bWfr!iRE#=FZKH; zXW73|i`Ab^IXv??pY+=G(YWmiKX;qKsbAI3EY(`4#Hxu#es{w4P8%i$Em_4#Z*5V) zZkSXRri5m;7Cdvi9VRT7OMN!nxLm}KA<}=T1O{g_(jG*x8TE{}r1HJZc2P<$so^h9fH8KA2UD-`T(L2P3Pd*#2$xJZwrqhiaoJrA_JanOCKV=ndN@+e88MdGj_Nq zbFwM=w+mG$V?`Bu!a&-Fw!aNQz9r|-bnb~S`5El~4nd!h847#*LdUtyUtpmhvUED=47O2zN_ z#Ux+Xc)`St^IDv|$knO%utmb9w;58=#rUXR=lG(l)E(GK+r4N`SiC6DJuaKgeD5ca ziUdEw(41f{${|D6-5cEJzn%2Lli#FVH`DS`p^nv-dgUuvE6mhsL7`jza+WJP&Zh9# z$r^JjX3v+8;?JLoS&xEL+yI7(pKZZBicAV$JOn z8yS3UjlL#n66fTkb;--4LLNPIA+=^nnZ~IR_fcj5S5(;VbpA1VfGNoNi|L4)``5t- z*Gu!SJS{YN?{onYbn27Zquk6esZw15?6zWsYaSbO&C2m6h|4$HQmXT8m#Li!j92i| z=D8||558oYe-+0wtugE1vp%P_sM#^x>|NsE@G1_^ed9RChY0_CyyLpiskTzS{D+og z_aCYbcS@RhMxUuX5*T1>@en>{&~h|@x-&G&@p z3sU?JUuS###0$eGf>k^WQ^HQQk3}IOoQsx(OroXkGoop zCVW>Gw5PpvXk3L*oLTi&l5X z<>~9frQrPw z)}}ZMKz`%FFa_fQfXnc28YA$1^nl!cfB*5O%%=R4f&$2f{0RfrN2KI+;IfH390B)t zhx-c3NL>IfU)3|DI^qaEzhTkeAj=5fgNY=dl4?1e{@EJwY8V+uyQHU~VQ*q;tgUxV z>$f1ZUmSdWy(k3%z}pAmXR32m(8~IzAngQrL3|Rto#Fu)9Gv`p)lE#U9q~M({muJd zik-3FV+V#M|Ks|9A^x`?49+h8PN48d7uaxTed62$z7cgyY}vZhwRS7z0QF z^3ieeaq;tT5uYulP0ievk9|5pD?hi9F0N-ap+3!0$ zXdb~*AY9_)pk)TaA|N~&=wWeWEl2R7FXE~x2%iA}N}&MQ4RsI(?T=E|)ld5d2!nhm z{oDg||AfZ7~}&{ z^@E!p(S!X$T%BDswLn+`gyUdvgQGEkJR!CIfo4beqw#zRcGox>=Mnysr-Qx;2=jn2 z&c#dTh#u?Y=Uo5Bf@XKiwnzQQJvhuirKbj*XhqFTnIjcCasMDF;6-9S{b2Qk%H=+&I$b5gZ6u zP&fcC032`xODDhwH~<6zQ$Po}3ch^-Kd{CX@Bq9(j0;%n0%BaiuRm!df4B1oU?AQD z2>e~=50)msDgS8sC#@jh24a1HrQftpfChg)5CC#=2i8EW`(Hc-0S)jg0K~cdKk)fu zEX)7k1E=Dmx+x@&H@H{-Til z7mXVKw{%YZje3cC6f7sGzy8HTka~*xEBH47>gJ!a!2enQ4=o?flm|GY@PDEEw{!6) zl{zS?*YB3zpcXtpnO*)Sv*h1-^q`u!|JmCpRsF5u<2@)>pNBmE#2#tE1!J^h1Vl*u z6MJF&!uo~L3pp3^F04Y5AvKU@$XiGQKar^w-=2M|`=@;i{#|lV zFV*wL-ju5aW?}1pu`^xJ7oWfI}5IrC$a#bUfwCd zFYph3;6~jC0RFBrmwta!9KQ;#Uo&L#PjDTheGC8xkI3Zxmt^umF3A5Y0KD@ezXMq5 z0BZ^WLct19vQj`;Daf4wxYbZlQ~ZAZ!5E;Rgiuk_(9+Q#V*m+iSO7{22!xUfLQQ=X zP*8+|^#BzsHQVWn>NM=v9cYE_a9q0gD2Gn?YDL=#(?Pt5jH6!!{V`51?vp&CXT;8) zJ1;9IuW(uMiiW0^wvMizzM1(A3rkQuCubK|H+K(De?&lF5b|ztqyQ&pFbq~H*$eWprE9pf>6;O$wfgKbR;+{74_+hG;HeEX&vsc z3thTL$8q&hPDLBNu#74Ggrnc!F-{R#tSI3~wBM5b_X!s9pOWk^!Tu@N6u24uDb#=V zO$M3#XWRbOjyw$ppn~K{;5dW=G$sfu00j=S5??c8NQwoX!Qw$x1S zcpO(`fIT}(kpR7~OrJxP?zxHS@YJ6IG*bb{o9q0&;ySWVMr=;7+G)og+5OPLY}sVD=dkXOjNEhk`UKI#5DDjFt3)rM?` zA3tt>4nbGh+S*tdP_ZApR22O2&G*n#897r@1KeHg4pqSRBr4OFBOzGn*cNV-5-}bJ zLzW;~xtJL|#?yMDWw9lmH^bhRE(M!kaWV)gv5}5$x}NXR;?xH7F7Zf<{Awhi(-5Vm zV$*(8tb>5fV(OS@rbnsN6CT`@dJ4zAm1U-Sy);BjQ@J8jTKiZE!2qi${D5X$8$0Lk zP5rLw`avq7t$xH^Y&Gv_8CbU^gDOogI1}AC>P>k~F!r7I~ON7wG zWX^J|(q$j%mSNm1n?bIXReZ(;^UG-7bc&I*i@LkOBNm zIT<)t1pNDwv3ygWFUGQ{mW2|mL!B`fG%Yr z(PV&eg$(>i3uh*39&(}wRq{_ z;=MU$y45gBLQBG2|1n}v$Ki$1CSjbROndN8A~n{Ky|ZplZoMq(b<+ScY{5O%a8$IW z*~6hO)2dT6H`?;p1BwqQdZN9@94KKXJ$}4vU7*8Gj+m6YoIFX)>bgL@v}$haxVW#s zXP^{40w+lp*x4EKKNn4C?i}a5{(iVa%K0t#+b58*|IIb|AIvaqDhZBHiV&FV>FUJy z+(R_N*`OUzzOs9F3W>t|s)vjoP13IzrC{RQLvt|k`b`w3w$U0Z|MUDc(FX zeevdq=$n1X=BG6yk6#tAhRZ^|4BW93O?4GtzF1!v<>@+&Z&J8Mu`Ez!+15W1f`cbiD<#pQc!@RmSyN&NESuPRm7tSw zKg~53m!6=bmHBzxH6^FJJhA|p@etKvKa7-47AzPkpZVZ|8{x<-a1Z)=2`-?e=0wWsnO>)Jdp`5v;_XRtF#_#C4w zmN$LaPX^e^QO0FN;kFj3k+tfC^B;fe47d^sqqj@L#?`WJW?9rJT&cs%>hi$jggHO# z@D}$^Bw$%fviXzo{yR01xZTc>q^K-BhS>AY?p!qYk*28uiK@V}f6gs*EkE znVR28?yW6@NC`U8N5RV?AE8sNzFU& z|7`RR0^0OfWMBZUxkUz&0#}zo9tfbo&^%zQ+9Y)Sk8YKxEe|7u#SS5LWI%L);$NnZ zS>Xm5$exKM1L5mr-~$4<+faT$vt>-8g&!JZL;6P1OA0U1b4utRwgCUXk>_7W`sYB% zz`H;)@T~#(|EgO*0T!2$r0^y$0qenU8dz!8P2IWM8?9e<(;qQUrZaV%FN8V7h@5f^ zhmr>m>kg$vpUhU72To!zJ?*Oot;dP-pN->O7d36t*#i!esVpZEzAM4DYpHwHl_C~+ zJ=sjR$xNl!u$TwMYJ3Xo0{L2{BtF9DRQ~|~eaXqM#K6twuY3Nm)1xwMTeHwRI^ji~N)pwl^KkoBq(#=(Vaghl0Lx zt-Oilh7gY{oLMZi1t?j3SSxMyzRHlc zX-U@_le<9({lvOb;W6oMz-t!7HGzrFg!S6=xml09NpR;O9WoL^ zU#WccQ@w{%fn0wq><~C+DJj_`G0v))%6sF?W6@~j_uBdhJ?{;NWF8*@_ zg$F2REOZhyen=7VISN`jGnt^uTdL8~U>93TT-0o=sUuL2$5x{m-QOU8*jh|_OpoRQl6uUhrje%mmIS$ixE4Wz z07(MEaVU|pV>3;wGv36PKWjs*UP*bpbVaLlF>Yh?@dB=W3-Yv;$Vgx}ufpSyn8RZn zVxuVWfHSSI*Hf^tM zoh11=^^U{qj^bOAzs7xJy*W3Hfj)xUy+cuyF&=#%fj$;C%Fl$CAwluGF)jq=&Q@CD zlsX}Fw$q+rdi5q_cX=r?>00x6sUyBqmEW5calOP4XMCdPRB%hqsqr*aneYop+Bk`Q zzFE2>*mMb6$N8c~8owK%Z0L}yIU_nCU9AGYg%8av#iVV#EB^cw>uoIQPma*R$|iQm`Lt$mrRT8`Oz*g*!A4f2r5IA}O21eV9A zy2og=*u&4)I3jDv+@Z>g@4qKEZP3ZJ8{9SfFs(3ti_y$>^!Q6&Z{bc=!IA1>sf~QC z0AP#0MUix(ZkVRx4z4w7XA+(T^VzlJzk-iy=#X4Lh<@%R#b%SAGU41jD!cTcs)yrx z>3YeJ2AfZh-_W{1T#{l4gNyeaAGk&T_@cM33FQMcSJ(-|!dspLs{+gg!x9^^ zQ?Op5vMn}t-D}EfRCEv#Tec7vm^jvNJ}S9bHM~v#$wJ|=#<{wTLB0{nNYovoyll^y zStpvwNA?RzyM+ao4&_EVZlT+acC&6LU9~C|O$!=XsOOFt74i^^dc1N1W@&%{mYW#n zTWLwx26|$ZXzRc_!HCKpJ&(+j4!<35LB6!mc-mk;GT~me@xP4Ar&$D%x z1QyY{?c7B#8CQdwVyOZdNEo2_pZrZLOrWHv4JUaTM|o#M+Ud=a7L|mBmeS zk&q8bn?SvzHU(9rngDs|u<=lKsUfjqU&cLUFF*Bu%S8|IJS5{RwjgJYj%4u4z)ht& zA)x~q*l#?_`D3xG!7l5rUxV@wJ7gG58q&tkk#TP%` z$qW!*&Dcl=wO1|4TgV5!v1+b;?Um|hwVI$b)e}-US#j^A@S{hNw-30#1BCr@q8MiF zk^k8=D5iyP*j~t*22(_gS5I6x@P?N_YvHE{N!Ohi7h^;!Mgt5KN$LTo_D1{4a_>^@ z_Mh5RdH_K&s z&f(U&n!?HpA70sgI7`zKxqtld(+n?uwEwG+?7FPCV&X&lj1nzwtIFnQ4nU=K2SMwHjU~k#vH_nr>`k4Z+u&!0=!D-J< z(t3$3+|eh_66W9;2=K4=PmNcHg0zQl-ca^O>~T0?=)k$l0Xc)%6@G=4{3lyKRA7mEJ!RyV;O_6 z{TP%ectpm4!A7Foeo|~J6=KR5VvTg_4&zJ?b`R?Q%&2oCB9!u-?Q)$$7=LT>4FX5) zIM-em;8z>QOFT(1ZvTn&?`f^rOgKF0(Mpfvnm1-a2@I)95p%l^PmPl95RR?b%5>L4 znVU|l6qNc*?&O!Pww&?L)8edq4QmKpkEtG#6|oYj9#*?p^EQl&WQO02k&_lcw<&=q zv3!ywe&GR%;pOzQDzt_Ge;`ScD_U-0AF755%?yq4E}w}s8V;D2SW(!!AKjrh9>C0| zl|M82x( z`uN$7EX)i|6A?^<9K-Y+vW=76uvuKI!7q5EihU3Qv#MEA2>8dgv_4!YFxO&i1X&s8k3S^RJF0`ecjBfuVD5$h4TXojZZompu!(KmVH*k z4X%Zl7T($`r860!p?-CTr$c^iY*jz?E@R3Oi=EjP^^lU<@R?K`ULQ}bOAX3$uYa4O z4J|ho!;>19ZQQc3aZ_9gjs+ z9#Lu~RO9+j6v)iXKHi|QvLVSj$s}z@9L@4|s;O=_^-;CO6-8E&N!yYlijT_zZ)V>$ zN=Mo85*5pDd4?(CgQ8GcuZg+WjJa0#o{nV)*!r8cj%<$M7WGD}k@nXf!+r|vh*(s3 zFf(SRxmKp&x=#*J{0M7DSIJHq#~^I7j(G&7*qv+S$*hIfWUt2~M8(l9cDcKfJPgk-MG@)6VeG|0Sk?n4g$vf1Z^@v@oQzj=#kBYceIrx zapFa+edLwE{5&GN-P?-cfOE2rV?n#at1Odk*iA_mOI$1-&L=0C#9rE#*3z)jo=?uxS z5tqA^v=;KUo7>2WOW^Ez1xmk5R|jax!-tjNc}HpAf9DQ9;h_9}O)FrCZ6&IqbZ2Dn zWf=T^Cr{#Akzmi|l-eL3hSl1J#AeRYqLs1Wgt7!>Gs9P_77aHNHnNb(U#;~iwl~rX z+n6e{pAuEPvt@LG`B&fLIoM{<*=)CwTNzh9SW@^ydW5{(V= zJ59|G#`59V?-5JA@RJRVKfev%6l9Xd?Y#CEXiJK$oDCa$jfZL_zYDuy$UDznV;f{i zBw*b}n}ixta*I6Dx;9ef@8RmdE(cyQd=Z=)6A)itb6_Z|)F@i(AqH%aAmvE?aol*j z1!kW1!J#Zk|1G{Xq6bqwE8dyr)=mbvCXBJOlU#b$ZwM;e`3qH814A40x1&< zbp!i|MJ#{v-IDAvvHI3ZU`hD~_2DD@>zzl<<6#OKc&pxhH4nCFl=_?Dnx6Q%{s(TO zD25>TT#+CltWh+vYOZH+vsx)s$F7u=p}DJ>=kZ&Ww^ka$o}3;;sbBIO z?BT57C=K4C=pb6&e72Ou>jIuSbqZD*@>EaF78A$NOh`XT-gn|$ahlMnG;=m&{+1$d z$Ib$r_M`Wqds#sG8G^u+Pbo^41#){Oj9s9rEQ1Wt3v|!Ms9qqV#k%@q$8H97Z_>3s z$R-?sTc@-tJ4vr5z3nG5M7_cGYHynzWlE?SPClz>3YmV$8pY2txQ?y8_bqx)khuY+ z$e&*xHe|<2I8#wD={`73Vj##ZMyv8K^_JYSCT6MeOX8ouCG5t?*?d62X>C?4-0TOc zr;OhA+s|pT*2MN&R*V#3(RGz{agohgJE|3zf(c~fB*W1LQ^@t%ajvtdt}23 zW~j2NDY6%iyYFVT@Y@b1Upyduk36IEp5e9$EqUayGE9{i*&R%~Hi|Vgm>ZAtMtKuB zKcb_807v zNe?tBQjPX`0eS!%Kn5Zu!A0{Sm~)iv-#aDMig(EeFHIN$e%I$@KqG4W@OX9Wp?Ws+ zM}f((Mcg$g=f-#kv~n>*6-u;Q>=;sOxiY>c%iHJJeNI^eXVs*hT{BW=(*H_CewcS( zQbgV1)DMFlbwmL{N(h*15l6Q>wjMv^EsP)oZmkR?XF_M#DFQbE3|3f|0xdIniCXg$ z(g@}?8|GunJY&WD89D9>$52|5YgbMj<`>v8@)}+;rj(4m&Ggf#f}lpIYfI4R5@4!k zrXZS*>s3{b)s#Zo9(n}#gv-u~;UoPV0{?kFF&HC*+i%0!WMAcdG1!LX0;KgX?dfzDQ#9 z2`Kelsr%sZdS$t2YV~#AYHEvi+2onXoYgDQnNDev@&L0CF+?H$%wUVqE#lM;3P1jC z-Ihc#&x@v5$~-wdS64%PKL4z``IrC$(X4YX`e}#+Pb&tc<;0ZgUA)BeJtH&y)WfgK zRi0&t)v4FnOg8NuI6Zu8SIP((a7@B97?zWSX9s0@rb`U!%E@NR5IO7=PeO>j=xo;kwMcBnwBG)(1Sq79!0m)2NilBAaioep| zZV}uXDU6S*yN5EkGgoSCh~Hl|LE5Q=SQDI8>G0jdAVg7kEw^=y@TR^4_OnCxkd(Z+E>W{%}skUAPApC33ez8hfDB z#n1S5TsDb|sL|!CfjfH}uMjReR#QbKnQIN2tY;W!`VE_tXAZ}PN~EUxM!7{Ue0nEf zL%K+4A_J_lPzq#^CSLVX{t{1fZJF$ZE8*C!6&d+7Y1#A`X*K~SErz1{inqwk_44vk zY17MP-lCZ^3rcao?(vn`EnolG0UN%#W_5`8Po_Mzab)0ze?tIduTTd&U zGIbFNN{ApS5*F_ZjrJ#oNf7nD?c1^@gYA>N%bQeu@x-L-na0co89T^zL~lH-)X`vK zZQX9v9FmLg8}CRje=rNcl`n$nJ7^*bC5xt*MA~$zN)MN>k$6jGAEE-P2AU*suu{L7 z@vbZINvU;ZkC(`}DZ{}419vGDbZ%X&lWUL|g_t&crvk&-$2PI3vV;wd&j=F}aOIJi zr-s6mh@Q-R#M!o?UUbNuK-`#R7L%8pidoyuvPWDo&Z+NQH;sp1j!3}Qup>>1=(}ZY zB_6ePDLuO3kRDZbV*IFcp;f7b}VI4i#OmH?wT+>nNOg*#zI$%Q>B{DiaOTVS)CXb5TBF01?3c+v9U173??pDr z2@`X1hSRAr?*_cRQBL08LB&W<95Zcm$#?_x_wB;SxoB~YrkW~(x%Py#;3$G2Lm7g; zo{lZc-IwqfMz`A#gnMS`7Ky_ImZeNtp3ttv&i(S3i>M34*gH7ZA}h}x4gB=%J;XYb zGzT)PgDWsNq4ZOWP#yjgKYwgZhieJN9v6Pzwgz0U`|yqWBi=7ZQ7(7(l~%{YPBahJ zIA^JihjDw}-0gU~hSHgL7pb$=F(y1ni;l>0Zb{Ot9SEQvlM7KYGz#g}si*<=#sA+} zo z^4Q@Ul(&DbvH-+;tCa)R$Btz0iN(bm+FREfq-#SNno2LBY1?qG74ul8kO^y&H8=1C zG;=v-u|BDDXZ(ti&CBqRtTsjs^=7tt_Zw{mQh+6wllr>&~ zB)qGin#GwnYSx`*QFl;|yBCnFob+ny)~%(Rr_*q9)}I>UJ1G|V6)K23 znGigZ=z!0~GBp!qiT<5+_FT7#`819BD4C&}ZzEf-wS@6@S%Oq?D0V-CR;xDM*83`) z;VarvWvn>I0B681u`^%|AS@=~J)*=&e3~?$JCq7Bi?P^aifCnR;YUjKm=7O{PtoH0 zyY`LO1@5P0vSoFJ2|x54o7 z336Q@szw9o*4@&$2OuLpsvIb`dRP!YE2s7(ToGE~t*nFno^@f;rimv4iQc~$0HZJ6 zI#8XNzcy;|)9`KGo5~2>kd=3~Zn&6nv;dWG+`a>{i)Vs(h$*O9+>ChcX{>Al*#gj#aA&m{yHTy?i`r8>rW0 zUj%#YB=ItEsI;ck)MnbWN0U9NS|`WaPCZw{cgUFWzQ69qJHC|vgdx%V@6v`=bbfIV zilrtLrlN7$$%m}QtaM|ra`tD>KR>!#<6`a4u5Cln+1eof=nTtSFj^QttZb3e!DYDi z?!YP(c#w?eZW_zXuVbIk9d;)8de#0wD z-i8~gh!o3eicR~fl$v5s0op^#4>|6f!tEIBVH`>cEjn5d9~9!4$r~r`+5}$5y2%y9 z`ilU){!Wb3lh$9wr=I8g4ZWcq7VsPg6E>_);IYH?cYLQhS`PkpL8u3e5Bh$GXrZsc zI51l)>5vgj7=p)=eT!BwfQT0$@q+P$-~jDnU5~&QV~vBCsDNVBwUL0=o5x_OnY`mo zT+ki)Vw|jXnPdR`L|6W5s*@f;2U7|!9Go?eT&qnDyd>UL`0LEFP)ofg@SYgI4;T>4 z-euAeZE=kDhWN$E!xMU?%bAemjw`7#Gkf!GX$uuJrP9%Iy#@xG46^4u(=0J1lN;Xt zT?$tUcf9I?s=6enhit^fEanzL5BW5UMuG;`C>`4?Kt+t(H7C&CUwg9D z!bTX6md+hnL)aWR8NRI^`A$s2H6F;%nhw+~HJvW>yo5C9cr%4W+4Oi)Y}f;bEXYMH z0A|8*l4wIhOC7-pmsm%*x@T3gUai|CcTiL4A zb5gBP=gs8xaMgSNMs*ZBrRc+ZoKT`X96ZoL)^C!514;=r-fQ;gI5-LJwtG}oWMFQh z^(Zv10Wo5Q8)oN~D^=xELGIe_{$L zV-(qWvnw;b%mci-fa3;o3`%u0tDB++Z2rq`-#f>8b`K|un}zHlw~0+t7{9cc)c^eY zk^k?nz5f{=Phm)Q|IaW#Uf@5&{nhvXSvs)cm6f#8xKwx2?sdL^@ZYuTlLg}*@W0*b zobmhd+#PrST`Qi%xqvqZxQgKO>qqjxzXAtDk83a->;KPSw@>ju!~XF7e~KD-My07A zESCE6yxRW+;-zx7U7reC_xVxm*d*(A{GwFoDa4~^GXl4&^h`PnI_R_Iy~iHsC)JO31bSzyu4GvuzM{oN zuw!;={_Tz*?%NdK-hQl-%b~^IcW>t3va+W&l@m(8uwDy|675jY>~bj7)!M3k;dGkl z`M@VO!A)1A92@%{T-dZ{->IDoW1=fI-D>zMx3-k3RS$;n+3iuMQdUmt&-cXdb5+qD)8jKt>@ zai7VKw#zu(XDJtY>-E($KT9pXY-1{{wwbNh>hP-RyM;hthtux|=}%2}@4mk#z|!-i zlHc-=DH|2dr<{3F^6%cczgO?Oz2^R8@>l*y&z2*ZUY=%3^B7b6^!GXc#`B0k`l}AWO zR=Fkrxg{H~ZEQ~8mi6Eq48E6dEcX;mVxDsg zc;3gVH3w4-^m(hMl~?|Yd(?SxdW7FC`^F@XIr2a2olhrN*;U<|f172^)#KYruQ4o- ze${Jg7_h?PcEiSb%vWzdpX|8&$&F<~&-u=JZ>hPqbJF9mw>%axOO#7k`vOj`UT}@$ z?yqeL%9*84ehTuo&zSgks#t~kCV7V&jiQe@E;c=9xpA{?+4@*-nXlgk=C5>12{Mvo z+o*s1USaP0uWQVgJi2y+l}917pnJ#mHxu@SMtA(-e{^%kj&z9%_Bm5acQ8Ggzh2;X z^zy_1y5d`wvDRNZ?LS+8^4GNGPh11z*VZpPUM4^7K*h~BYgd$+y69X^j(HaDxb||; z;@3`MrazdciJyP%_+5EkTmJg=@0b6?&YM@UM6>emgI|c#0U4QefkiX+L){Tk2w`D4 z;yu6{)d183r;!a<3d|))$4{ecL_KR4S>t0i@HvGzPToZ~2mJt6gh^tYU~`a z$^y^efu2N*aRd*<6x@eYAWSg^9vTU+T~Uv$Kz9mirHn9wfuSP;$q-l-jjkQFWJT7_ d6pf@ETFwS|vjUR{=zw!>hNHk%z~xvF4*)cnXD9#w literal 0 HcmV?d00001 diff --git a/tests/test_import_data.py b/tests/test_import_data.py index 9f24a95d..66bde3d7 100644 --- a/tests/test_import_data.py +++ b/tests/test_import_data.py @@ -1173,6 +1173,15 @@ def test_file_pair_chk_sheets_w_no_aliases_col_skipped(): assert report['NO GO'] == 'Can only check file pairing by aliases' +@pytest.mark.file_operation +def test_file_pair_chk_multiple_aliases(): + """This file contains multiple aliases and various ways to link the paired files + If the check is running properly, should not see any errors.""" + fastq_rows = imp.reader('./tests/data_files/FileFastq_pairing.xlsx', sheetname='FileFastq') + pair_errs = imp.check_file_pairing(fastq_rows) + assert not pair_errs + + @pytest.fixture def mock_profiles(): return {