From 3c5d4000e5c738da00f61725c6ba6242e683e346 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 9 Oct 2025 13:31:18 +0000
Subject: [PATCH 1/4] Initial plan


From 7fcc2fb490881d5f8e5acde9b48761b63f55546e Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 9 Oct 2025 13:39:24 +0000
Subject: [PATCH 2/4] Refactor cdsselector.py to be more professional with
 argparse

- Replace sys.argv with argparse for proper argument parsing
- Add --input-folder, --genes-list, and --output-folder parameters (with short flags)
- Implement proper error handling and validation
- Add logging instead of simple print statements
- Follow PEP 8 style guidelines
- Add main() function with if __name__ == "__main__" guard
- Fix logic bug where FEATURES list was reset incorrectly
- Use os.path.join() for path operations
- Add comprehensive docstrings for all functions
- Add shebang line for better Unix compatibility

Co-authored-by: Mxrcon <48180517+Mxrcon@users.noreply.github.com>
---
 __pycache__/cdsselector.cpython-312.pyc | Bin 0 -> 9237 bytes
 cdsselector.py                          | 281 +++++++++++++++++++-----
 2 files changed, 230 insertions(+), 51 deletions(-)
 create mode 100644 __pycache__/cdsselector.cpython-312.pyc
diff --git a/__pycache__/cdsselector.cpython-312.pyc b/__pycache__/cdsselector.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..21c66f8b12f72e0aa77b846cb5f24f3b3349f99d
GIT binary patch
literal 9237
zcmcIqTTC2TdOp=v)%P1V4ULU)VH-?iV0`z0@r*GSg9jVSp3KNzPg>=!0=nIFF;#^b
zT8)=ot+b#x!p@5s&5nkBS|V>0A|(n@@-Ua3NYTD@8ys3SL>eiUqJ3krwb}K&<Ugk_
zeKEFYR?4yaRMn~f{O3RaIp_N?#czE+H-hr)U;bzG?`jeHZ~R~u#<+R905=bifUYBf
z5^PE8Iz_Iw>o#(wuhZnpTxZ})C)qLkbvuOw=4a@-LtvqH3U;Vnf&*%|;Dp*ExS;k5
zZm4~tU+~;&LP!qVkjNzJOaTg4X!kud9v@Qr4TYv`Q?`hg=WPhNyL+y3SH+|lQzhB$
zes?ska7s*$kE`5pJgJJZ!eNT0Zb^uzMu76e2{9EDl?XR1OJf|LN=YiOiUN0DOr7IX
zw{R~}X>s2Y#c|w0N+qW_byQMBsCkv+Ws!@gpdZI4lhPz~91&9@my9cF8~5(0$Vn6G
z_=L)V7(AB}C*8$-xtNqv`FN^`vB)VzcwjUv^6G>vDs67}Unu;D*x}}&j8DPnDJ~``
zI-w-DjZbl{t??8L(>g3A1ySZY$x~E_%WuIeERoWRMdCWieH2d$ztM%Wdg+g(o7>$b
zk4%h-DOJ(OEgJzO>EQbG(TZka&aSC#<mN5VDOMh*O5C_Cfsz$6gA#%8lrSeCEY*gn
zD>B88iQG6$DH%_RM)y*wiv*b*6=6AGP*~^kCFCaK$s~6Z^vmBC1(@Ch=uz(AdidLM
z?m`=Pk(Xf}?%pvzp6uWRXiZ4Ugg8c;&yL{cwwN^L{vF<Qmm-wL!rmDeiW!@cTv(`S
z_&vQ0)dQp&8x)}g?9B3_S`7*FfLSew9I#*04>30=u*I1Oo38JcQp52P*c4n+8X3Xc
zW>}J8Bf_l69&bxW8k%?|rpCv_h{CmU4NXb$wwT0PG@i#`8DKL-lypVIwbq8Q)&_xV
z=<jH_*wJt`LTfCc*X+P|j8`>>0Ec0COuYoJ9LAWM=O(YjW4bIFGjOSI=x@<eEar26
z>9*8-il|PEM~#lMA87W#Pr=m;dct_tnd%%<z2OhdC+Cv+Ku12f{|^p^waZ?hDABD0
z8i;PU;O2F@6{y!}*H+N&G|Hd^=3Z6=MqmZI;1Ha@c0I%^3x#?aty}N_tyl1U4_dzv
zAbf*mv@U&&;@X6|t0+>D-gB9k0d`;$j`1mh90-6P<B+zcJK+q$z9;ZzLWd4&=AqP2
zBP<@A4S*272&&S*arb`2kB!4|{Q)ooXPiI@?6hK#AsEx9ex$yDFy_&WLeE#BSsXAG
z;0%*-ts^G1MvE9r2C!P3Fpj_`^$4PV6yj1!3+THTv=WUac}3Ce0OGJ2)BaL4OWRuG
z?`m|<z?F2v03H~Q6~WPMkc}u226D<^I^qB`nqy2<`P;m#F{5H~Jnb#k1#lkGn7)C*
z-gE@krv&5hY&d)}6|PBwTNC9X=|CAW0~aaXG^AS+Lo|HwO?8$Us|c$(VPA~n-J<zT
z7$=)Y)^=avCwmGi>IaGY$WaF+X<l9sqUM&;+=K&uRsino<VjXArg5?lYAs+9=!QbZ
z2D=e{%BN7l`Syj@eFt*B1Nq7;8)0re+?fk^ZrYfj_nE`#x6kx#RwCAbfV?xja3Eh1
z$_GPv-|<ZsV#Ci6%i1^nMI==V3?Zq?``&yBNzKA>lgp_R67P<6Pi@XqyXvXSGIct>
zSojR$+Tlm=<sP&GzQChiw!`j(fd?iHzGl!vgP-D8Ej$sZ4Eh9pYBS&clyV`}I_ViC
z*d7|T;RKqdGE@(mx5aEDC}#U$KSI;Cj4k0Q9!0-5aY3Q(R=%3zzU^)UgQn?en?U1#
z=4cdB@fwnnz_?si1V*hen|@52(tm;kmduASTL2O1s~{}}ziHbf+K-MPg_?xtYk-y%
zx_kU1`Wl*~_9JzN$?pJ?cTO|vPV-r?XP8nQ3G_5`hnb`%k={Gv;M+mDZhXiGCEjqd
z^s}S;ZN$}RBLt*^ZEqkM?n8isKo!@dsB)x(yUeSj*fGJj6Cdef=c8~i2`kKi+*|=;
zCZ~$nVwb9e8^n&4G+gjkbT_BajgKoPtl~zD1D7PVPnt*xy|Rp*kbz+$2pO38X+$JJ
z0%!n#loLOKAuL%2hDv&22Nayk6enU{oGkLhvo$A1x}|rWH@#AQwwUWM7NCREDEU&_
zfwc-|A??+5r5lcP9T~~uSt?%#7jbJgNzs^bP$pn2mQv9wUYMW8NaJEkV?p!raoGvW
zt}*e{u%yvRXm%L$0oZcm0Ti-M8a1pj*yqz+xG{>gqEVuP4<1&c0YUyG$W815p^)Ez
z*B9WYd=AwNdRB|P{`pgLr{>?9duyS2&C{^q-<#dnxpIAFFv|_B`Y&Xi7d9C?+qvPZ
zxYxW8TlMYRM3k$STIBObkNs!3XX(JQ`p@ZqNdHr2q4Kj`AMg6KHXH7l9f0b8X-xOT
z8JPFZdGAgBLRoD8<W9D(Z7I3ZvC@(aonLeI=L6xaGyJD57S;4pdIz2Tch8*2yK@Us
zFtR_C4WC^8a5<K(>R9!k&N@$jw@Jf`=Sm+;;_toZBIsY)b9?A7Q0QFP{zcUeTsPTp
zOLHr{{4$K+ez}J|*Xa0ip98l<T-_}F<xzh(qQ62c+{d<sy1)5<hPu~)s0D*CO{0{6
zs0Dn2tsF#)Tx;wpmX!h*fK$qV(-6QZ0Bj;qMhuSti-vw=TZRJ2tOm%m0ksXFydVYh
zf)o}+CM<9(a2Hq=#?(v3G5`Rf_7@=3W<e-G9)R6@CfL2lC*#-!*Mm)brio)@`X~C4
zgTpVF_@$Kk0xv~G-M=z?iES{kco++L$jxg|5P+r-H{;Nl01<Jg%J_V5STv-32^@lo
zCd2|Frle90kO-YX9T`3y9w>J3if}N!SAT=S3?nLj#d8oslT^YKpdFoNa9EqBp-wy7
z0GDn;h9JG`Qu&Z%I6U{XouE9It}GF%3DOPlHsX=%VSC8?a0S*L$p@g)*ti0tYVN|s
zAoK$G!;MZcB`A||b(FxUd=Phngb4zN)LXoy@*%<rn-N99ZjFVQq1(omik8ry0vZ8+
z%D+KX4xBA(o`VE9J61YZ+AQFF;_(q|Ju`P^p?1yFxZw|Hck3iI2DUy4@A}OBv3s#|
zHGKFFi1M}HJCYApJaFH4XLp}m3!d7dkv{;C=xd+pC70jZk+*^{I<cHw{>yAl&#J#S
z>+FSC@#lj-8N7G$7yB2)Pns8MvkmPlEh}}|ivBfbAnysz4E|{=0DKhe$bE5iS2x4N
zilc4@nz16DUM8S7f_*ELfQItYgFg6cfC@T8Z|C_56iHB>1W2N{i`#F*dTjqju+_j&
z0!?ZLa{=$|BzSL@uUL>IV*wI)Q2j5FguRV9ybo~l5=r3g@b>QndqI|ICc|VX!fX0_
zNHfo%3}`ND37bI>ojUjm6SBbk<1n-|1mz*ErDxi}nTScUz*%7^#s_TT^_&?wq@*hK
z;Glcd=-C8`mn(z`_;Kgr(q%m(X0>wC%><lT3vDd_fosCY2s#bI1$%5IDLdj#TAj$v
z3s7f`(Sb9Ex)6|90>FTi2D1ha%A7Z0pxG5^LXL@=b4*G`)hP(^@SaS2KqhG{9^-m0
zbwz9%4Jk>D!E(x-@H)b1PLqws;9i>h!wERH!*R%8YIc4c+$jP3ONylxxfMEU6{Z+b
ztG~mzGyEtr%Q7b_p!c%+$i2Ypr|?t$5vm!qS%qqLXPMBqJNK;bY|QO!+(K^l=&a*g
z&yM-dxz2?*A9?mKxtB-QPxj~Fe-pSpRqKKJT%dljITvVM;&Xw+%NJJzUHM@Bdax-M
zY+9;*96YjVN09fqF?VAj`pDO^T(xp){q)7$>5Gq!T+D~}u7{g*;pU~2x$x2DBf0R2
z*#U5-76u*#8p^nYK$gug+TAj9VU4NIdqN<o9^T<2Zx-xzlH-2~765YC)HLL%r`Ze*
z$NfLxxZ7Xjt85$aW$b1+HSPE_`cp;yoe&mUK#;)FmK6XTdvTCNA%HMf!8=K8=Lb0o
z0UykBFU-@eTK-SQ4PJ=>k<*@xD`9yvg`qQ^hj6ZO0U)YpP|<kXcsv<b#t9>0J(C_8
zMqttqW<s#{>bbGybG!Rn80*!2TsUYH+!+>X&!=ob&M7AZo0Yl@2tlxUXxf`$4N2G{
zu2m*Y;R;|1S{57`FX*}`$i0RH=d|~Z_a1dOtlt#{r(V9=1i#CAo*N@eiN#k;Ply;@
z!+rZf&e=e*+k$-*0x&odGP3rizFcXtsD%E{!b8JZ!2ZMwFnYek<txkf>wxDq6ay)1
zh&UY}0(PB#hxb-Ge03s*KMo8}B;nhN@ufh7$TVG{ho43hPIsqm9o(gKsK5=ULm_La
zia_q=6wFD4K(>WTJGf!U*^5HNTkx_pCLzUBMZOc>sv4`OwlM0+XGz1P99JRo?ENSv
zj;lD6$dceX;*(F{_r4O9>(J{d(hy|E^nD--(HKP8s^%g6jR$vuR5R{L1nn`{l82xL
z<+|BZ<Ssn4fz`lRZ<gDjRl%toycMjXC+I?k7&t)2`qq=R^7}yj8Sd5u`UiNgMAhM$
zq5O`WGw*y`RlkKQ*)D3<y%DN@@c#YxAKbWqW3gq~y&5_VZB^k1BlkxhB=08|&n!2u
zR-KvkY%)Pt7q#KtIX^fzxX`)eUd&``j<0!70KRkkvfkQ_y8WNs`uNsT^KyK(?(7z_
zffwlB*mq>Pc_qHOum4^kALQ192Xn!LOM%D1H#b=zc;-gF9rN$Zy^{?$Egf2(TJv`0
zy}K7a%z3%lA!DrU?qkarmOHaGXCHgJHtVs}&sq@c#eQOTXVcQ&Y*YJ6?dP{X{~&wi
zTDIc)W9CPHeC9&I8V~^_&U>pDj^w<xv+rPj7w0Z!Yg?BROE*?_taPm$%kFx6-Fx<t
z_w08d4^Tf>u*dX;=bXL!u<fhEuAT$zSM6NSUiLS8S-6kEORHF^#46Zc#h)t7v>k;2
z?G2Pf$ZVZPs%2#om`7O=r~<~lh7!BB+t`p&`|%ncH1HtbgiFQvK|CO0-p($S@hDiN
zZAUa`Jxvu-lH1SEqO=u~jS0&V6y}krF5H`U+e>P4RJsc3%0$@gQ@FmmHzZqMy|R#0
z{T_PSZH~A7?6>EUfh@*D!xMU~9CmZQ@WIgmRuujcKrdiL_f6A9v)Ja_yx`2xr7H_o
z(=|=sp{LyXwa8|A(x)!Mmv3%(A_ZS7;cFC_4M~oTijXAK^Bp4jlBIv-#smhIwWTYI
zKk;*l%FFP*KJ70ykk}Z2RRUXuJ$`r!l-NUPsP2YDDgMr{W3FM5;l;iZ?!)kLUKs^4
zVp$9urJFQgAFuLBJ?jcaN$v+tMS|oTC;1RkWMzoaFp`^CAo&u|l3b;X<L-ieB3S?k
z7Wi8tMEeRsYIzWoT*TZNjlo}Ih{2JEa5F)Fyt)|qb<k(vQ*}zd0c~HyPdP|1{s~j@
zv!P9<maTXas9Ozivy73r%!UrFIa@Z2Easg>VZHHGuJKg1;mvHQea+cnzR4{1{A%cz
zL)rbuv!N4f&XZ>Qj~CVTgYCJ4?b*hTZ0Piwv-6o9h4wEV|JB>Se0#O>*sKelt6P?O
z)?3cxTF$Iiy*2BBmf(*0$+^k-J9Bpyg!Q_^xw^w^fg>A`s|CnkV07Pq&3ni~`lHLo
z*N^q)j`e1b_GN3%uX+1{iK`BGej``+#&YHQk?!1)?$57fFaK!GdBd6<o<cql{Noc(
zC7?cAfyMUC`4)VO{NmMYRb<WCyb%aujAtvJEBIjF?G5(sL0|82^*QOU8^e7x{qHmj
z_nI>r#hQ&qBSFm(jS5mM8kKPmeWB~&PNS76MWZ1ml`*;(?S(h+BUx+WWnc?TmI!mj
zAz3u4F_6qtWt?p!(F<A3=g!VCNtl2?){wD<fu%$W4;AExDeCw33W|B!h^XK<$o~y;
zeS_Tpg+g0)J7wS6QBUpN>Zhuy&aLCSsfw-6T~yUp$A0PtwKYg_RK;c=g&6N_`Vn1|
cXS_4rKkxrZ|J=KO_1<Hqa+5~PE+WGJ0kx6CeE<Le

literal 0
HcmV?d00001

diff --git a/cdsselector.py b/cdsselector.py
index 8341434..98e5042 100644
--- a/cdsselector.py
+++ b/cdsselector.py
@@ -1,58 +1,237 @@
-# -*- coding:utf-8 -*-
-"""    
-	Propouse:
-		This script removes CDS that arent on the allowed list, 
-		with annoted GBK files you can create a GBK that only
-		contains CDS that are ´allowed´ by the list.
-
-	Usage:	
-	python cdsselector.py $GBK_FOLDER $OUTDIR #LIST_FILE
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+CDS Selector
 
-	$GBK_FOLDER = Folder containing all gbk files that will be used as input
+This script filters CDS (Coding Sequences) from annotated GenBank files,
+keeping only those that are in an allowed gene list. The output is a new
+GenBank file containing only the selected features.
 
-	$OUTDIR = Name of the output dir
+Usage:
+    python cdsselector.py --input-folder <input_dir> --genes-list <genes_file>
+                          --output-folder <output_dir>
 
-	$LIST_FILE = A file containt a gene on every line following this example:
-	dnaA
-	dnaK
-	rpoB
+Arguments:
+    --input-folder    : Folder containing GenBank (.gbk) files to process
+    --genes-list      : File containing one gene name per line
+    --output-folder   : Folder where filtered GenBank files will be saved
 
-	OBS: the list file shouldn't contain and special character.
+Author: Davi J. Marcon
+Email: davijosuemarcon@gmail.com
 """
-from Bio import SeqIO
-import sys
+
+import argparse
+import logging
 import os
+import sys
+from Bio import SeqIO
+
+
+def setup_logging():
+    """Configure logging for the script."""
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(levelname)s - %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S'
+    )
+
+
+def parse_arguments():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description='Filter CDS features from GenBank files based on a gene list.',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    python cdsselector.py --input-folder gbk_files --genes-list genes.txt
+                          --output-folder filtered_gbk
+    python cdsselector.py -i input_gbk -g allowed_genes.txt -o output_gbk
+        """
+    )
+
+    parser.add_argument(
+        '--input-folder', '-i',
+        required=True,
+        metavar='DIR',
+        help='Input folder containing GenBank files'
+    )
+
+    parser.add_argument(
+        '--genes-list', '-g',
+        required=True,
+        metavar='FILE',
+        help='File containing gene names (one per line)'
+    )
+
+    parser.add_argument(
+        '--output-folder', '-o',
+        required=True,
+        metavar='DIR',
+        help='Output folder for filtered GenBank files'
+    )
+
+    return parser.parse_args()
+
+
+def load_gene_list(genes_file):
+    """
+    Load gene names from a file.
+
+    Args:
+        genes_file (str): Path to the file containing gene names
+
+    Returns:
+        list: List of gene names
+
+    Raises:
+        FileNotFoundError: If the gene list file does not exist
+        IOError: If there is an error reading the file
+    """
+    if not os.path.exists(genes_file):
+        raise FileNotFoundError(f"Gene list file not found: {genes_file}")
+
+    try:
+        with open(genes_file, 'r') as f:
+            gene_list = [line.strip() for line in f if line.strip()]
+        logging.info(f"Loaded {len(gene_list)} genes from {genes_file}")
+        return gene_list
+    except IOError as e:
+        raise IOError(f"Error reading gene list file: {e}")
+
+
+def validate_input_folder(input_folder):
+    """
+    Validate that the input folder exists and contains GenBank files.
+
+    Args:
+        input_folder (str): Path to the input folder
+
+    Returns:
+        list: List of GenBank files in the folder
+
+    Raises:
+        FileNotFoundError: If the folder does not exist
+        ValueError: If no GenBank files are found
+    """
+    if not os.path.exists(input_folder):
+        raise FileNotFoundError(f"Input folder not found: {input_folder}")
+
+    if not os.path.isdir(input_folder):
+        raise ValueError(f"Input path is not a directory: {input_folder}")
+
+    files = [f for f in os.listdir(input_folder)
+             if f.endswith(('.gbk', '.gb', '.genbank'))]
+
+    if not files:
+        raise ValueError(f"No GenBank files found in {input_folder}")
+
+    logging.info(f"Found {len(files)} GenBank file(s) in {input_folder}")
+    return files
+
+
+def filter_features(seq_record, gene_list):
+    """
+    Filter features from a sequence record based on the gene list.
+
+    Args:
+        seq_record (SeqRecord): BioPython SeqRecord object
+        gene_list (list): List of allowed gene names
+
+    Returns:
+        SeqRecord: SeqRecord with filtered features
+    """
+    filtered_features = []
+
+    for feature in seq_record.features:
+        # Keep source features with genomic DNA mol_type
+        if feature.type == "source" and "mol_type" in feature.qualifiers:
+            if feature.qualifiers["mol_type"][0] == 'genomic DNA':
+                filtered_features.append(feature)
+
+        # Keep CDS and gene features that are in the gene list
+        elif feature.type in ["CDS", "gene"] and "gene" in feature.qualifiers:
+            gene_name = feature.qualifiers['gene'][0]
+            if gene_name in gene_list:
+                filtered_features.append(feature)
+
+    seq_record.features = filtered_features
+    return seq_record
+
+
+def process_genbank_files(input_folder, output_folder, gene_list, input_files):
+    """
+    Process all GenBank files in the input folder.
+
+    Args:
+        input_folder (str): Input folder path
+        output_folder (str): Output folder path
+        gene_list (list): List of allowed gene names
+        input_files (list): List of GenBank files to process
+    """
+    processed_count = 0
+    error_count = 0
+
+    for input_file in input_files:
+        input_path = os.path.join(input_folder, input_file)
+        output_path = os.path.join(output_folder, input_file)
+
+        try:
+            logging.info(f"Processing: {input_file}")
+
+            # Parse and filter each sequence record
+            filtered_records = []
+            for seq_record in SeqIO.parse(input_path, 'genbank'):
+                filtered_record = filter_features(seq_record, gene_list)
+                filtered_records.append(filtered_record)
+
+            # Write filtered records to output file
+            SeqIO.write(filtered_records, output_path, 'genbank')
+            processed_count += 1
+            logging.info(f"Successfully processed: {input_file}")
+
+        except Exception as e:
+            error_count += 1
+            logging.error(f"Error processing {input_file}: {e}")
+
+    msg = f"Processing complete: {processed_count} succeeded, {error_count} failed"
+    logging.info(msg)
+
+
+def main():
+    """Main function to orchestrate the CDS selection process."""
+    setup_logging()
+
+    try:
+        # Parse command line arguments
+        args = parse_arguments()
+
+        logging.info("CDS Selector started")
+        logging.info(f"Input folder: {args.input_folder}")
+        logging.info(f"Genes list: {args.genes_list}")
+        logging.info(f"Output folder: {args.output_folder}")
+
+        # Load gene list
+        gene_list = load_gene_list(args.genes_list)
+
+        # Validate input folder and get list of files
+        input_files = validate_input_folder(args.input_folder)
+
+        # Create output folder if it doesn't exist
+        if not os.path.exists(args.output_folder):
+            os.makedirs(args.output_folder)
+            logging.info(f"Created output folder: {args.output_folder}")
+
+        # Process all GenBank files
+        process_genbank_files(args.input_folder, args.output_folder,
+                              gene_list, input_files)
+
+        logging.info("CDS Selector finished successfully")
+        return 0
+
+    except Exception as e:
+        logging.error(f"Fatal error: {e}")
+        return 1
+
 
-#I/O declaration
-
-input_folder = sys.argv[1]
-output_folder = sys.argv[2]
-gene_list = [line.strip('\n') for line in open(sys.argv[3])]
-
-#Taking permission for parsing later
-for i in os.listdir(input_folder):
-	open(input_folder+"/"+i)
-
-#Check output folder existance
-if not os.path.exists(output_folder):
-    os.makedirs(output_folder)
-
-#Code
-for input_file in os.listdir(input_folder): 
-	FEATURES = []
-	for seq_record in SeqIO.parse(input_folder+"/"+input_file , 'genbank'):# Parse all files and interact around them
-		for feature in seq_record.features: # Check each feature and see if it is on gene_list
-			if feature.type == "CDS" and "gene" in feature.qualifiers:
-				gene = feature.qualifiers['gene'][0]
-				if gene in gene_list:
-					FEATURES.append(feature)
-			if feature.type == "gene" and "gene" in feature.qualifiers:
-				gene = feature.qualifiers['gene'][0]
-				if gene in gene_list:
-					FEATURES.append(feature)
-			if feature.type == "source" and "mol_type" in feature.qualifiers:
-				if feature.qualifiers["mol_type"][0] == 'genomic DNA':
-					FEATURES.append(feature)# If the feature is on the gene_list append them to the new feature list
-			seq_record.features = FEATURES
-	print("Creating input for: "+input_file)
-	SeqIO.write (seq_record, output_folder+"/"+input_file, 'genbank')
+if __name__ == "__main__":
+    sys.exit(main())

From e630cb5fcae851f31d21a4d42a1bb8934d46ba47 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 9 Oct 2025 13:40:04 +0000
Subject: [PATCH 3/4] Add .gitignore to exclude Python cache files

Co-authored-by: Mxrcon <48180517+Mxrcon@users.noreply.github.com>
---
 .gitignore                              |  38 ++++++++++++++++++++++++
 __pycache__/cdsselector.cpython-312.pyc | Bin 9237 -> 0 bytes
 2 files changed, 38 insertions(+)
 create mode 100644 .gitignore
 delete mode 100644 __pycache__/cdsselector.cpython-312.pyc

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..fcf176a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,38 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+venv/
+ENV/
+env/
+
+# IDEs
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+Thumbs.db
diff --git a/__pycache__/cdsselector.cpython-312.pyc b/__pycache__/cdsselector.cpython-312.pyc
deleted file mode 100644
index 21c66f8b12f72e0aa77b846cb5f24f3b3349f99d..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 9237
zcmcIqTTC2TdOp=v)%P1V4ULU)VH-?iV0`z0@r*GSg9jVSp3KNzPg>=!0=nIFF;#^b
zT8)=ot+b#x!p@5s&5nkBS|V>0A|(n@@-Ua3NYTD@8ys3SL>eiUqJ3krwb}K&<Ugk_
zeKEFYR?4yaRMn~f{O3RaIp_N?#czE+H-hr)U;bzG?`jeHZ~R~u#<+R905=bifUYBf
z5^PE8Iz_Iw>o#(wuhZnpTxZ})C)qLkbvuOw=4a@-LtvqH3U;Vnf&*%|;Dp*ExS;k5
zZm4~tU+~;&LP!qVkjNzJOaTg4X!kud9v@Qr4TYv`Q?`hg=WPhNyL+y3SH+|lQzhB$
zes?ska7s*$kE`5pJgJJZ!eNT0Zb^uzMu76e2{9EDl?XR1OJf|LN=YiOiUN0DOr7IX
zw{R~}X>s2Y#c|w0N+qW_byQMBsCkv+Ws!@gpdZI4lhPz~91&9@my9cF8~5(0$Vn6G
z_=L)V7(AB}C*8$-xtNqv`FN^`vB)VzcwjUv^6G>vDs67}Unu;D*x}}&j8DPnDJ~``
zI-w-DjZbl{t??8L(>g3A1ySZY$x~E_%WuIeERoWRMdCWieH2d$ztM%Wdg+g(o7>$b
zk4%h-DOJ(OEgJzO>EQbG(TZka&aSC#<mN5VDOMh*O5C_Cfsz$6gA#%8lrSeCEY*gn
zD>B88iQG6$DH%_RM)y*wiv*b*6=6AGP*~^kCFCaK$s~6Z^vmBC1(@Ch=uz(AdidLM
z?m`=Pk(Xf}?%pvzp6uWRXiZ4Ugg8c;&yL{cwwN^L{vF<Qmm-wL!rmDeiW!@cTv(`S
z_&vQ0)dQp&8x)}g?9B3_S`7*FfLSew9I#*04>30=u*I1Oo38JcQp52P*c4n+8X3Xc
zW>}J8Bf_l69&bxW8k%?|rpCv_h{CmU4NXb$wwT0PG@i#`8DKL-lypVIwbq8Q)&_xV
z=<jH_*wJt`LTfCc*X+P|j8`>>0Ec0COuYoJ9LAWM=O(YjW4bIFGjOSI=x@<eEar26
z>9*8-il|PEM~#lMA87W#Pr=m;dct_tnd%%<z2OhdC+Cv+Ku12f{|^p^waZ?hDABD0
z8i;PU;O2F@6{y!}*H+N&G|Hd^=3Z6=MqmZI;1Ha@c0I%^3x#?aty}N_tyl1U4_dzv
zAbf*mv@U&&;@X6|t0+>D-gB9k0d`;$j`1mh90-6P<B+zcJK+q$z9;ZzLWd4&=AqP2
zBP<@A4S*272&&S*arb`2kB!4|{Q)ooXPiI@?6hK#AsEx9ex$yDFy_&WLeE#BSsXAG
z;0%*-ts^G1MvE9r2C!P3Fpj_`^$4PV6yj1!3+THTv=WUac}3Ce0OGJ2)BaL4OWRuG
z?`m|<z?F2v03H~Q6~WPMkc}u226D<^I^qB`nqy2<`P;m#F{5H~Jnb#k1#lkGn7)C*
z-gE@krv&5hY&d)}6|PBwTNC9X=|CAW0~aaXG^AS+Lo|HwO?8$Us|c$(VPA~n-J<zT
z7$=)Y)^=avCwmGi>IaGY$WaF+X<l9sqUM&;+=K&uRsino<VjXArg5?lYAs+9=!QbZ
z2D=e{%BN7l`Syj@eFt*B1Nq7;8)0re+?fk^ZrYfj_nE`#x6kx#RwCAbfV?xja3Eh1
z$_GPv-|<ZsV#Ci6%i1^nMI==V3?Zq?``&yBNzKA>lgp_R67P<6Pi@XqyXvXSGIct>
zSojR$+Tlm=<sP&GzQChiw!`j(fd?iHzGl!vgP-D8Ej$sZ4Eh9pYBS&clyV`}I_ViC
z*d7|T;RKqdGE@(mx5aEDC}#U$KSI;Cj4k0Q9!0-5aY3Q(R=%3zzU^)UgQn?en?U1#
z=4cdB@fwnnz_?si1V*hen|@52(tm;kmduASTL2O1s~{}}ziHbf+K-MPg_?xtYk-y%
zx_kU1`Wl*~_9JzN$?pJ?cTO|vPV-r?XP8nQ3G_5`hnb`%k={Gv;M+mDZhXiGCEjqd
z^s}S;ZN$}RBLt*^ZEqkM?n8isKo!@dsB)x(yUeSj*fGJj6Cdef=c8~i2`kKi+*|=;
zCZ~$nVwb9e8^n&4G+gjkbT_BajgKoPtl~zD1D7PVPnt*xy|Rp*kbz+$2pO38X+$JJ
z0%!n#loLOKAuL%2hDv&22Nayk6enU{oGkLhvo$A1x}|rWH@#AQwwUWM7NCREDEU&_
zfwc-|A??+5r5lcP9T~~uSt?%#7jbJgNzs^bP$pn2mQv9wUYMW8NaJEkV?p!raoGvW
zt}*e{u%yvRXm%L$0oZcm0Ti-M8a1pj*yqz+xG{>gqEVuP4<1&c0YUyG$W815p^)Ez
z*B9WYd=AwNdRB|P{`pgLr{>?9duyS2&C{^q-<#dnxpIAFFv|_B`Y&Xi7d9C?+qvPZ
zxYxW8TlMYRM3k$STIBObkNs!3XX(JQ`p@ZqNdHr2q4Kj`AMg6KHXH7l9f0b8X-xOT
z8JPFZdGAgBLRoD8<W9D(Z7I3ZvC@(aonLeI=L6xaGyJD57S;4pdIz2Tch8*2yK@Us
zFtR_C4WC^8a5<K(>R9!k&N@$jw@Jf`=Sm+;;_toZBIsY)b9?A7Q0QFP{zcUeTsPTp
zOLHr{{4$K+ez}J|*Xa0ip98l<T-_}F<xzh(qQ62c+{d<sy1)5<hPu~)s0D*CO{0{6
zs0Dn2tsF#)Tx;wpmX!h*fK$qV(-6QZ0Bj;qMhuSti-vw=TZRJ2tOm%m0ksXFydVYh
zf)o}+CM<9(a2Hq=#?(v3G5`Rf_7@=3W<e-G9)R6@CfL2lC*#-!*Mm)brio)@`X~C4
zgTpVF_@$Kk0xv~G-M=z?iES{kco++L$jxg|5P+r-H{;Nl01<Jg%J_V5STv-32^@lo
zCd2|Frle90kO-YX9T`3y9w>J3if}N!SAT=S3?nLj#d8oslT^YKpdFoNa9EqBp-wy7
z0GDn;h9JG`Qu&Z%I6U{XouE9It}GF%3DOPlHsX=%VSC8?a0S*L$p@g)*ti0tYVN|s
zAoK$G!;MZcB`A||b(FxUd=Phngb4zN)LXoy@*%<rn-N99ZjFVQq1(omik8ry0vZ8+
z%D+KX4xBA(o`VE9J61YZ+AQFF;_(q|Ju`P^p?1yFxZw|Hck3iI2DUy4@A}OBv3s#|
zHGKFFi1M}HJCYApJaFH4XLp}m3!d7dkv{;C=xd+pC70jZk+*^{I<cHw{>yAl&#J#S
z>+FSC@#lj-8N7G$7yB2)Pns8MvkmPlEh}}|ivBfbAnysz4E|{=0DKhe$bE5iS2x4N
zilc4@nz16DUM8S7f_*ELfQItYgFg6cfC@T8Z|C_56iHB>1W2N{i`#F*dTjqju+_j&
z0!?ZLa{=$|BzSL@uUL>IV*wI)Q2j5FguRV9ybo~l5=r3g@b>QndqI|ICc|VX!fX0_
zNHfo%3}`ND37bI>ojUjm6SBbk<1n-|1mz*ErDxi}nTScUz*%7^#s_TT^_&?wq@*hK
z;Glcd=-C8`mn(z`_;Kgr(q%m(X0>wC%><lT3vDd_fosCY2s#bI1$%5IDLdj#TAj$v
z3s7f`(Sb9Ex)6|90>FTi2D1ha%A7Z0pxG5^LXL@=b4*G`)hP(^@SaS2KqhG{9^-m0
zbwz9%4Jk>D!E(x-@H)b1PLqws;9i>h!wERH!*R%8YIc4c+$jP3ONylxxfMEU6{Z+b
ztG~mzGyEtr%Q7b_p!c%+$i2Ypr|?t$5vm!qS%qqLXPMBqJNK;bY|QO!+(K^l=&a*g
z&yM-dxz2?*A9?mKxtB-QPxj~Fe-pSpRqKKJT%dljITvVM;&Xw+%NJJzUHM@Bdax-M
zY+9;*96YjVN09fqF?VAj`pDO^T(xp){q)7$>5Gq!T+D~}u7{g*;pU~2x$x2DBf0R2
z*#U5-76u*#8p^nYK$gug+TAj9VU4NIdqN<o9^T<2Zx-xzlH-2~765YC)HLL%r`Ze*
z$NfLxxZ7Xjt85$aW$b1+HSPE_`cp;yoe&mUK#;)FmK6XTdvTCNA%HMf!8=K8=Lb0o
z0UykBFU-@eTK-SQ4PJ=>k<*@xD`9yvg`qQ^hj6ZO0U)YpP|<kXcsv<b#t9>0J(C_8
zMqttqW<s#{>bbGybG!Rn80*!2TsUYH+!+>X&!=ob&M7AZo0Yl@2tlxUXxf`$4N2G{
zu2m*Y;R;|1S{57`FX*}`$i0RH=d|~Z_a1dOtlt#{r(V9=1i#CAo*N@eiN#k;Ply;@
z!+rZf&e=e*+k$-*0x&odGP3rizFcXtsD%E{!b8JZ!2ZMwFnYek<txkf>wxDq6ay)1
zh&UY}0(PB#hxb-Ge03s*KMo8}B;nhN@ufh7$TVG{ho43hPIsqm9o(gKsK5=ULm_La
zia_q=6wFD4K(>WTJGf!U*^5HNTkx_pCLzUBMZOc>sv4`OwlM0+XGz1P99JRo?ENSv
zj;lD6$dceX;*(F{_r4O9>(J{d(hy|E^nD--(HKP8s^%g6jR$vuR5R{L1nn`{l82xL
z<+|BZ<Ssn4fz`lRZ<gDjRl%toycMjXC+I?k7&t)2`qq=R^7}yj8Sd5u`UiNgMAhM$
zq5O`WGw*y`RlkKQ*)D3<y%DN@@c#YxAKbWqW3gq~y&5_VZB^k1BlkxhB=08|&n!2u
zR-KvkY%)Pt7q#KtIX^fzxX`)eUd&``j<0!70KRkkvfkQ_y8WNs`uNsT^KyK(?(7z_
zffwlB*mq>Pc_qHOum4^kALQ192Xn!LOM%D1H#b=zc;-gF9rN$Zy^{?$Egf2(TJv`0
zy}K7a%z3%lA!DrU?qkarmOHaGXCHgJHtVs}&sq@c#eQOTXVcQ&Y*YJ6?dP{X{~&wi
zTDIc)W9CPHeC9&I8V~^_&U>pDj^w<xv+rPj7w0Z!Yg?BROE*?_taPm$%kFx6-Fx<t
z_w08d4^Tf>u*dX;=bXL!u<fhEuAT$zSM6NSUiLS8S-6kEORHF^#46Zc#h)t7v>k;2
z?G2Pf$ZVZPs%2#om`7O=r~<~lh7!BB+t`p&`|%ncH1HtbgiFQvK|CO0-p($S@hDiN
zZAUa`Jxvu-lH1SEqO=u~jS0&V6y}krF5H`U+e>P4RJsc3%0$@gQ@FmmHzZqMy|R#0
z{T_PSZH~A7?6>EUfh@*D!xMU~9CmZQ@WIgmRuujcKrdiL_f6A9v)Ja_yx`2xr7H_o
z(=|=sp{LyXwa8|A(x)!Mmv3%(A_ZS7;cFC_4M~oTijXAK^Bp4jlBIv-#smhIwWTYI
zKk;*l%FFP*KJ70ykk}Z2RRUXuJ$`r!l-NUPsP2YDDgMr{W3FM5;l;iZ?!)kLUKs^4
zVp$9urJFQgAFuLBJ?jcaN$v+tMS|oTC;1RkWMzoaFp`^CAo&u|l3b;X<L-ieB3S?k
z7Wi8tMEeRsYIzWoT*TZNjlo}Ih{2JEa5F)Fyt)|qb<k(vQ*}zd0c~HyPdP|1{s~j@
zv!P9<maTXas9Ozivy73r%!UrFIa@Z2Easg>VZHHGuJKg1;mvHQea+cnzR4{1{A%cz
zL)rbuv!N4f&XZ>Qj~CVTgYCJ4?b*hTZ0Piwv-6o9h4wEV|JB>Se0#O>*sKelt6P?O
z)?3cxTF$Iiy*2BBmf(*0$+^k-J9Bpyg!Q_^xw^w^fg>A`s|CnkV07Pq&3ni~`lHLo
z*N^q)j`e1b_GN3%uX+1{iK`BGej``+#&YHQk?!1)?$57fFaK!GdBd6<o<cql{Noc(
zC7?cAfyMUC`4)VO{NmMYRb<WCyb%aujAtvJEBIjF?G5(sL0|82^*QOU8^e7x{qHmj
z_nI>r#hQ&qBSFm(jS5mM8kKPmeWB~&PNS76MWZ1ml`*;(?S(h+BUx+WWnc?TmI!mj
zAz3u4F_6qtWt?p!(F<A3=g!VCNtl2?){wD<fu%$W4;AExDeCw33W|B!h^XK<$o~y;
zeS_Tpg+g0)J7wS6QBUpN>Zhuy&aLCSsfw-6T~yUp$A0PtwKYg_RK;c=g&6N_`Vn1|
cXS_4rKkxrZ|J=KO_1<Hqa+5~PE+WGJ0kx6CeE<Le


From bf49a018f85b0a987eb30d36c57f28a509ca2e87 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Thu, 9 Oct 2025 13:50:52 +0000
Subject: [PATCH 4/4] Modernize gene extraction scripts and add test suite

- Update "take genes into aminoacid.py" with argparse and progress tracking
- Update "take genes into nucleotides.py" with argparse and progress tracking
- Add professional structure matching cdsselector.py
- Create comprehensive test suite in tests/ folder
- Add test data with sample GenBank files
- Add progress output showing "Processing file X/Y"
- All 11 tests passing (100%)

Co-authored-by: Mxrcon <48180517+Mxrcon@users.noreply.github.com>
---
 .gitignore                             |   3 +
 take genes into aminoacid.py           | 263 ++++++++++++++++++---
 take genes into nucleotides.py         | 264 ++++++++++++++++++---
 tests/README.md                        | 121 ++++++++++
 tests/test_data/genes.txt              |   4 +
 tests/test_data/input/test_genome1.gbk |  38 +++
 tests/test_data/input/test_genome2.gbk |  32 +++
 tests/test_scripts.py                  | 308 +++++++++++++++++++++++++
 8 files changed, 979 insertions(+), 54 deletions(-)
 create mode 100644 tests/README.md
 create mode 100644 tests/test_data/genes.txt
 create mode 100644 tests/test_data/input/test_genome1.gbk
 create mode 100644 tests/test_data/input/test_genome2.gbk
 create mode 100644 tests/test_scripts.py

diff --git a/.gitignore b/.gitignore
index fcf176a..441e291 100644
--- a/.gitignore
+++ b/.gitignore
@@ -36,3 +36,6 @@ env/
 # OS
 .DS_Store
 Thumbs.db
+
+# Test outputs
+tests/test_output/
diff --git a/take genes into aminoacid.py b/take genes into aminoacid.py
index bd206cc..71edb61 100644
--- a/take genes into aminoacid.py	
+++ b/take genes into aminoacid.py	
@@ -1,32 +1,241 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Gene to Amino Acid Extractor
 
-from Bio import SeqIO
-import sys
+This script extracts CDS (Coding Sequences) from GenBank files and outputs
+the translated amino acid sequences in FASTA format. Genes are filtered
+based on a provided list, and sequences are organized by gene name.
+
+Usage:
+    python "take genes into aminoacid.py" --input-folder <input_dir>
+                                          --genes-list <genes_file>
+                                          --output-folder <output_dir>
+
+Arguments:
+    --input-folder    : Folder containing GenBank (.gbk) files to process
+    --genes-list      : File containing one gene name per line
+    --output-folder   : Folder where amino acid FASTA files will be saved
+
+Author: Davi J. Marcon
+Email: davijosuemarcon@gmail.com
+"""
+
+import argparse
+import logging
 import os
-#Determinando os inputs e outputs
-entrada = sys.argv[1]
-saida = sys.argv[2]
-Lista = ["recA","rpoB"]
-#abrindo os inputs
-for i in os.listdir(entrada):
-	open(entrada+"/"+i)
-#conta o numero de gbs
-list = os.listdir(entrada)
-number_files = len(list)
-print("Foram encontrados" , (number_files), "arquivos")
-#cria  a pasta de outputs
-if not os.path.exists(saida):
-    os.makedirs(saida)
-#Executando código
-for i in os.listdir(entrada): 
-	for seq_record in SeqIO.parse(entrada+"/"+i , 'genbank'):
-		for feature in seq_record.features:
-			if feature.type == "CDS" and "gene" in feature.qualifiers:
-				gene = feature.qualifiers['gene'][0]
-				if gene in Lista:
-					if not os.path.exists(saida+"/"+gene):
-    						os.makedirs(saida+"/"+gene)
-					with open(saida+"/"+gene+"/"+seq_record.id+".fasta", "w") as ofile:
-						ofile.write(">{0}\n{1}\n".format(seq_record.id, feature.qualifiers['translation'][0]))
+import sys
+from Bio import SeqIO
+
+
+def setup_logging():
+    """Configure logging for the script."""
+    logging.basicConfig(
+        level=logging.INFO,
+        format='%(asctime)s - %(levelname)s - %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S'
+    )
+
+
+def parse_arguments():
+    """Parse command line arguments."""
+    parser = argparse.ArgumentParser(
+        description='Extract amino acid sequences from GenBank files.',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Examples:
+    python "take genes into aminoacid.py" --input-folder gbk_files
+                                          --genes-list genes.txt
+                                          --output-folder amino_acids
+    python "take genes into aminoacid.py" -i gbk_files -g genes.txt -o aa_out
+        """
+    )
+
+    parser.add_argument(
+        '--input-folder', '-i',
+        required=True,
+        metavar='DIR',
+        help='Input folder containing GenBank files'
+    )
+
+    parser.add_argument(
+        '--genes-list', '-g',
+        required=True,
+        metavar='FILE',
+        help='File containing gene names (one per line)'
+    )
+
+    parser.add_argument(
+        '--output-folder', '-o',
+        required=True,
+        metavar='DIR',
+        help='Output folder for amino acid FASTA files'
+    )
+
+    return parser.parse_args()
+
+
+def load_gene_list(genes_file):
+    """
+    Load gene names from a file.
+
+    Args:
+        genes_file (str): Path to the file containing gene names
+
+    Returns:
+        list: List of gene names
+
+    Raises:
+        FileNotFoundError: If the gene list file does not exist
+        IOError: If there is an error reading the file
+    """
+    if not os.path.exists(genes_file):
+        raise FileNotFoundError(f"Gene list file not found: {genes_file}")
+
+    try:
+        with open(genes_file, 'r') as f:
+            gene_list = [line.strip() for line in f if line.strip()]
+        logging.info(f"Loaded {len(gene_list)} genes from {genes_file}")
+        return gene_list
+    except IOError as e:
+        raise IOError(f"Error reading gene list file: {e}")
+
+
+def validate_input_folder(input_folder):
+    """
+    Validate that the input folder exists and contains GenBank files.
+
+    Args:
+        input_folder (str): Path to the input folder
+
+    Returns:
+        list: List of GenBank files in the folder
+
+    Raises:
+        FileNotFoundError: If the folder does not exist
+        ValueError: If no GenBank files are found
+    """
+    if not os.path.exists(input_folder):
+        raise FileNotFoundError(f"Input folder not found: {input_folder}")
+
+    if not os.path.isdir(input_folder):
+        raise ValueError(f"Input path is not a directory: {input_folder}")
+
+    files = [f for f in os.listdir(input_folder)
+             if f.endswith(('.gbk', '.gb', '.genbank'))]
+
+    if not files:
+        raise ValueError(f"No GenBank files found in {input_folder}")
+
+    logging.info(f"Found {len(files)} GenBank file(s) in {input_folder}")
+    return files
+
+
+def process_genbank_files(input_folder, output_folder, gene_list, input_files):
+    """
+    Process all GenBank files and extract amino acid sequences.
+
+    Args:
+        input_folder (str): Input folder path
+        output_folder (str): Output folder path
+        gene_list (list): List of gene names to extract
+        input_files (list): List of GenBank files to process
+    """
+    processed_count = 0
+    error_count = 0
+    extracted_count = 0
+
+    for idx, input_file in enumerate(input_files, 1):
+        input_path = os.path.join(input_folder, input_file)
+
+        try:
+            logging.info(f"Processing file {idx}/{len(input_files)}: {input_file}")
+
+            for seq_record in SeqIO.parse(input_path, 'genbank'):
+                for feature in seq_record.features:
+                    if feature.type == "CDS" and "gene" in feature.qualifiers:
+                        gene = feature.qualifiers['gene'][0]
+                        if gene in gene_list:
+                            # Check if translation exists
+                            if 'translation' not in feature.qualifiers:
+                                logging.warning(
+                                    f"No translation for gene {gene} in "
+                                    f"{input_file}"
+                                )
+                                continue
+
+                            # Create gene-specific output folder
+                            gene_folder = os.path.join(output_folder, gene)
+                            if not os.path.exists(gene_folder):
+                                os.makedirs(gene_folder)
+                                logging.debug(f"Created folder: {gene_folder}")
+
+                            # Write amino acid sequence
+                            output_file = os.path.join(
+                                gene_folder,
+                                f"{seq_record.id}.fasta"
+                            )
+                            with open(output_file, "w") as ofile:
+                                translation = feature.qualifiers['translation'][0]
+                                ofile.write(f">{seq_record.id}\n{translation}\n")
+
+                            extracted_count += 1
+
+            processed_count += 1
+
+        except Exception as e:
+            error_count += 1
+            logging.error(f"Error processing {input_file}: {e}")
+
+    logging.info(
+        f"Processing complete: {processed_count} files succeeded, "
+        f"{error_count} files failed"
+    )
+    logging.info(f"Extracted {extracted_count} amino acid sequences")
+
+
+def main():
+    """Main function to orchestrate the amino acid extraction process."""
+    setup_logging()
+
+    try:
+        # Parse command line arguments
+        args = parse_arguments()
+
+        logging.info("Gene to Amino Acid Extractor started")
+        logging.info(f"Input folder: {args.input_folder}")
+        logging.info(f"Genes list: {args.genes_list}")
+        logging.info(f"Output folder: {args.output_folder}")
+
+        # Load gene list
+        gene_list = load_gene_list(args.genes_list)
+
+        # Validate input folder and get list of files
+        input_files = validate_input_folder(args.input_folder)
+
+        # Create output folder if it doesn't exist
+        if not os.path.exists(args.output_folder):
+            os.makedirs(args.output_folder)
+            logging.info(f"Created output folder: {args.output_folder}")
+
+        # Process all GenBank files
+        process_genbank_files(
+            args.input_folder,
+            args.output_folder,
+            gene_list,
+            input_files
+        )
+
+        logging.info("Gene to Amino Acid Extractor finished successfully")
+        return 0
+
+    except Exception as e:
+        logging.error(f"Fatal error: {e}")
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
 
 
 
diff --git a/take genes into nucleotides.py b/take genes into nucleotides.py
index 458b339..7da96ae 100644
--- a/take genes into nucleotides.py	
+++ b/take genes into nucleotides.py	
@@ -1,32 +1,242 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Gene to Nucleotide Extractor
 
-from Bio import SeqIO
-import sys
+This script extracts CDS (Coding Sequences) from GenBank files and outputs
+the nucleotide sequences in FASTA format. Genes are filtered based on a
+provided list, and sequences are organized by gene name.
+
+Usage:
+    python "take genes into nucleotides.py" --input-folder <input_dir>
+                                            --genes-list <genes_file>
+                                            --output-folder <output_dir>
+
+Arguments:
+    --input-folder    : Folder containing GenBank (.gbk, .gb, .genbank) files to process
+    --genes-list      : File containing one gene name per line
+    --output-folder   : Folder where nucleotide FASTA files will be saved
+
+Author: Davi J. Marcon
+Email: davijosuemarcon@gmail.com
+"""
+
+import argparse
+import logging
 import os
-#Determinando os inputs e outputs
-entrada = sys.argv[1]
-saida = sys.argv[2]
-Lista = ["recA","rpoB"]
-#abrindo os inputs
-for i in os.listdir(entrada):
-	open(entrada+"/"+i)
-#conta o numero de gbs
-list = os.listdir(entrada)
-number_files = len(list)
-print("Foram encontrados" , (number_files), "arquivos")
-#cria  a pasta de outputs
-if not os.path.exists(saida):
-    os.makedirs(saida)
-#Executando código
-for i in os.listdir(entrada): 
-	for seq_record in SeqIO.parse(entrada+"/"+i , 'genbank'):
-		for feature in seq_record.features:
-			if feature.type == "CDS" and "gene" in feature.qualifiers:
-				gene = feature.qualifiers['gene'][0]
-				if gene in Lista:
-					if not os.path.exists(saida+"/"+gene):
-    						os.makedirs(saida+"/"+gene)
-					with open(saida+"/"+gene+"/"+seq_record.id+".fasta", "w") as ofile:
-						ofile.write(">{0}\n{1}\n".format(seq_record.id, feature.location.extract(seq_record).seq))
+import sys
+from Bio import SeqIO
+
+
+def setup_logging():
+    """Configure the root logger: INFO level, timestamped messages."""
+    message_layout = '%(asctime)s - %(levelname)s - %(message)s'
+    timestamp_layout = '%Y-%m-%d %H:%M:%S'
+    logging.basicConfig(
+        format=message_layout, datefmt=timestamp_layout, level=logging.INFO
+    )
+
+
+def parse_arguments():
+    """
+    Parse the command line.
+
+    Returns:
+        argparse.Namespace: input_folder, genes_list and output_folder
+    """
+    usage_examples = """
+Examples:
+    python "take genes into nucleotides.py" --input-folder gbk_files
+                                            --genes-list genes.txt
+                                            --output-folder nucleotides
+    python "take genes into nucleotides.py" -i gbk_files -g genes.txt -o nuc_out
+        """
+    cli = argparse.ArgumentParser(
+        description='Extract nucleotide sequences from GenBank files.',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog=usage_examples
+    )
+
+    # One row per required option: long flag, short flag, metavar, help.
+    option_rows = (
+        ('--input-folder', '-i', 'DIR',
+         'Input folder containing GenBank files'),
+        ('--genes-list', '-g', 'FILE',
+         'File containing gene names (one per line)'),
+        ('--output-folder', '-o', 'DIR',
+         'Output folder for nucleotide FASTA files'),
+    )
+    for long_flag, short_flag, placeholder, description in option_rows:
+        cli.add_argument(
+            long_flag, short_flag,
+            required=True, metavar=placeholder, help=description
+        )
+
+    return cli.parse_args()
+
+
+def load_gene_list(genes_file):
+    """
+    Load gene names (one per line, blank lines skipped) from a file.
+
+    Args:
+        genes_file (str): Path to the file containing gene names
+
+    Returns:
+        list: Whitespace-stripped gene names in file order
+
+    Raises:
+        FileNotFoundError: If the gene list file does not exist
+        IOError: If there is any other error reading the file
+    """
+    # EAFP: open directly instead of racing an os.path.exists() pre-check.
+    try:
+        with open(genes_file, 'r') as f:
+            gene_list = [line.strip() for line in f if line.strip()]
+    except FileNotFoundError as e:
+        raise FileNotFoundError(f"Gene list file not found: {genes_file}") from e
+    except OSError as e:  # IOError is an alias of OSError
+        raise IOError(f"Error reading gene list file: {e}") from e
+    logging.info(f"Loaded {len(gene_list)} genes from {genes_file}")
+    return gene_list
+
+
+def validate_input_folder(input_folder):
+    """
+    Validate that the input folder exists and contains GenBank files.
+
+    Args:
+        input_folder (str): Path to the input folder
+
+    Returns:
+        list: Sorted names of the GenBank files (not sub-folders) in the folder
+
+    Raises:
+        FileNotFoundError: If the folder does not exist
+        ValueError: If the path is not a directory or no GenBank files are found
+    """
+    if not os.path.exists(input_folder):
+        raise FileNotFoundError(f"Input folder not found: {input_folder}")
+    if not os.path.isdir(input_folder):
+        raise ValueError(f"Input path is not a directory: {input_folder}")
+
+    files = sorted(  # os.listdir order is arbitrary; sort for stable runs
+        f for f in os.listdir(input_folder)
+        if f.endswith(('.gbk', '.gb', '.genbank'))
+        and os.path.isfile(os.path.join(input_folder, f))
+    )
+    if not files:
+        raise ValueError(f"No GenBank files found in {input_folder}")
+    logging.info(f"Found {len(files)} GenBank file(s) in {input_folder}")
+    return files
+
+
+def process_genbank_files(input_folder, output_folder, gene_list, input_files):
+    """
+    Process all GenBank files and extract nucleotide sequences.
+
+    Each matching CDS is written to <output_folder>/<gene>/<record id>.fasta
+    in write mode, so a repeated gene/record id pair replaces the file.
+
+    Args:
+        input_folder (str): Input folder path
+        output_folder (str): Output folder path
+        gene_list (list): List of gene names to extract
+        input_files (list): List of GenBank files to process
+    """
+    processed_count = 0
+    error_count = 0
+    extracted_count = 0
+    wanted_genes = set(gene_list)  # built once: O(1) lookup per feature
+
+    for idx, input_file in enumerate(input_files, 1):
+        input_path = os.path.join(input_folder, input_file)
+        try:
+            logging.info(f"Processing file {idx}/{len(input_files)}: {input_file}")
+            for seq_record in SeqIO.parse(input_path, 'genbank'):
+                for feature in seq_record.features:
+                    if feature.type != "CDS" or "gene" not in feature.qualifiers:
+                        continue
+                    gene = feature.qualifiers['gene'][0]
+                    if gene not in wanted_genes:
+                        continue
+
+                    try:
+                        nuc_seq = feature.location.extract(seq_record).seq
+                    except Exception as e:
+                        # One bad feature must not abort the whole file.
+                        logging.warning(
+                            f"Could not extract sequence for gene {gene} "
+                            f"in {input_file}: {e}"
+                        )
+                        continue
+
+                    gene_folder = os.path.join(output_folder, gene)
+                    if not os.path.isdir(gene_folder):
+                        # exist_ok tolerates the folder appearing meanwhile.
+                        os.makedirs(gene_folder, exist_ok=True)
+                        logging.debug(f"Created folder: {gene_folder}")
+
+                    output_file = os.path.join(
+                        gene_folder, f"{seq_record.id}.fasta"
+                    )
+                    with open(output_file, "w") as ofile:
+                        ofile.write(f">{seq_record.id}\n{nuc_seq}\n")
+                    extracted_count += 1
+            processed_count += 1
+        except Exception as e:
+            error_count += 1
+            logging.error(f"Error processing {input_file}: {e}")
+
+    logging.info(
+        f"Processing complete: {processed_count} files succeeded, "
+        f"{error_count} files failed"
+    )
+    logging.info(f"Extracted {extracted_count} nucleotide sequences")
+
+
+def main():
+    """
+    Run the nucleotide extraction pipeline from the command line.
+
+    Returns:
+        int: 0 on success, 1 if an exception reaches this function
+    """
+    setup_logging()
+
+    try:
+        args = parse_arguments()
+
+        logging.info("Gene to Nucleotide Extractor started")
+        logging.info(f"Input folder: {args.input_folder}")
+        logging.info(f"Genes list: {args.genes_list}")
+        logging.info(f"Output folder: {args.output_folder}")
+
+        gene_list = load_gene_list(args.genes_list)
+        input_files = validate_input_folder(args.input_folder)
+
+        # exist_ok closes the check-then-create race; makedirs still raises
+        # when the path exists but is not a directory.
+        is_new = not os.path.isdir(args.output_folder)
+        os.makedirs(args.output_folder, exist_ok=True)
+        if is_new:
+            logging.info(f"Created output folder: {args.output_folder}")
+
+        process_genbank_files(
+            args.input_folder, args.output_folder, gene_list, input_files
+        )
+
+        logging.info("Gene to Nucleotide Extractor finished successfully")
+        return 0
+
+    except Exception as e:
+        # Top-level boundary: report the failure and signal it via exit code.
+        logging.error(f"Fatal error: {e}")
+        return 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())
 
 
 
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000..19a85b2
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,121 @@
+# Test Suite for Bioinfo Python Scripts
+
+This folder contains test data and test scripts to verify the functionality of the Python scripts in this repository.
+
+## Structure
+
+```
+tests/
+├── test_data/
+│   ├── input/              # Test GenBank files
+│   │   ├── test_genome1.gbk
+│   │   └── test_genome2.gbk
+│   └── genes.txt           # Test gene list
+├── test_output/            # Output from test runs (generated)
+├── test_scripts.py         # Main test suite
+└── README.md              # This file
+```
+
+## Test Data
+
+The test data includes:
+- **test_genome1.gbk**: Contains genes dnaA, rpoB, and a decoy gene named `test` that is not in genes.txt
+- **test_genome2.gbk**: Contains genes recA and dnaK
+- **genes.txt**: List of genes to filter/extract (dnaA, dnaK, rpoB, recA)
+
+## Running Tests
+
+### Run All Tests
+
+To run the complete test suite:
+
+```bash
+python tests/test_scripts.py
+```
+
+This will test:
+1. `cdsselector.py` - Filters GenBank files by gene list
+2. `take genes into aminoacid.py` - Extracts amino acid sequences
+3. `take genes into nucleotides.py` - Extracts nucleotide sequences
+
+### Manual Testing
+
+You can also test individual scripts manually:
+
+**Test cdsselector.py:**
+```bash
+python cdsselector.py --input-folder tests/test_data/input \
+                      --genes-list tests/test_data/genes.txt \
+                      --output-folder tests/test_output/cdsselector
+```
+
+**Test take genes into aminoacid.py:**
+```bash
+python "take genes into aminoacid.py" --input-folder tests/test_data/input \
+                                      --genes-list tests/test_data/genes.txt \
+                                      --output-folder tests/test_output/aminoacid
+```
+
+**Test take genes into nucleotides.py:**
+```bash
+python "take genes into nucleotides.py" --input-folder tests/test_data/input \
+                                        --genes-list tests/test_data/genes.txt \
+                                        --output-folder tests/test_output/nucleotide
+```
+
+## Expected Results
+
+### cdsselector.py
+- Creates filtered GenBank files with only the genes in the list
+- Output files: `test_genome1.gbk`, `test_genome2.gbk`
+- Each file contains only the genes specified in genes.txt
+
+### take genes into aminoacid.py
+- Creates folders for each gene
+- Each folder contains FASTA files with amino acid sequences
+- Expected output structure:
+  ```
+  aminoacid/
+  ├── dnaA/
+  │   └── TEST001.fasta
+  ├── rpoB/
+  │   └── TEST001.fasta
+  ├── recA/
+  │   └── TEST002.fasta
+  └── dnaK/
+      └── TEST002.fasta
+  ```
+
+### take genes into nucleotides.py
+- Creates folders for each gene
+- Each folder contains FASTA files with nucleotide sequences
+- Expected output structure:
+  ```
+  nucleotide/
+  ├── dnaA/
+  │   └── TEST001.fasta
+  ├── rpoB/
+  │   └── TEST001.fasta
+  ├── recA/
+  │   └── TEST002.fasta
+  └── dnaK/
+      └── TEST002.fasta
+  ```
+
+## Adding New Tests
+
+To add new test cases:
+
+1. Add new GenBank files to `test_data/input/`
+2. Update `genes.txt` if testing new genes
+3. Modify `test_scripts.py` to include new test cases
+
+## Requirements
+
+- Python 3.7+ (the test suite calls `subprocess.run(capture_output=True)`, added in 3.7)
+- Biopython
+
+Install requirements:
+```bash
+pip install biopython
+```
diff --git a/tests/test_data/genes.txt b/tests/test_data/genes.txt
new file mode 100644
index 0000000..8b5ba30
--- /dev/null
+++ b/tests/test_data/genes.txt
@@ -0,0 +1,4 @@
+dnaA
+dnaK
+rpoB
+recA
diff --git a/tests/test_data/input/test_genome1.gbk b/tests/test_data/input/test_genome1.gbk
new file mode 100644
index 0000000..884b881
--- /dev/null
+++ b/tests/test_data/input/test_genome1.gbk
@@ -0,0 +1,38 @@
+LOCUS       TEST_GENOME1            300 bp    DNA     linear   BCT 01-JAN-2024
+DEFINITION  Test genome 1 for script testing.
+ACCESSION   TEST001
+VERSION     TEST001.1
+KEYWORDS    .
+SOURCE      Test organism 1
+  ORGANISM  Test organism 1
+            Bacteria.
+FEATURES             Location/Qualifiers
+     source          1..300
+                     /organism="Test organism 1"
+                     /mol_type="genomic DNA"
+     gene            10..72
+                     /gene="dnaA"
+     CDS             10..72
+                     /gene="dnaA"
+                     /product="chromosomal replication initiator protein DnaA"
+                     /translation="MKLVRVLSTAAAA"
+     gene            100..162
+                     /gene="rpoB"
+     CDS             100..162
+                     /gene="rpoB"
+                     /product="DNA-directed RNA polymerase subunit beta"
+                     /translation="MKLIVKASTGPAT"
+     gene            200..262
+                     /gene="test"
+     CDS             200..262
+                     /gene="test"
+                     /product="test protein"
+                     /translation="MTEST*PROTEIN"
+ORIGIN
+        1 atgaaacttg tacgcgtact ttcaacagca gctgctgctt agctagctag ctagctagct
+       61 agctagctag ctagctagct agctagctag ctagctagat gaagttgatt gtaaaagcat
+      121 caactggtcc tgcgacttag ctagctagct agctagctag ctagctagct agctagctag
+      181 ctagctagct agctagctaa tgactgaaag tacaacccat ccgcgaacag ctagctagct
+      241 agctagctag ctagctagct agctagctag ctagctagct agctagctag ctagctagct
+      301
+//
diff --git a/tests/test_data/input/test_genome2.gbk b/tests/test_data/input/test_genome2.gbk
new file mode 100644
index 0000000..d5c05ce
--- /dev/null
+++ b/tests/test_data/input/test_genome2.gbk
@@ -0,0 +1,32 @@
+LOCUS       TEST_GENOME2            300 bp    DNA     linear   BCT 01-JAN-2024
+DEFINITION  Test genome 2 for script testing.
+ACCESSION   TEST002
+VERSION     TEST002.1
+KEYWORDS    .
+SOURCE      Test organism 2
+  ORGANISM  Test organism 2
+            Bacteria.
+FEATURES             Location/Qualifiers
+     source          1..300
+                     /organism="Test organism 2"
+                     /mol_type="genomic DNA"
+     gene            10..72
+                     /gene="recA"
+     CDS             10..72
+                     /gene="recA"
+                     /product="recombinase A"
+                     /translation="MKRECOMBINAS"
+     gene            100..162
+                     /gene="dnaK"
+     CDS             100..162
+                     /gene="dnaK"
+                     /product="molecular chaperone DnaK"
+                     /translation="MKCHAPERONEA"
+ORIGIN
+        1 atgaaacgcg aacgaattca aatgtcgtga ttaacgctta gctagctagc tagctagcta
+       61 gctagctagc tagctagcta gctagctagc tagctagcta gatgaaatgc catgccgagc
+      121 gaaatgctga agctagctag ctagctagct agctagctag ctagctagct agctagctag
+      181 ctagctagct agctagctat gctagctagt cgttcgattc gattcgatag ctagctagct
+      241 agctagctag ctagctagct agctagctag ctagctagct agctagctag ctagctagct
+      301
+//
diff --git a/tests/test_scripts.py b/tests/test_scripts.py
new file mode 100644
index 0000000..7b229d5
--- /dev/null
+++ b/tests/test_scripts.py
@@ -0,0 +1,308 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+Test Suite for Bioinfo Python Scripts
+
+This script tests the functionality of cdsselector.py,
+take genes into aminoacid.py, and take genes into nucleotides.py.
+
+Usage:
+    python tests/test_scripts.py
+"""
+
+import os
+import sys
+import subprocess
+import shutil
+from pathlib import Path
+
+
+class Colors:
+    """ANSI escape sequences used to colour and reset terminal output."""
+    GREEN = '\033[92m'  # success messages and the all-passed banner
+    RED = '\033[91m'  # error messages
+    YELLOW = '\033[93m'  # info messages and the some-failed banner
+    BLUE = '\033[94m'  # section headers
+    BOLD = '\033[1m'  # emphasis for headers and summary lines
+    END = '\033[0m'  # appended after styled text to reset the terminal
+
+
+def print_header(text):
+    """Print *text* centred in 70 columns between two '=' rules."""
+    rule = f"{Colors.BOLD}{Colors.BLUE}{'='*70}{Colors.END}"
+    title = f"{Colors.BOLD}{Colors.BLUE}{text:^70}{Colors.END}"
+    print(f"\n{rule}", title, f"{rule}\n", sep="\n")
+
+
+def print_success(text):
+    """Print *text* in green, prefixed with a check mark."""
+    print(Colors.GREEN + f"✓ {text}" + Colors.END)
+
+
+def print_error(text):
+    """Print *text* in red, prefixed with a cross mark."""
+    print(Colors.RED + f"✗ {text}" + Colors.END)
+
+
+def print_info(text):
+    """Print *text* in yellow, prefixed with an info symbol."""
+    print(Colors.YELLOW + f"ℹ {text}" + Colors.END)
+
+
+def run_command(cmd, description):
+    """
+    Run a command and return success status.
+
+    Args:
+        cmd (list): Command and arguments to run
+        description (str): Description of the test
+
+    Returns:
+        bool: True if the command exited with status 0; False on a non-zero
+            exit, a timeout (30 s) or a failure to start the command
+    """
+    print_info(f"Testing: {description}")
+    try:
+        result = subprocess.run(
+            cmd, capture_output=True, text=True, timeout=30
+        )
+    except subprocess.TimeoutExpired:
+        print_error(f"TIMEOUT: {description}")
+        return False
+    except Exception as e:
+        print_error(f"ERROR: {description} - {e}")
+        return False
+
+    if result.returncode == 0:
+        print_success(f"PASSED: {description}")
+        return True
+
+    print_error(f"FAILED: {description}")
+    if result.stderr:
+        # Keep the tail: the reason for a failure is printed last.
+        print(f"  Error: {result.stderr[-200:]}")
+    return False
+
+
+def verify_file_exists(filepath, description):
+    """Report whether *filepath* exists and return the result as a bool."""
+    found = os.path.exists(filepath)
+    if not found:
+        print_error(f"File missing: {description}")
+        return False
+    print_success(f"File exists: {description}")
+    return True
+
+
+def verify_folder_contents(folder, expected_count, description):
+    """Check that *folder* holds at least *expected_count* regular files."""
+    if not os.path.exists(folder):
+        print_error(f"{description}: Folder does not exist")
+        return False
+
+    paths = (os.path.join(folder, name) for name in os.listdir(folder))
+    count = sum(1 for path in paths if os.path.isfile(path))
+    report = f"{description}: Found {count} files (expected >= {expected_count})"
+    if not count >= expected_count:
+        print_error(report)
+        return False
+    print_success(report)
+    return True
+
+
+def test_cdsselector():
+    """Run the cdsselector.py checks; return (tests_passed, tests_total)."""
+    print_header("Testing cdsselector.py")
+    script_dir = Path(__file__).parent.parent
+    test_data_dir = script_dir / "tests" / "test_data"
+    output_dir = script_dir / "tests" / "test_output" / "cdsselector"
+    script = str(script_dir / "cdsselector.py")
+
+    # Clean output directory
+    if output_dir.exists():
+        shutil.rmtree(output_dir)
+
+    tests_passed = 0
+    tests_total = 0
+
+    # Test 1: Help command (sys.executable: same interpreter as this suite)
+    tests_total += 1
+    if run_command(
+        [sys.executable, script, "--help"],
+        "cdsselector.py --help"
+    ):
+        tests_passed += 1
+
+    # Test 2: Run with test data
+    tests_total += 1
+    if run_command(
+        [
+            sys.executable, script,
+            "--input-folder", str(test_data_dir / "input"),
+            "--genes-list", str(test_data_dir / "genes.txt"),
+            "--output-folder", str(output_dir)
+        ],
+        "cdsselector.py with test data"
+    ):
+        tests_passed += 1
+
+        # Test 3: Verify output files (only counted when the run succeeded)
+        tests_total += 1
+        if verify_file_exists(
+            output_dir / "test_genome1.gbk",
+            "test_genome1.gbk output"
+        ):
+            tests_passed += 1
+
+    print(f"\n{Colors.BOLD}cdsselector.py: {tests_passed}/{tests_total} tests passed{Colors.END}")
+    return tests_passed, tests_total
+
+
+def test_aminoacid_extractor():
+    """Run the amino acid extractor checks; return (tests_passed, tests_total)."""
+    print_header("Testing take genes into aminoacid.py")
+    script_dir = Path(__file__).parent.parent
+    test_data_dir = script_dir / "tests" / "test_data"
+    output_dir = script_dir / "tests" / "test_output" / "aminoacid"
+    script = str(script_dir / "take genes into aminoacid.py")
+
+    # Clean output directory
+    if output_dir.exists():
+        shutil.rmtree(output_dir)
+
+    tests_passed = 0
+    tests_total = 0
+
+    # Test 1: Help command (sys.executable: same interpreter as this suite)
+    tests_total += 1
+    if run_command(
+        [sys.executable, script, "--help"],
+        "take genes into aminoacid.py --help"
+    ):
+        tests_passed += 1
+
+    # Test 2: Run with test data
+    tests_total += 1
+    if run_command(
+        [
+            sys.executable, script,
+            "--input-folder", str(test_data_dir / "input"),
+            "--genes-list", str(test_data_dir / "genes.txt"),
+            "--output-folder", str(output_dir)
+        ],
+        "take genes into aminoacid.py with test data"
+    ):
+        tests_passed += 1
+
+        # Test 3: Verify gene folders exist (only counted after a good run)
+        tests_total += 1
+        dnaA_folder = output_dir / "dnaA"
+        if verify_file_exists(dnaA_folder, "dnaA gene folder"):
+            tests_passed += 1
+
+            # Test 4: Verify FASTA file exists
+            tests_total += 1
+            fasta_files = list(dnaA_folder.glob("*.fasta"))
+            if fasta_files:
+                print_success(f"Found FASTA file(s) in dnaA folder: {[f.name for f in fasta_files]}")
+                tests_passed += 1
+            else:
+                print_error("No FASTA files found in dnaA folder")
+
+    print(f"\n{Colors.BOLD}take genes into aminoacid.py: {tests_passed}/{tests_total} tests passed{Colors.END}")
+    return tests_passed, tests_total
+
+
+def test_nucleotide_extractor():
+    """Run the nucleotide extractor checks; return (tests_passed, tests_total)."""
+    print_header("Testing take genes into nucleotides.py")
+    script_dir = Path(__file__).parent.parent
+    test_data_dir = script_dir / "tests" / "test_data"
+    output_dir = script_dir / "tests" / "test_output" / "nucleotide"
+    script = str(script_dir / "take genes into nucleotides.py")
+
+    # Clean output directory
+    if output_dir.exists():
+        shutil.rmtree(output_dir)
+
+    tests_passed = 0
+    tests_total = 0
+
+    # Test 1: Help command (sys.executable: same interpreter as this suite)
+    tests_total += 1
+    if run_command(
+        [sys.executable, script, "--help"],
+        "take genes into nucleotides.py --help"
+    ):
+        tests_passed += 1
+
+    # Test 2: Run with test data
+    tests_total += 1
+    if run_command(
+        [
+            sys.executable, script,
+            "--input-folder", str(test_data_dir / "input"),
+            "--genes-list", str(test_data_dir / "genes.txt"),
+            "--output-folder", str(output_dir)
+        ],
+        "take genes into nucleotides.py with test data"
+    ):
+        tests_passed += 1
+
+        # Test 3: Verify gene folders exist (only counted after a good run)
+        tests_total += 1
+        recA_folder = output_dir / "recA"
+        if verify_file_exists(recA_folder, "recA gene folder"):
+            tests_passed += 1
+
+            # Test 4: Verify FASTA file exists
+            tests_total += 1
+            fasta_files = list(recA_folder.glob("*.fasta"))
+            if fasta_files:
+                print_success(f"Found FASTA file(s) in recA folder: {[f.name for f in fasta_files]}")
+                tests_passed += 1
+            else:
+                print_error("No FASTA files found in recA folder")
+
+    print(f"\n{Colors.BOLD}take genes into nucleotides.py: {tests_passed}/{tests_total} tests passed{Colors.END}")
+    return tests_passed, tests_total
+
+
+def main():
+    """
+    Run every test group and print an overall summary.
+
+    Returns:
+        int: 0 when all tests passed, 1 otherwise
+    """
+    print_header("Bioinfo Python Scripts Test Suite")
+
+    # Each group returns a (passed, total) pair; run them in order.
+    test_groups = (test_cdsselector, test_aminoacid_extractor,
+                   test_nucleotide_extractor)
+    total_passed = 0
+    total_tests = 0
+    for run_group in test_groups:
+        passed, total = run_group()
+        total_passed += passed
+        total_tests += total
+
+    print_header("Test Summary")
+    if total_tests > 0:
+        success_rate = total_passed / total_tests * 100
+    else:
+        success_rate = 0
+
+    all_passed = total_passed == total_tests
+    if all_passed:
+        print(f"{Colors.GREEN}{Colors.BOLD}ALL TESTS PASSED!{Colors.END}")
+    else:
+        print(f"{Colors.YELLOW}{Colors.BOLD}SOME TESTS FAILED{Colors.END}")
+
+    print(f"\n{Colors.BOLD}Total: {total_passed}/{total_tests} tests passed ({success_rate:.1f}%){Colors.END}\n")
+
+    return 0 if all_passed else 1
+
+
+if __name__ == "__main__":
+    sys.exit(main())