From f4861e711f3efd934cae7cd4647dead3ead8cf09 Mon Sep 17 00:00:00 2001
From: Vladimir Grigoriants <vova.grig2002@gmail.com>
Date: Wed, 27 Sep 2023 13:10:50 +0400
Subject: [PATCH 01/36] Add folder HW4_Grigoriants, create README.md

---
 HW4_Grigoriants/README.md | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 HW4_Grigoriants/README.md

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/HW4_Grigoriants/README.md
@@ -0,0 +1 @@
+

From c3b919cccef914ea93586a2ff7a5ca9041d9859d Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Fri, 29 Sep 2023 13:57:45 +0300
Subject: [PATCH 02/36] Add protein_tools.py with run_protein_tools and
 check_for_motif functions inside

---
 HW4_Grigoriants/protein_tools.py | 41 ++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)
 create mode 100644 HW4_Grigoriants/protein_tools.py

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
new file mode 100644
index 0000000..bf06915
--- /dev/null
+++ b/HW4_Grigoriants/protein_tools.py
@@ -0,0 +1,41 @@
+def check_for_motifs(sequences, motif):
+    start = 0
+    nl = "\n"  # used for user-friendly output
+    all_positions = []
+    for sequence in sequences:
+        if motif in sequence:
+            positions = []
+            while True:
+                start = sequence.find(motif, start)
+                if start == -1:
+                    break
+                positions.append(start)
+                start += 1  # use += len(motif) not to count overlapping matches
+            all_positions.append(positions)
+            pos_for_print = ", ".join(str(x) for x in positions)
+            print(f"Sequence: {sequence}")
+            print(f"Motif: {motif}")
+            print(
+                f"Motif is present in protein sequence starting at positions: {pos_for_print}{nl}"
+            )
+        else:
+            all_positions.append([])
+            print(f"Sequence: {sequence}")
+            print(f"Motif: {motif}")
+            print(f"Motif is not present in protein sequence{nl}")
+    return all_positions
+
+
+procedures_to_functions = {"check_for_motifs": check_for_motifs}
+
+
+def run_protein_tools(*args, **kwargs):
+    sequences = list(args)
+    procedure = kwargs["procedure"]
+    if procedure not in procedures_to_functions.keys():
+        raise ValueError("Wrong procedure")
+    procedure_arguments = {}
+    procedure_arguments["sequences"] = sequences
+    if procedure == "check_for_motifs":
+        procedure_arguments["motif"] = kwargs["motif"]
+    return procedures_to_functions[procedure](**procedure_arguments)

From bc24a411248e1c73ed401ceab4983cea6a0e334b Mon Sep 17 00:00:00 2001
From: Ekaterina Shitik <shitik.ekaterina@gmail.com>
Date: Fri, 29 Sep 2023 13:34:14 +0200
Subject: [PATCH 03/36] Add 'search_for_alt_frames' function

---
 HW4_Grigoriants/protein_tools.py | 36 ++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index bf06915..f78b59c 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -26,6 +26,42 @@ def check_for_motifs(sequences, motif):
     return all_positions
 
 
+def search_for_alt_frames(sequences: str, alt_st_codon: str, num_position=0):
+    """
+    Search for alternative frames in a protein sequences
+
+    Without an alt_st_codon argument search for frames that start with methionine ('M')
+    To search frames with alternative start codon add alt_st_codon argument
+    In alt_st_codon argument use one-letter code
+
+    The function ignores the last three amino acids in sequences
+
+    Arguments:
+    - sequences (tuple(str) or list(str)): sequences to check
+    - alt_st_codon (str): the name of an amino acid that is encoded by alternative start codon (Optional)
+    Example: alt_st_codon = 'I'
+
+    Return:
+    - dictionary: the number of a sequence and a collection of alternative frames
+    """
+    if len(alt_st_codon) > 1:
+        raise ValueError('Invalid start codon!')
+    alternative_frames = {}
+    for sequence in sequences:
+        for amino_acid in sequence[1:-3]:
+            num_position += 1
+            if (amino_acid == alt_st_codon or
+                    amino_acid == alt_st_codon.swapcase()):
+                key = sequences.index(sequence) + 1
+                if key in alternative_frames:
+                    alternative_frames[key] += sequence[num_position:] + '  '
+                else:
+                    alternative_frames[key] = sequence[num_position:] + '  '
+        num_position = 0
+    for key, value in alternative_frames.items():
+        print(key, value)
+
+
 procedures_to_functions = {"check_for_motifs": check_for_motifs}
 
 

From f81d442a56d5a1c631e2f5337a881c49ef52738b Mon Sep 17 00:00:00 2001
From: Ekaterina Shitik <shitik.ekaterina@gmail.com>
Date: Fri, 29 Sep 2023 13:36:44 +0200
Subject: [PATCH 04/36] Add 'convert_to_nucl_acids' function

---
 HW4_Grigoriants/protein_tools.py | 64 ++++++++++++++++++++++++++++++++
 1 file changed, 64 insertions(+)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index f78b59c..762ea54 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -62,6 +62,70 @@ def search_for_alt_frames(sequences: str, alt_st_codon: str, num_position=0):
         print(key, value)
 
 
+def convert_to_nucl_acids(sequences: str, nucl_acids: str):
+    """
+    Convert protein sequences to RNA or DNA sequences.
+
+    Use the most frequent codons in human. The source - https://www.genscript.com/tools/codon-frequency-table
+    All nucleic acids (DNA and RNA) are showed in 5'-3' direction
+
+    Arguments:
+    - sequences (tuple(str) or list(str)): sequences to convert
+    - nucl_acids (str): the nucleic acid that is prefered
+    Example: nucl_acids = 'RNA' - convert to RNA
+             nucl_acids = 'DNA' - convert to DNA
+             nucl_acids = 'both' - convert to RNA and DNA
+    Return:
+    - dictionary: a collection of alternative frames
+    If nucl_acids = 'RNA' or nucl_acids = 'DNA' output a collection of frames
+    If nucl_acids = 'both' output the name of a nucleic acid and a collection of frames
+    """
+    alphabet = {'F': 'UUU', 'f': 'uuu',
+                'L': 'CUG', 'l': 'cug',
+                'I': 'AUU', 'i': 'auu',
+                'M': 'AUG', 'm': 'aug',
+                'V': 'GUG', 'v': 'gug',
+                'P': 'CCG', 'p': 'ccg',
+                'T': 'ACC', 't': 'acc',
+                'A': 'GCG', 'a': 'gcg',
+                'Y': 'UAU', 'y': 'uau',
+                'H': 'CAU', 'h': 'cau',
+                'Q': 'CAG', 'q': 'cag',
+                'N': 'AAC', 'n': 'aac',
+                'K': 'AAA', 'k': 'aaa',
+                'D': 'GAU', 'd': 'gau',
+                'E': 'GAA', 'e': 'gaa',
+                'C': 'UGC', 'c': 'ugc',
+                'W': 'UGG', 'w': 'ugg',
+                'R': 'CGU', 'r': 'cgu',
+                'S': 'AGC', 's': 'agc',
+                'G': 'GGC', 'g': 'ggc',
+                }
+    if nucl_acids not in {'DNA', 'RNA', 'both'}:
+        raise ValueError('Invalid nucl_acids argument!')
+    rule_of_translation = sequences[0].maketrans(alphabet)
+    rule_of_transcription = sequences[0].maketrans('AaUuCcGg', 'TtAaGgCc')
+    nucl_acid_seqs = {}
+    for sequence in sequences:
+        rna_seq = sequence.translate(rule_of_translation)
+        reverse_dna_seq = rna_seq.translate(rule_of_transcription)[::-1]
+        if 'RNA' in nucl_acid_seqs.keys():
+            nucl_acid_seqs['RNA'] += rna_seq + '  '
+        else:
+            nucl_acid_seqs['RNA'] = rna_seq + '  '
+        if 'DNA' in nucl_acid_seqs.keys():
+            nucl_acid_seqs['DNA'] += reverse_dna_seq + '  '
+        else:
+            nucl_acid_seqs['DNA'] = reverse_dna_seq + '  '
+    if nucl_acids == 'RNA':
+        return nucl_acid_seqs['RNA']
+    elif nucl_acids == 'DNA':
+        return nucl_acid_seqs['DNA']
+    elif nucl_acids == 'both':
+        for key, value in nucl_acid_seqs.items():
+            print(key, value)
+
+
 procedures_to_functions = {"check_for_motifs": check_for_motifs}
 
 

From cbeb58a4feea1015c3ca7558bcaf24a32000781f Mon Sep 17 00:00:00 2001
From: Ekaterina Shitik <shitik.ekaterina@gmail.com>
Date: Fri, 29 Sep 2023 13:43:56 +0200
Subject: [PATCH 05/36] Register new procedures in run_protein_tools

---
 HW4_Grigoriants/protein_tools.py | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index 762ea54..3eb3cb9 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -126,7 +126,10 @@ def convert_to_nucl_acids(sequences: str, nucl_acids: str):
             print(key, value)
 
 
-procedures_to_functions = {"check_for_motifs": check_for_motifs}
+procedures_to_functions = {"check_for_motifs": check_for_motifs,
+                           'search_for_alt_frames': search_for_alt_frames,
+                           'convert_to_nucl_acids': convert_to_nucl_acids
+                           }
 
 
 def run_protein_tools(*args, **kwargs):
@@ -138,4 +141,18 @@ def run_protein_tools(*args, **kwargs):
     procedure_arguments["sequences"] = sequences
     if procedure == "check_for_motifs":
         procedure_arguments["motif"] = kwargs["motif"]
-    return procedures_to_functions[procedure](**procedure_arguments)
+        return procedures_to_functions[procedure](**procedure_arguments)
+    elif procedure == 'search_for_alt_frames':
+        if 'alt_st_codon' not in kwargs.keys():
+            procedure_arguments['alt_st_codon'] = 'M'
+        else:
+            procedure_arguments['alt_st_codon'] = kwargs['alt_st_codon']
+        procedure_arguments['sequences'] = sequences
+        return procedures_to_functions[procedure](**procedure_arguments)
+    elif procedure == 'convert_to_nucl_acids':
+        if 'nucl_acids' not in kwargs.keys():
+            raise ValueError('Add type of nucl_acids!')
+        else:
+            procedure_arguments['nucl_acids'] = kwargs['nucl_acids']
+        procedure_arguments['sequences'] = sequences
+        return procedures_to_functions[procedure](**procedure_arguments)

From d91cfd4850ef1b005dd70d3354c1c88c94d744d7 Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Fri, 29 Sep 2023 15:21:20 +0300
Subject: [PATCH 06/36] Rename nl to new_line in check_for_motifs

---
 HW4_Grigoriants/protein_tools.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index bf06915..1aa99ce 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -1,6 +1,6 @@
 def check_for_motifs(sequences, motif):
     start = 0
-    nl = "\n"  # used for user-friendly output
+    new_line = "\n"  # used for user-friendly output
     all_positions = []
     for sequence in sequences:
         if motif in sequence:
@@ -16,13 +16,13 @@ def check_for_motifs(sequences, motif):
             print(f"Sequence: {sequence}")
             print(f"Motif: {motif}")
             print(
-                f"Motif is present in protein sequence starting at positions: {pos_for_print}{nl}"
+                f"Motif is present in protein sequence starting at positions: {pos_for_print}{new_line}"
             )
         else:
             all_positions.append([])
             print(f"Sequence: {sequence}")
             print(f"Motif: {motif}")
-            print(f"Motif is not present in protein sequence{nl}")
+            print(f"Motif is not present in protein sequence{new_line}")
     return all_positions
 
 

From 39b8acd4cd4d237f4e224fc3ba21d9f9f18a9661 Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Fri, 29 Sep 2023 19:02:08 +0300
Subject: [PATCH 07/36] Add check_and_parse_user_input in protein_tools.py, add
 fixes

---
 HW4_Grigoriants/protein_tools.py | 217 +++++++++++++++++++------------
 1 file changed, 131 insertions(+), 86 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index b4da468..8f610b9 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -1,32 +1,30 @@
 def check_for_motifs(sequences, motif):
-    start = 0
-    new_line = "\n"  # used for user-friendly output
-    all_positions = []
+    # new_line = "\n"  # used for user-friendly output
+    all_positions = {}
     for sequence in sequences:
+        start = 0
+        positions = []
         if motif in sequence:
-            positions = []
             while True:
                 start = sequence.find(motif, start)
                 if start == -1:
                     break
                 positions.append(start)
                 start += 1  # use += len(motif) not to count overlapping matches
-            all_positions.append(positions)
-            pos_for_print = ", ".join(str(x) for x in positions)
-            print(f"Sequence: {sequence}")
-            print(f"Motif: {motif}")
-            print(
-                f"Motif is present in protein sequence starting at positions: {pos_for_print}{new_line}"
-            )
-        else:
-            all_positions.append([])
-            print(f"Sequence: {sequence}")
-            print(f"Motif: {motif}")
-            print(f"Motif is not present in protein sequence{new_line}")
+            # pos_for_print = ", ".join(str(x) for x in positions)
+            # print(f"Sequence: {sequence}")
+            # print(f"Motif: {motif}")
+            # print(
+            #     f"Motif is present in protein sequence starting at positions: {pos_for_print}{new_line}"
+            # )
+        all_positions[sequence] = positions
+        # print(f"Sequence: {sequence}")
+        # print(f"Motif: {motif}")
+        # print(f"Motif is not present in protein sequence{new_line}")
     return all_positions
 
 
-def search_for_alt_frames(sequences: str, alt_st_codon: str, num_position=0):
+def search_for_alt_frames(sequences: str, alt_st_codon: str):
     """
     Search for alternative frames in a protein sequences
 
@@ -44,22 +42,23 @@ def search_for_alt_frames(sequences: str, alt_st_codon: str, num_position=0):
     Return:
     - dictionary: the number of a sequence and a collection of alternative frames
     """
-    if len(alt_st_codon) > 1:
-        raise ValueError('Invalid start codon!')
+    # if len(alt_st_codon) > 1:
+    #     raise ValueError("Invalid start codon!")
     alternative_frames = {}
+    num_position = 0
     for sequence in sequences:
         for amino_acid in sequence[1:-3]:
             num_position += 1
-            if (amino_acid == alt_st_codon or
-                    amino_acid == alt_st_codon.swapcase()):
+            if amino_acid == alt_st_codon or amino_acid == alt_st_codon.swapcase():
                 key = sequences.index(sequence) + 1
                 if key in alternative_frames:
-                    alternative_frames[key] += sequence[num_position:] + '  '
+                    alternative_frames[key] += sequence[num_position:] + "  "
                 else:
-                    alternative_frames[key] = sequence[num_position:] + '  '
+                    alternative_frames[key] = sequence[num_position:] + "  "
         num_position = 0
-    for key, value in alternative_frames.items():
-        print(key, value)
+    # for key, value in alternative_frames.items():
+    #     print(key, value)
+    return alternative_frames
 
 
 def convert_to_nucl_acids(sequences: str, nucl_acids: str):
@@ -80,79 +79,125 @@ def convert_to_nucl_acids(sequences: str, nucl_acids: str):
     If nucl_acids = 'RNA' or nucl_acids = 'DNA' output a collection of frames
     If nucl_acids = 'both' output the name of a nucleic acid and a collection of frames
     """
-    alphabet = {'F': 'UUU', 'f': 'uuu',
-                'L': 'CUG', 'l': 'cug',
-                'I': 'AUU', 'i': 'auu',
-                'M': 'AUG', 'm': 'aug',
-                'V': 'GUG', 'v': 'gug',
-                'P': 'CCG', 'p': 'ccg',
-                'T': 'ACC', 't': 'acc',
-                'A': 'GCG', 'a': 'gcg',
-                'Y': 'UAU', 'y': 'uau',
-                'H': 'CAU', 'h': 'cau',
-                'Q': 'CAG', 'q': 'cag',
-                'N': 'AAC', 'n': 'aac',
-                'K': 'AAA', 'k': 'aaa',
-                'D': 'GAU', 'd': 'gau',
-                'E': 'GAA', 'e': 'gaa',
-                'C': 'UGC', 'c': 'ugc',
-                'W': 'UGG', 'w': 'ugg',
-                'R': 'CGU', 'r': 'cgu',
-                'S': 'AGC', 's': 'agc',
-                'G': 'GGC', 'g': 'ggc',
-                }
-    if nucl_acids not in {'DNA', 'RNA', 'both'}:
-        raise ValueError('Invalid nucl_acids argument!')
+    # if nucl_acids not in {"DNA", "RNA", "both"}:
+    #     raise ValueError("Invalid nucl_acids argument!")
     rule_of_translation = sequences[0].maketrans(alphabet)
-    rule_of_transcription = sequences[0].maketrans('AaUuCcGg', 'TtAaGgCc')
-    nucl_acid_seqs = {}
+    rule_of_transcription = sequences[0].maketrans("AaUuCcGg", "TtAaGgCc")
+    nucl_acid_seqs = {"RNA": [], "DNA": []}
     for sequence in sequences:
         rna_seq = sequence.translate(rule_of_translation)
         reverse_dna_seq = rna_seq.translate(rule_of_transcription)[::-1]
-        if 'RNA' in nucl_acid_seqs.keys():
-            nucl_acid_seqs['RNA'] += rna_seq + '  '
-        else:
-            nucl_acid_seqs['RNA'] = rna_seq + '  '
-        if 'DNA' in nucl_acid_seqs.keys():
-            nucl_acid_seqs['DNA'] += reverse_dna_seq + '  '
-        else:
-            nucl_acid_seqs['DNA'] = reverse_dna_seq + '  '
-    if nucl_acids == 'RNA':
-        return nucl_acid_seqs['RNA']
-    elif nucl_acids == 'DNA':
-        return nucl_acid_seqs['DNA']
-    elif nucl_acids == 'both':
-        for key, value in nucl_acid_seqs.items():
-            print(key, value)
+        # if "RNA" in nucl_acid_seqs.keys():
+        # nucl_acid_seqs["RNA"] += rna_seq + "  "
+        # else:
+        # nucl_acid_seqs["RNA"] = rna_seq + "  "
+        if nucl_acids == "RNA":
+            nucl_acid_seqs["RNA"].append(rna_seq)
+            if sequence == sequences[-1]:
+                del nucl_acid_seqs["DNA"]
+        if nucl_acids == "DNA":
+            nucl_acid_seqs["DNA"].append(reverse_dna_seq)
+            if sequence == sequences[-1]:
+                del nucl_acid_seqs["RNA"]
+        if nucl_acids == "both":
+            nucl_acid_seqs["RNA"].append(rna_seq)
+            nucl_acid_seqs["DNA"].append(reverse_dna_seq)
+    #     if "DNA" in nucl_acid_seqs.keys():
+    #         nucl_acid_seqs["DNA"] += reverse_dna_seq + "  "
+    #     else:
+    #         nucl_acid_seqs["DNA"] = reverse_dna_seq + "  "
+    # if nucl_acids == "RNA":
+    #     return nucl_acid_seqs["RNA"]
+    # elif nucl_acids == "DNA":
+    #     return nucl_acid_seqs["DNA"]
+    # elif nucl_acids == "both":
+    # for key, value in nucl_acid_seqs.items():
+    #     print(key, value)
+    return nucl_acid_seqs
 
 
-procedures_to_functions = {"check_for_motifs": check_for_motifs,
-                           'search_for_alt_frames': search_for_alt_frames,
-                           'convert_to_nucl_acids': convert_to_nucl_acids
-                           }
+procedures_to_functions = {
+    "check_for_motifs": check_for_motifs,
+    "search_for_alt_frames": search_for_alt_frames,
+    "convert_to_nucl_acids": convert_to_nucl_acids,
+}
 
+alphabet = {
+    "F": "UUU",
+    "f": "uuu",
+    "L": "CUG",
+    "l": "cug",
+    "I": "AUU",
+    "i": "auu",
+    "M": "AUG",
+    "m": "aug",
+    "V": "GUG",
+    "v": "gug",
+    "P": "CCG",
+    "p": "ccg",
+    "T": "ACC",
+    "t": "acc",
+    "A": "GCG",
+    "a": "gcg",
+    "Y": "UAU",
+    "y": "uau",
+    "H": "CAU",
+    "h": "cau",
+    "Q": "CAG",
+    "q": "cag",
+    "N": "AAC",
+    "n": "aac",
+    "K": "AAA",
+    "k": "aaa",
+    "D": "GAU",
+    "d": "gau",
+    "E": "GAA",
+    "e": "gaa",
+    "C": "UGC",
+    "c": "ugc",
+    "W": "UGG",
+    "w": "ugg",
+    "R": "CGU",
+    "r": "cgu",
+    "S": "AGC",
+    "s": "agc",
+    "G": "GGC",
+    "g": "ggc",
+}
 
-def run_protein_tools(*args, **kwargs):
+
+def check_and_parse_user_input(*args, **kwargs):
+    if len(args) == 0:
+        raise ValueError("No sequences provided")
     sequences = list(args)
+    for sequence in sequences:
+        if not all(letters in "".join(alphabet.keys()) for letters in sequence):
+            raise ValueError("Invalid sequence given")
     procedure = kwargs["procedure"]
+    procedure_arguments = {}
     if procedure not in procedures_to_functions.keys():
         raise ValueError("Wrong procedure")
-    procedure_arguments = {}
-    procedure_arguments["sequences"] = sequences
     if procedure == "check_for_motifs":
+        if "motif" not in kwargs.keys():
+            raise ValueError("Please provide desired motif")
         procedure_arguments["motif"] = kwargs["motif"]
-        return procedures_to_functions[procedure](**procedure_arguments)
-    elif procedure == 'search_for_alt_frames':
-        if 'alt_st_codon' not in kwargs.keys():
-            procedure_arguments['alt_st_codon'] = 'M'
+    elif procedure == "search_for_alt_frames":
+        if "alt_st_codon" not in kwargs.keys():
+            procedure_arguments["alt_st_codon"] = "M"
         else:
-            procedure_arguments['alt_st_codon'] = kwargs['alt_st_codon']
-        procedure_arguments['sequences'] = sequences
-        return procedures_to_functions[procedure](**procedure_arguments)
-    elif procedure == 'convert_to_nucl_acids':
-        if 'nucl_acids' not in kwargs.keys():
-            raise ValueError('Add type of nucl_acids!')
-        else:
-            procedure_arguments['nucl_acids'] = kwargs['nucl_acids']
-        procedure_arguments['sequences'] = sequences
-        return procedures_to_functions[procedure](**procedure_arguments)
+            if len(kwargs["alt_st_codon"]) > 1:
+                raise ValueError("Invalid start codon!")
+            procedure_arguments["alt_st_codon"] = kwargs["alt_st_codon"]
+    elif procedure == "convert_to_nucl_acids":
+        if "nucl_acids" not in kwargs.keys():
+            raise ValueError("Please provide desired type of nucl_acids")
+        if kwargs["nucl_acids"] not in {"DNA", "RNA", "both"}:
+            raise ValueError("Invalid nucl_acids argument")
+        procedure_arguments["nucl_acids"] = kwargs["nucl_acids"]
+    procedure_arguments["sequences"] = sequences
+    return procedure_arguments, procedure
+
+
+def run_protein_tools(*args, **kwargs):
+    procedure_arguments, procedure = check_and_parse_user_input(*args, **kwargs)
+    return procedures_to_functions[procedure](**procedure_arguments)

From 29fd75253370379202830e0175941b70f6eaa2f4 Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Fri, 29 Sep 2023 19:21:43 +0300
Subject: [PATCH 08/36] Rename alt_st_codon to alt_start_aa in protein_tools.py

---
 HW4_Grigoriants/protein_tools.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index 8f610b9..08e5512 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -24,32 +24,32 @@ def check_for_motifs(sequences, motif):
     return all_positions
 
 
-def search_for_alt_frames(sequences: str, alt_st_codon: str):
+def search_for_alt_frames(sequences: str, alt_start_aa: str):
     """
     Search for alternative frames in a protein sequences
 
-    Without an alt_st_codon argument search for frames that start with methionine ('M')
-    To search frames with alternative start codon add alt_st_codon argument
-    In alt_st_codon argument use one-letter code
+    Without an alt_start_aa argument search for frames that start with methionine ('M')
+    To search frames with alternative start codon add alt_start_aa argument
+    In alt_start_aa argument use one-letter code
 
     The function ignores the last three amino acids in sequences
 
     Arguments:
     - sequences (tuple(str) or list(str)): sequences to check
-    - alt_st_codon (str): the name of an amino acid that is encoded by alternative start codon (Optional)
-    Example: alt_st_codon = 'I'
+    - alt_start_aa (str): the name of an amino acid that is encoded by alternative start codon (Optional)
+    Example: alt_start_aa = 'I'
 
     Return:
     - dictionary: the number of a sequence and a collection of alternative frames
     """
-    # if len(alt_st_codon) > 1:
+    # if len(alt_start_aa) > 1:
     #     raise ValueError("Invalid start codon!")
     alternative_frames = {}
     num_position = 0
     for sequence in sequences:
         for amino_acid in sequence[1:-3]:
             num_position += 1
-            if amino_acid == alt_st_codon or amino_acid == alt_st_codon.swapcase():
+            if amino_acid == alt_start_aa or amino_acid == alt_start_aa.swapcase():
                 key = sequences.index(sequence) + 1
                 if key in alternative_frames:
                     alternative_frames[key] += sequence[num_position:] + "  "
@@ -182,12 +182,12 @@ def check_and_parse_user_input(*args, **kwargs):
             raise ValueError("Please provide desired motif")
         procedure_arguments["motif"] = kwargs["motif"]
     elif procedure == "search_for_alt_frames":
-        if "alt_st_codon" not in kwargs.keys():
-            procedure_arguments["alt_st_codon"] = "M"
+        if "alt_start_aa" not in kwargs.keys():
+            procedure_arguments["alt_start_aa"] = "M"
         else:
-            if len(kwargs["alt_st_codon"]) > 1:
-                raise ValueError("Invalid start codon!")
-            procedure_arguments["alt_st_codon"] = kwargs["alt_st_codon"]
+            if len(kwargs["alt_start_aa"]) > 1:
+                raise ValueError("Invalid start AA!")
+            procedure_arguments["alt_start_aa"] = kwargs["alt_start_aa"]
     elif procedure == "convert_to_nucl_acids":
         if "nucl_acids" not in kwargs.keys():
             raise ValueError("Please provide desired type of nucl_acids")

From de4e146ad1ab9b08717a165d9d92355433e78d1f Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Fri, 29 Sep 2023 19:26:31 +0300
Subject: [PATCH 09/36] Restore printing in check_for_motifs, rename alphabet
 to translation_rule

---
 HW4_Grigoriants/protein_tools.py | 26 +++++++++++++-------------
 1 file changed, 13 insertions(+), 13 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index 08e5512..ea3bf08 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -1,5 +1,5 @@
 def check_for_motifs(sequences, motif):
-    # new_line = "\n"  # used for user-friendly output
+    new_line = "\n"  # used for user-friendly output
     all_positions = {}
     for sequence in sequences:
         start = 0
@@ -11,16 +11,16 @@ def check_for_motifs(sequences, motif):
                     break
                 positions.append(start)
                 start += 1  # use += len(motif) not to count overlapping matches
-            # pos_for_print = ", ".join(str(x) for x in positions)
-            # print(f"Sequence: {sequence}")
-            # print(f"Motif: {motif}")
-            # print(
-            #     f"Motif is present in protein sequence starting at positions: {pos_for_print}{new_line}"
-            # )
+            pos_for_print = ", ".join(str(x) for x in positions)
+            print(f"Sequence: {sequence}")
+            print(f"Motif: {motif}")
+            print(
+                f"Motif is present in protein sequence starting at positions: {pos_for_print}{new_line}"
+            )
         all_positions[sequence] = positions
-        # print(f"Sequence: {sequence}")
-        # print(f"Motif: {motif}")
-        # print(f"Motif is not present in protein sequence{new_line}")
+        print(f"Sequence: {sequence}")
+        print(f"Motif: {motif}")
+        print(f"Motif is not present in protein sequence{new_line}")
     return all_positions
 
 
@@ -81,7 +81,7 @@ def convert_to_nucl_acids(sequences: str, nucl_acids: str):
     """
     # if nucl_acids not in {"DNA", "RNA", "both"}:
     #     raise ValueError("Invalid nucl_acids argument!")
-    rule_of_translation = sequences[0].maketrans(alphabet)
+    rule_of_translation = sequences[0].maketrans(translation_rule)
     rule_of_transcription = sequences[0].maketrans("AaUuCcGg", "TtAaGgCc")
     nucl_acid_seqs = {"RNA": [], "DNA": []}
     for sequence in sequences:
@@ -122,7 +122,7 @@ def convert_to_nucl_acids(sequences: str, nucl_acids: str):
     "convert_to_nucl_acids": convert_to_nucl_acids,
 }
 
-alphabet = {
+translation_rule = {
     "F": "UUU",
     "f": "uuu",
     "L": "CUG",
@@ -171,7 +171,7 @@ def check_and_parse_user_input(*args, **kwargs):
         raise ValueError("No sequences provided")
     sequences = list(args)
     for sequence in sequences:
-        if not all(letters in "".join(alphabet.keys()) for letters in sequence):
+        if not all(letters in "".join(translation_rule.keys()) for letters in sequence):
             raise ValueError("Invalid sequence given")
     procedure = kwargs["procedure"]
     procedure_arguments = {}

From 620a551f08c16193d49bb558c7bbe452baaa3d2b Mon Sep 17 00:00:00 2001
From: Vlada Tuliavko <vlada@DESKTOP-70RJ25E>
Date: Fri, 29 Sep 2023 21:52:08 +0300
Subject: [PATCH 10/36] Add three_one_letter_code and define_molecular_weight
 functions and fixes

---
 HW4_Grigoriants/protein_tools.py | 123 ++++++++++++++++++++++++++++---
 1 file changed, 113 insertions(+), 10 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index ea3bf08..815f2e8 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -1,3 +1,32 @@
+def three_one_letter_code(sequences):
+    inversed_sequences = []
+    for sequence in sequences:
+        inversed_sequence = ""
+        if "-" not in sequence:
+            for letter in sequence:
+                inversed_sequence += amino_acids[letter] + "-"
+            inversed_sequence = inversed_sequence[:-1]
+            inversed_sequences.append(inversed_sequence)
+        else:
+            aa_splitted = sequence.split("-")
+            for aa in aa_splitted:
+                inversed_sequence += list(amino_acids.keys())[
+                    list(amino_acids.values()).index(aa)
+                ]
+            inversed_sequences.append(inversed_sequence)
+    return inversed_sequences
+
+
+def define_molecular_weight(sequences):
+    sequences_weights = []
+    for sequence in sequences:
+        sequence_weight = 0
+        for letter in sequence:
+            sequence_weight += amino_acid_weights[letter]
+        sequences_weights.append(sequence_weight)
+    return sequences_weights
+
+
 def check_for_motifs(sequences, motif):
     new_line = "\n"  # used for user-friendly output
     all_positions = {}
@@ -10,17 +39,19 @@ def check_for_motifs(sequences, motif):
                 if start == -1:
                     break
                 positions.append(start)
-                start += 1  # use += len(motif) not to count overlapping matches
+                # use += len(motif) not to count overlapping matches
+                start += 1
             pos_for_print = ", ".join(str(x) for x in positions)
             print(f"Sequence: {sequence}")
             print(f"Motif: {motif}")
             print(
                 f"Motif is present in protein sequence starting at positions: {pos_for_print}{new_line}"
             )
+        else:
+            print(f"Sequence: {sequence}")
+            print(f"Motif: {motif}")
+            print(f"Motif is not present in protein sequence{new_line}")
         all_positions[sequence] = positions
-        print(f"Sequence: {sequence}")
-        print(f"Motif: {motif}")
-        print(f"Motif is not present in protein sequence{new_line}")
     return all_positions
 
 
@@ -72,8 +103,8 @@ def convert_to_nucl_acids(sequences: str, nucl_acids: str):
     - sequences (tuple(str) or list(str)): sequences to convert
     - nucl_acids (str): the nucleic acid that is prefered
     Example: nucl_acids = 'RNA' - convert to RNA
-             nucl_acids = 'DNA' - convert to DNA
-             nucl_acids = 'both' - convert to RNA and DNA
+                     nucl_acids = 'DNA' - convert to DNA
+                     nucl_acids = 'both' - convert to RNA and DNA
     Return:
     - dictionary: a collection of alternative frames
     If nucl_acids = 'RNA' or nucl_acids = 'DNA' output a collection of frames
@@ -120,6 +151,50 @@ def convert_to_nucl_acids(sequences: str, nucl_acids: str):
     "check_for_motifs": check_for_motifs,
     "search_for_alt_frames": search_for_alt_frames,
     "convert_to_nucl_acids": convert_to_nucl_acids,
+    "three_one_letter_code": three_one_letter_code,
+    "define_molecular_weight": define_molecular_weight,
+}
+amino_acids = {
+    "A": "Ala",
+    "C": "Cys",
+    "D": "Asp",
+    "E": "Glu",
+    "F": "Phe",
+    "G": "Gly",
+    "H": "His",
+    "I": "Ile",
+    "K": "Lys",
+    "L": "Leu",
+    "M": "Met",
+    "N": "Asn",
+    "P": "Pro",
+    "Q": "Gln",
+    "R": "Arg",
+    "S": "Ser",
+    "T": "Thr",
+    "V": "Val",
+    "W": "Trp",
+    "Y": "Tyr",
+    "a": "ala",
+    "c": "cys",
+    "d": "asp",
+    "e": "glu",
+    "f": "phe",
+    "g": "gly",
+    "h": "his",
+    "i": "ile",
+    "k": "lys",
+    "l": "leu",
+    "m": "met",
+    "n": "asn",
+    "p": "pro",
+    "q": "gln",
+    "r": "arg",
+    "s": "ser",
+    "t": "thr",
+    "v": "val",
+    "w": "trp",
+    "y": "tyr",
 }
 
 translation_rule = {
@@ -165,18 +240,46 @@ def convert_to_nucl_acids(sequences: str, nucl_acids: str):
     "g": "ggc",
 }
 
+amino_acid_weights = {
+    "A": 89.09,
+    "C": 121.16,
+    "D": 133.10,
+    "E": 147.13,
+    "F": 165.19,
+    "G": 75.07,
+    "H": 155.16,
+    "I": 131.17,
+    "K": 146.19,
+    "L": 131.17,
+    "M": 149.21,
+    "N": 132.12,
+    "P": 115.13,
+    "Q": 146.15,
+    "R": 174.20,
+    "S": 105.09,
+    "T": 119.12,
+    "V": 117.15,
+    "W": 204.23,
+    "Y": 181.19,
+}
+
 
 def check_and_parse_user_input(*args, **kwargs):
     if len(args) == 0:
         raise ValueError("No sequences provided")
+    procedure = kwargs["procedure"]
+    if procedure not in procedures_to_functions.keys():
+        raise ValueError("Wrong procedure")
     sequences = list(args)
+    allowed_inputs = set(amino_acids.keys()).union(
+        set(amino_acids.values()).union(set("-"))
+    )
+    if procedure != "three_one_letter_code":
+        allowed_inputs.remove("-")
     for sequence in sequences:
-        if not all(letters in "".join(translation_rule.keys()) for letters in sequence):
+        if not all(letters in allowed_inputs for letters in sequence):
             raise ValueError("Invalid sequence given")
-    procedure = kwargs["procedure"]
     procedure_arguments = {}
-    if procedure not in procedures_to_functions.keys():
-        raise ValueError("Wrong procedure")
     if procedure == "check_for_motifs":
         if "motif" not in kwargs.keys():
             raise ValueError("Please provide desired motif")

From 93d2d5f36c4c001edb3d7b80966deef42dfb7fea Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Sat, 30 Sep 2023 12:37:04 +0300
Subject: [PATCH 11/36] Add minor fixes in protein_tools.py

---
 HW4_Grigoriants/protein_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index ea3bf08..98b2b88 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -17,10 +17,10 @@ def check_for_motifs(sequences, motif):
             print(
                 f"Motif is present in protein sequence starting at positions: {pos_for_print}{new_line}"
             )
-        all_positions[sequence] = positions
         print(f"Sequence: {sequence}")
         print(f"Motif: {motif}")
         print(f"Motif is not present in protein sequence{new_line}")
+        all_positions[sequence] = positions
     return all_positions
 
 

From d731697b430a2b3b18ad5924da0c2740551a432d Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Sat, 30 Sep 2023 15:29:43 +0300
Subject: [PATCH 12/36] Add minor fixes in protein_tools.py

---
 HW4_Grigoriants/protein_tools.py | 47 ++++++++++++--------------------
 1 file changed, 18 insertions(+), 29 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index 815f2e8..67366a3 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -33,6 +33,8 @@ def check_for_motifs(sequences, motif):
     for sequence in sequences:
         start = 0
         positions = []
+        print(f"Sequence: {sequence}")
+        print(f"Motif: {motif}")
         if motif in sequence:
             while True:
                 start = sequence.find(motif, start)
@@ -42,14 +44,10 @@ def check_for_motifs(sequences, motif):
                 # use += len(motif) not to count overlapping matches
                 start += 1
             pos_for_print = ", ".join(str(x) for x in positions)
-            print(f"Sequence: {sequence}")
-            print(f"Motif: {motif}")
             print(
                 f"Motif is present in protein sequence starting at positions: {pos_for_print}{new_line}"
             )
         else:
-            print(f"Sequence: {sequence}")
-            print(f"Motif: {motif}")
             print(f"Motif is not present in protein sequence{new_line}")
         all_positions[sequence] = positions
     return all_positions
@@ -92,7 +90,7 @@ def search_for_alt_frames(sequences: str, alt_start_aa: str):
     return alternative_frames
 
 
-def convert_to_nucl_acids(sequences: str, nucl_acids: str):
+def convert_to_nucl_acids(sequences: list, nucl_acids: str):
     """
     Convert protein sequences to RNA or DNA sequences.
 
@@ -110,18 +108,12 @@ def convert_to_nucl_acids(sequences: str, nucl_acids: str):
     If nucl_acids = 'RNA' or nucl_acids = 'DNA' output a collection of frames
     If nucl_acids = 'both' output the name of a nucleic acid and a collection of frames
     """
-    # if nucl_acids not in {"DNA", "RNA", "both"}:
-    #     raise ValueError("Invalid nucl_acids argument!")
     rule_of_translation = sequences[0].maketrans(translation_rule)
     rule_of_transcription = sequences[0].maketrans("AaUuCcGg", "TtAaGgCc")
     nucl_acid_seqs = {"RNA": [], "DNA": []}
     for sequence in sequences:
         rna_seq = sequence.translate(rule_of_translation)
         reverse_dna_seq = rna_seq.translate(rule_of_transcription)[::-1]
-        # if "RNA" in nucl_acid_seqs.keys():
-        # nucl_acid_seqs["RNA"] += rna_seq + "  "
-        # else:
-        # nucl_acid_seqs["RNA"] = rna_seq + "  "
         if nucl_acids == "RNA":
             nucl_acid_seqs["RNA"].append(rna_seq)
             if sequence == sequences[-1]:
@@ -133,17 +125,6 @@ def convert_to_nucl_acids(sequences: str, nucl_acids: str):
         if nucl_acids == "both":
             nucl_acid_seqs["RNA"].append(rna_seq)
             nucl_acid_seqs["DNA"].append(reverse_dna_seq)
-    #     if "DNA" in nucl_acid_seqs.keys():
-    #         nucl_acid_seqs["DNA"] += reverse_dna_seq + "  "
-    #     else:
-    #         nucl_acid_seqs["DNA"] = reverse_dna_seq + "  "
-    # if nucl_acids == "RNA":
-    #     return nucl_acid_seqs["RNA"]
-    # elif nucl_acids == "DNA":
-    #     return nucl_acid_seqs["DNA"]
-    # elif nucl_acids == "both":
-    # for key, value in nucl_acid_seqs.items():
-    #     print(key, value)
     return nucl_acid_seqs
 
 
@@ -264,21 +245,29 @@ def convert_to_nucl_acids(sequences: str, nucl_acids: str):
 }
 
 
-def check_and_parse_user_input(*args, **kwargs):
-    if len(args) == 0:
+def check_and_parse_user_input(sequences, **kwargs):
+    if len(sequences) == 0:
         raise ValueError("No sequences provided")
     procedure = kwargs["procedure"]
     if procedure not in procedures_to_functions.keys():
         raise ValueError("Wrong procedure")
-    sequences = list(args)
     allowed_inputs = set(amino_acids.keys()).union(
         set(amino_acids.values()).union(set("-"))
     )
     if procedure != "three_one_letter_code":
         allowed_inputs.remove("-")
+        allowed_inputs -= set(amino_acids.values())
     for sequence in sequences:
-        if not all(letters in allowed_inputs for letters in sequence):
-            raise ValueError("Invalid sequence given")
+        allowed_inputs_seq = allowed_inputs
+        if procedure == "three_one_letter_code" and "-" in sequence:
+            allowed_inputs_seq -= set(amino_acids.keys())
+            if not all(
+                aminoacids in allowed_inputs_seq for aminoacids in sequence.split("-")
+            ):
+                raise ValueError("Invalid sequence given")
+        else:
+            if not all(aminoacids in allowed_inputs_seq for aminoacids in sequence):
+                raise ValueError("Invalid sequence given")
     procedure_arguments = {}
     if procedure == "check_for_motifs":
         if "motif" not in kwargs.keys():
@@ -301,6 +290,6 @@ def check_and_parse_user_input(*args, **kwargs):
     return procedure_arguments, procedure
 
 
-def run_protein_tools(*args, **kwargs):
-    procedure_arguments, procedure = check_and_parse_user_input(*args, **kwargs)
+def run_protein_tools(sequences=[], **kwargs):
+    procedure_arguments, procedure = check_and_parse_user_input(sequences, **kwargs)
     return procedures_to_functions[procedure](**procedure_arguments)

From e67042900ef2f2ee899e87256250215cb09c29a6 Mon Sep 17 00:00:00 2001
From: Ekaterina Shitik <shitik.ekaterina@gmail.com>
Date: Sat, 30 Sep 2023 14:39:50 +0200
Subject: [PATCH 13/36] Add minor changes to 'convert_to_nucl_acids' function

---
 HW4_Grigoriants/protein_tools.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index 67366a3..181421e 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -113,18 +113,18 @@ def convert_to_nucl_acids(sequences: list, nucl_acids: str):
     nucl_acid_seqs = {"RNA": [], "DNA": []}
     for sequence in sequences:
         rna_seq = sequence.translate(rule_of_translation)
-        reverse_dna_seq = rna_seq.translate(rule_of_transcription)[::-1]
+        dna_seq = rna_seq.translate(rule_of_transcription)
         if nucl_acids == "RNA":
             nucl_acid_seqs["RNA"].append(rna_seq)
             if sequence == sequences[-1]:
                 del nucl_acid_seqs["DNA"]
         if nucl_acids == "DNA":
-            nucl_acid_seqs["DNA"].append(reverse_dna_seq)
+            nucl_acid_seqs["DNA"].append(dna_seq)
             if sequence == sequences[-1]:
                 del nucl_acid_seqs["RNA"]
         if nucl_acids == "both":
             nucl_acid_seqs["RNA"].append(rna_seq)
-            nucl_acid_seqs["DNA"].append(reverse_dna_seq)
+            nucl_acid_seqs["DNA"].append(dna_seq)
     return nucl_acid_seqs
 
 

From fe41d8514cd139b32fdd8bb00fc57d2e0455a1fd Mon Sep 17 00:00:00 2001
From: Ekaterina Shitik <shitik.ekaterina@gmail.com>
Date: Sat, 30 Sep 2023 16:00:03 +0200
Subject: [PATCH 14/36] Change transcription rule in 'convert_to_nucl_acids'
 function

---
 HW4_Grigoriants/protein_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index 181421e..a3d37bc 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -109,7 +109,7 @@ def convert_to_nucl_acids(sequences: list, nucl_acids: str):
     If nucl_acids = 'both' output the name of a nucleic acid and a collection of frames
     """
     rule_of_translation = sequences[0].maketrans(translation_rule)
-    rule_of_transcription = sequences[0].maketrans("AaUuCcGg", "TtAaGgCc")
+    rule_of_transcription = sequences[0].maketrans("Uu", "Tt")
     nucl_acid_seqs = {"RNA": [], "DNA": []}
     for sequence in sequences:
         rna_seq = sequence.translate(rule_of_translation)

From c8e982387dd29336428671997234a88312e9bc0d Mon Sep 17 00:00:00 2001
From: Ekaterina Shitik <shitik.ekaterina@gmail.com>
Date: Sat, 30 Sep 2023 16:03:49 +0200
Subject: [PATCH 15/36] Correct inaccuracies in the docstring of
 'convert_to_nucl_acids'

---
 HW4_Grigoriants/protein_tools.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index a3d37bc..c0c227d 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -104,9 +104,9 @@ def convert_to_nucl_acids(sequences: list, nucl_acids: str):
                      nucl_acids = 'DNA' - convert to DNA
                      nucl_acids = 'both' - convert to RNA and DNA
     Return:
-    - dictionary: a collection of alternative frames
-    If nucl_acids = 'RNA' or nucl_acids = 'DNA' output a collection of frames
-    If nucl_acids = 'both' output the name of a nucleic acid and a collection of frames
+    - dictionary: a collection of nucleic acids sequences
+    If nucl_acids = 'RNA' or nucl_acids = 'DNA' output a collection of sequences
+    If nucl_acids = 'both' output the name of a nucleic acid and a collection of sequences
     """
     rule_of_translation = sequences[0].maketrans(translation_rule)
     rule_of_transcription = sequences[0].maketrans("Uu", "Tt")

From cb03cf4073ede885c1207a0c4995a2c3636b9314 Mon Sep 17 00:00:00 2001
From: Ekaterina Shitik <shitik.ekaterina@gmail.com>
Date: Sat, 30 Sep 2023 16:20:25 +0200
Subject: [PATCH 16/36] Change inaccuracies in the docstring of
 'convert_to_nucl_acids'

---
 HW4_Grigoriants/protein_tools.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index c0c227d..c8fcef5 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -104,9 +104,7 @@ def convert_to_nucl_acids(sequences: list, nucl_acids: str):
                      nucl_acids = 'DNA' - convert to DNA
                      nucl_acids = 'both' - convert to RNA and DNA
     Return:
-    - dictionary: a collection of nucleic acids sequences
-    If nucl_acids = 'RNA' or nucl_acids = 'DNA' output a collection of sequences
-    If nucl_acids = 'both' output the name of a nucleic acid and a collection of sequences
+    - dictionary: output the name of nucleic acid and a collection of sequences
     """
     rule_of_translation = sequences[0].maketrans(translation_rule)
     rule_of_transcription = sequences[0].maketrans("Uu", "Tt")

From b193a6b8a6a24d392a8b7ef5d6ccf1cbcb0daa9d Mon Sep 17 00:00:00 2001
From: Ekaterina Shitik <shitik.ekaterina@gmail.com>
Date: Sat, 30 Sep 2023 16:26:50 +0200
Subject: [PATCH 17/36] Change annotation of 'search_for_alt_frames' function

---
 HW4_Grigoriants/protein_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index c8fcef5..76397ee 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -53,7 +53,7 @@ def check_for_motifs(sequences, motif):
     return all_positions
 
 
-def search_for_alt_frames(sequences: str, alt_start_aa: str):
+def search_for_alt_frames(sequences: list, alt_start_aa: str):
     """
     Search for alternative frames in a protein sequences
 

From f53914a10e3cafb397704a77f3e0513c01a0c565 Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Sat, 30 Sep 2023 18:46:06 +0300
Subject: [PATCH 18/36] Add minor fixes in protein_tools.py

---
 HW4_Grigoriants/protein_tools.py | 49 +++++++++++++++++++++++---------
 1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index 67366a3..d7621df 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -27,7 +27,26 @@ def define_molecular_weight(sequences):
     return sequences_weights
 
 
-def check_for_motifs(sequences, motif):
+def check_for_motifs(sequences, motif, overlapping):
+    """
+    Search for motifs - conserved amino acids residues in protein sequence
+
+    Search for one motif at a time
+    Search is letter case sensitive
+    Use one-letter aminoacids code for desired sequences and motifs 
+    Positions of AA in sequences are counted from 0
+    By default, overlapping matches are counted (see )
+    
+
+    Arguments:
+    - sequences (tuple(str), list(str)): sequences to check for given motif within
+    - motif (str): desired motif to check presense in every given sequence
+        Example: sequences = ["AMGAGW", "GAWSGRAGA"]
+                 motif = "GA"
+    Return:
+    - dictionary: sequences as keys (str), starting positions for presented motif (list) as values
+        Example: {'AMGAGW': [2], 'GAWSGRAGA': [0, 7]}
+    """
     new_line = "\n"  # used for user-friendly output
     all_positions = {}
     for sequence in sequences:
@@ -41,11 +60,14 @@ def check_for_motifs(sequences, motif):
                 if start == -1:
                     break
                 positions.append(start)
-                # use += len(motif) not to count overlapping matches
-                start += 1
-            pos_for_print = ", ".join(str(x) for x in positions)
+                if overlapping:
+                    start += 1 
+                else:
+                    start += len(motif)
+            print_pos = ", ".join(str(x) for x in positions)
+            print_pos = f'{print_pos}{new_line}'
             print(
-                f"Motif is present in protein sequence starting at positions: {pos_for_print}{new_line}"
+                f"Motif is present in protein sequence starting at positions: {print_pos}"
             )
         else:
             print(f"Motif is not present in protein sequence{new_line}")
@@ -71,22 +93,17 @@ def search_for_alt_frames(sequences: str, alt_start_aa: str):
     Return:
     - dictionary: the number of a sequence and a collection of alternative frames
     """
-    # if len(alt_start_aa) > 1:
-    #     raise ValueError("Invalid start codon!")
     alternative_frames = {}
     num_position = 0
     for sequence in sequences:
+        alternative_frames[sequence] = []
         for amino_acid in sequence[1:-3]:
+            alt_frame = "" 
             num_position += 1
             if amino_acid == alt_start_aa or amino_acid == alt_start_aa.swapcase():
-                key = sequences.index(sequence) + 1
-                if key in alternative_frames:
-                    alternative_frames[key] += sequence[num_position:] + "  "
-                else:
-                    alternative_frames[key] = sequence[num_position:] + "  "
+                alt_frame += sequence[num_position:]
+                alternative_frames[sequence].append(alt_frame)
         num_position = 0
-    # for key, value in alternative_frames.items():
-    #     print(key, value)
     return alternative_frames
 
 
@@ -273,6 +290,10 @@ def check_and_parse_user_input(sequences, **kwargs):
         if "motif" not in kwargs.keys():
             raise ValueError("Please provide desired motif")
         procedure_arguments["motif"] = kwargs["motif"]
+        if "overlapping" not in kwargs.keys():
+            procedure_arguments["overlapping"] = True
+        else:
+            procedure_arguments["overlapping"] = kwargs["overlapping"]
     elif procedure == "search_for_alt_frames":
         if "alt_start_aa" not in kwargs.keys():
             procedure_arguments["alt_start_aa"] = "M"

From 2ce8ada6c92ba1e6ebf23f776b9f5a184b0cbcf6 Mon Sep 17 00:00:00 2001
From: EkaterinShitik <144039338+EkaterinShitik@users.noreply.github.com>
Date: Sat, 30 Sep 2023 18:05:38 +0200
Subject: [PATCH 19/36] Add plan of README.md

---
 HW4_Grigoriants/README.md | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
index 8b13789..f6cbaac 100644
--- a/HW4_Grigoriants/README.md
+++ b/HW4_Grigoriants/README.md
@@ -1 +1,29 @@
+# Protein_tools.py
+## A tool to work with protein sequences
 
+*Proteins* are under 
+**Protein_tools.py** is an open-source program that facilitates operating with protein sequences 
+
+
+
+## Usage
+
+three_one_letter_code - convert one-letter code sequences to three-letter code sequences and vice-versa
+
+define_molecular_weight - determine the exact molecular weight of input protein sequences
+
+check_for_motifs - search for the motif of interest in input protein sequences 
+
+search_for_alt_frames - search for alternative frames that start with methyonine or other non-canonical start amino acids
+
+convert_to_nucl_acids - covert protein sequences to DNA and RNA
+
+run_protein_tools
+
+## Options
+
+## Examples
+
+## Troubleshooting
+
+## Contacts

From 18c1a760fb584496d75e4b3fb3dba704e6e303a8 Mon Sep 17 00:00:00 2001
From: EkaterinShitik <144039338+EkaterinShitik@users.noreply.github.com>
Date: Sat, 30 Sep 2023 19:31:41 +0200
Subject: [PATCH 20/36] Complete 'Usage'

---
 HW4_Grigoriants/README.md | 58 +++++++++++++++++++++++++++++++++------
 1 file changed, 49 insertions(+), 9 deletions(-)

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
index f6cbaac..aa7553d 100644
--- a/HW4_Grigoriants/README.md
+++ b/HW4_Grigoriants/README.md
@@ -1,29 +1,69 @@
 # Protein_tools.py
 ## A tool to work with protein sequences
 
-*Proteins* are under 
-**Protein_tools.py** is an open-source program that facilitates operating with protein sequences 
+*Proteins* are under the constant focus of scientists. Currently, there are an enormous amount of tools to operate with nucleotide sequences, however, the same ones for proteins are extremely rare. 
 
 
+`Protein_tools.py` is an open-source program that facilitates working with protein sequences. 
+
+*В моём представлении здесь должна быть картинка*
 
 ## Usage
+The programm is based on `run_protein_tools` function that takes the list of **one-letter amino acid sequences**, name of procedure and relevant arguments. If you have three-letter amino acid sequences you could convert them by using `three_one_letter_code` procedure.
+
+To start with the program run the following command:
 
-three_one_letter_code - convert one-letter code sequences to three-letter code sequences and vice-versa
+`run_protein_tools([sequence_1, sequence_2 ..., sequence_n], procedure, ...)`
 
-define_molecular_weight - determine the exact molecular weight of input protein sequences
+Where:
+- [sequence_1, sequence_2 ..., sequence_n] - a list of protein sequences
+- procedure - a type of procedure to use that is inputed in *string* type
+- ... - an additional argument that is to be inputed in *string* type
 
-check_for_motifs - search for the motif of interest in input protein sequences 
+## Options
 
-search_for_alt_frames - search for alternative frames that start with methyonine or other non-canonical start amino acids
+The program has five types of procedures:
 
-convert_to_nucl_acids - covert protein sequences to DNA and RNA
+#### `three_one_letter_code`
 
-run_protein_tools
+- The main aim - to convert three-letter amino acid sequences to one-letter ones and vice-versa
+- An additional argument: no
 
-## Options
+#### `define_molecular_weight` 
+
+- The main aim - to determine the exact molecular weight of protein sequences
+- An additional argument: no
+
+`check_for_motifs` - to search for the motif of interest in protein sequences
+
+`search_for_alt_frames` - to look for alternative frames that start with methyonine or other non-canonical start amino acids
+
+`convert_to_nucl_acids` - covert protein sequences to DNA and RNA
 
+**Requirments**
+
+Use only sequences that are encoded with one-letter. Если у вас трёхбуквенный код используйте наше функции для конвертации
+Трёхбуквенный код также используется для конвертации. Он разделён дефисами 
 ## Examples
 
 ## Troubleshooting
 
 ## Contacts
+Authors:
+
+Vladimir Grigoriants 
+
+Tulyavko Vlada 
+
+Ekaterina Shitik (EkaterinShitik)
+
+
+**Список процедур:**
+
+- `transcribe` — напечатать транскрибированную последовательность*
+- `reverse` — напечатать перевёрнутую последовательность
+- `complement` — напечатать комплементарную последовательность
+- `reverse_complement` — напечатать обратную комплементарную последовательность
+- `gc_count` — посчитать содержание нуклеотидов *G* и *C* в процентах
+  
+\* Обратная транскрипция в рамках данной процедуры также учитывается (РНК в ДНК)

From ea3be7ed76fa1d34c8e33f3495522c262c6fac46 Mon Sep 17 00:00:00 2001
From: EkaterinShitik <144039338+EkaterinShitik@users.noreply.github.com>
Date: Sat, 30 Sep 2023 19:47:05 +0200
Subject: [PATCH 21/36] Add preliminary 'Options'

---
 HW4_Grigoriants/README.md | 34 +++++++++++++++-------------------
 1 file changed, 15 insertions(+), 19 deletions(-)

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
index aa7553d..62c974b 100644
--- a/HW4_Grigoriants/README.md
+++ b/HW4_Grigoriants/README.md
@@ -24,26 +24,33 @@ Where:
 
 The program has five types of procedures:
 
-#### `three_one_letter_code`
+ `three_one_letter_code`
 
 - The main aim - to convert three-letter amino acid sequences to one-letter ones and vice-versa
 - An additional argument: no
 
-#### `define_molecular_weight` 
+ `define_molecular_weight` 
 
 - The main aim - to determine the exact molecular weight of protein sequences
 - An additional argument: no
 
-`check_for_motifs` - to search for the motif of interest in protein sequences
+ `check_for_motifs` 
 
-`search_for_alt_frames` - to look for alternative frames that start with methyonine or other non-canonical start amino acids
+- The main aim - to search for the motif of interest in protein sequences
+- An additional argument: motif (*str*)
 
-`convert_to_nucl_acids` - covert protein sequences to DNA and RNA
+ `search_for_alt_frames` 
 
-**Requirments**
+- The main aim - to look for alternative frames that start with methyonine or other non-canonical start amino acids
+- An additional argument: alt_start_aa (*str*)
+- Use alt_start_aa only for non-canonical start amino acids
+- Without alt_start_aa the procedure find alternative frames that start with methyonine
+
+`convert_to_nucl_acids` 
+- Convert protein sequences to DNA, RNA or both nucleic acid sequences
+- The program use the most frequent codons in human that could be found [here](https://www.genscript.com/tools/codon-frequency-table)
+- An additional argument: nucl_acids (*str*)
 
-Use only sequences that are encoded with one-letter. Если у вас трёхбуквенный код используйте наше функции для конвертации
-Трёхбуквенный код также используется для конвертации. Он разделён дефисами 
 ## Examples
 
 ## Troubleshooting
@@ -56,14 +63,3 @@ Vladimir Grigoriants
 Tulyavko Vlada 
 
 Ekaterina Shitik (EkaterinShitik)
-
-
-**Список процедур:**
-
-- `transcribe` — напечатать транскрибированную последовательность*
-- `reverse` — напечатать перевёрнутую последовательность
-- `complement` — напечатать комплементарную последовательность
-- `reverse_complement` — напечатать обратную комплементарную последовательность
-- `gc_count` — посчитать содержание нуклеотидов *G* и *C* в процентах
-  
-\* Обратная транскрипция в рамках данной процедуры также учитывается (РНК в ДНК)

From a1c1c23cd4522deb9089b290eabbbfb4d53510fe Mon Sep 17 00:00:00 2001
From: EkaterinShitik <144039338+EkaterinShitik@users.noreply.github.com>
Date: Sat, 30 Sep 2023 20:10:48 +0200
Subject: [PATCH 22/36] Add preliminary 'Examples'

---
 HW4_Grigoriants/README.md | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
index 62c974b..8865262 100644
--- a/HW4_Grigoriants/README.md
+++ b/HW4_Grigoriants/README.md
@@ -47,11 +47,20 @@ The program has five types of procedures:
 - Without alt_start_aa the procedure find alternative frames that start with methyonine
 
 `convert_to_nucl_acids` 
-- Convert protein sequences to DNA, RNA or both nucleic acid sequences
+- The main aim - to convert protein sequences to DNA, RNA or both nucleic acid sequences
 - The program use the most frequent codons in human that could be found [here](https://www.genscript.com/tools/codon-frequency-table)
 - An additional argument: nucl_acids (*str*)
+  
 
 ## Examples
+```python
+run_protein_tools(['met-Asn-Tyr', 'Ile-Ala-Ala'], procedure = 'three_one_letter_code')  # ['mNY', 'IAA']
+run_protein_tools(['mNY','IAA'], procedure = 'three_one_letter_code')  # ['met-Asn-Tyr', 'Ile-Ala-Ala']
+run_protein_tools(['MNY','IAA'], procedure = 'define_molecular_weight')  # [462.52000000000004, 309.35]
+```
+```python
+run_protein_tools(['mNY','IAA'], procedure = 'three_one_letter_code') #  ['met-Asn-Tyr', 'Ile-Ala-Ala']
+```
 
 ## Troubleshooting
 

From 454d703dcfa545100c2858740b5f706b32104736 Mon Sep 17 00:00:00 2001
From: EkaterinShitik <144039338+EkaterinShitik@users.noreply.github.com>
Date: Sat, 30 Sep 2023 20:40:46 +0200
Subject: [PATCH 23/36] Complete 'Examples'

---
 HW4_Grigoriants/README.md | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
index 8865262..6f43f17 100644
--- a/HW4_Grigoriants/README.md
+++ b/HW4_Grigoriants/README.md
@@ -42,8 +42,9 @@ The program has five types of procedures:
  `search_for_alt_frames` 
 
 - The main aim - to look for alternative frames that start with methyonine or other non-canonical start amino acids
+- Ignores the last three amino acids due to the insignicance of proteins of this length
 - An additional argument: alt_start_aa (*str*)
-- Use alt_start_aa only for non-canonical start amino acids
+- Use alt_start_aa **only for non-canonical start amino acids**
 - Without alt_start_aa the procedure find alternative frames that start with methyonine
 
 `convert_to_nucl_acids` 
@@ -54,12 +55,33 @@ The program has five types of procedures:
 
 ## Examples
 ```python
-run_protein_tools(['met-Asn-Tyr', 'Ile-Ala-Ala'], procedure = 'three_one_letter_code')  # ['mNY', 'IAA']
-run_protein_tools(['mNY','IAA'], procedure = 'three_one_letter_code')  # ['met-Asn-Tyr', 'Ile-Ala-Ala']
-run_protein_tools(['MNY','IAA'], procedure = 'define_molecular_weight')  # [462.52000000000004, 309.35]
-```
-```python
-run_protein_tools(['mNY','IAA'], procedure = 'three_one_letter_code') #  ['met-Asn-Tyr', 'Ile-Ala-Ala']
+# three_one_letter_code
+run_protein_tools(['met-Asn-Tyr', 'Ile-Ala-Ala'], procedure='three_one_letter_code')  # ['mNY', 'IAA']
+run_protein_tools(['mNY','IAA'], procedure='three_one_letter_code')  # ['met-Asn-Tyr', 'Ile-Ala-Ala']
+
+# define_molecular_weight
+run_protein_tools(['MNY','IAA'], procedure='define_molecular_weight')  # [462.52000000000004, 309.35]
+
+# check_for_motifs
+run_protein_tools(['mNY','IAA'], procedure='check_for_motifs', motif='NY')
+# Sequence: mNY
+# Motif: NY
+# Motif is present in protein sequence starting at positions: 1
+# Sequence: IAA
+# Motif: NY
+# Motif is not present in protein sequence
+# {'mNY': [1], 'IAA': []}
+
+# search_for_alt_frames
+run_protein_tools(['mNYQTMSPYYDMId'], procedure='search_for_alt_frames')  # {'mNYQTMSPYYDMId': ['MSPYYDMId']}
+run_protein_tools(['mNYTQTSP'], procedure='search_for_alt_frames', alt_start_aa='T')  # {'mNYTQTSP': ['TQTSP']}
+
+# convert_to_nucl_acids
+run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'RNA')  # {'RNA': ['AUGAACUAU']}
+run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'DNA')  # {'DNA': ['ATGAACTAT']}
+run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'both')
+# {'RNA': ['AUGAACUAU'], 'DNA': ['ATGAACTAT']}
+
 ```
 
 ## Troubleshooting

From e5628a551216c8869db98df2777e2c9ae3dc5659 Mon Sep 17 00:00:00 2001
From: EkaterinShitik <144039338+EkaterinShitik@users.noreply.github.com>
Date: Sat, 30 Sep 2023 20:58:58 +0200
Subject: [PATCH 24/36] Complete four first parts

---
 HW4_Grigoriants/README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
index 6f43f17..c898970 100644
--- a/HW4_Grigoriants/README.md
+++ b/HW4_Grigoriants/README.md
@@ -9,7 +9,7 @@
 *В моём представлении здесь должна быть картинка*
 
 ## Usage
-The programm is based on `run_protein_tools` function that takes the list of **one-letter amino acid sequences**, name of procedure and relevant arguments. If you have three-letter amino acid sequences you could convert them by using `three_one_letter_code` procedure.
+The program is based on the `run_protein_tools` function, which takes a list of **one-letter amino acid sequences**, the name of a procedure and a relevant argument. If you have three-letter amino acid sequences, you can convert them in advance with the `three_one_letter_code` procedure. Before using this procedure, check the *Options*.
 
 To start with the program run the following command:
 
@@ -27,6 +27,7 @@ The program has five types of procedures:
  `three_one_letter_code`
 
 - The main aim - to convert three-letter amino acid sequences to one-letter ones and vice-versa
+- In case of three-to-one translation the names of amino acids **must be separated with hyphen**
 - An additional argument: no
 
  `define_molecular_weight` 
@@ -51,6 +52,7 @@ The program has five types of procedures:
 - The main aim - to convert protein sequences to DNA, RNA or both nucleic acid sequences
 - The program use the most frequent codons in human that could be found [here](https://www.genscript.com/tools/codon-frequency-table)
 - An additional argument: nucl_acids (*str*)
+- Use only DNA, RNA or both as nucl_acids (for more details, check *Examples*)
   
 
 ## Examples
@@ -79,8 +81,7 @@ run_protein_tools(['mNYTQTSP'], procedure='search_for_alt_frames', alt_start_aa=
 # convert_to_nucl_acids
 run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'RNA')  # {'RNA': ['AUGAACUAU']}
 run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'DNA')  # {'DNA': ['ATGAACTAT']}
-run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'both')
-# {'RNA': ['AUGAACUAU'], 'DNA': ['ATGAACTAT']}
+run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'both') # {'RNA': ['AUGAACUAU'], 'DNA': ['ATGAACTAT']}
 
 ```
 

From 53a75563b9db6cabb24aec81c6ec5afc76045dfe Mon Sep 17 00:00:00 2001
From: EkaterinShitik <144039338+EkaterinShitik@users.noreply.github.com>
Date: Sat, 30 Sep 2023 21:40:03 +0200
Subject: [PATCH 25/36] Complete all parts except for contacts

---
 HW4_Grigoriants/README.md | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
index c898970..70ad9b2 100644
--- a/HW4_Grigoriants/README.md
+++ b/HW4_Grigoriants/README.md
@@ -41,7 +41,7 @@ The program has five types of procedures:
 - An additional argument: motif (*str*)
 
  `search_for_alt_frames` 
-
+ 
 - The main aim - to look for alternative frames that start with methyonine or other non-canonical start amino acids
 - Ignores the last three amino acids due to the insignicance of proteins of this length
 - An additional argument: alt_start_aa (*str*)
@@ -49,6 +49,7 @@ The program has five types of procedures:
 - Without alt_start_aa the procedure find alternative frames that start with methyonine
 
 `convert_to_nucl_acids` 
+
 - The main aim - to convert protein sequences to DNA, RNA or both nucleic acid sequences
 - The program use the most frequent codons in human that could be found [here](https://www.genscript.com/tools/codon-frequency-table)
 - An additional argument: nucl_acids (*str*)
@@ -87,6 +88,17 @@ run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'both
 
 ## Troubleshooting
 
+|  Type of the problem                                             |  Probable cause
+| ------------------------------------------------------------ |--------------------
+| Output does not correspond to the expected results         | The name of the procedure is wrong. You see the results of another procedure
+| ValueError: No sequences provided                          | A list of sequences was not provided
+| ValueError: Wrong procedure                                | The procedure does not exist in this program
+| TypeError: takes from 0 to 1 positional arguments but n were given  | The sequences are not collected into a list
+| ValueError: Invalid sequence given                         | The sequences do not correspond to the standard amino acid code
+| ValueError: Please provide desired motif                   | The additional argument *motif* is missing in `check_for_motifs`
+| ValueError: Invalid start AA!                              | There is more than one letter in the additional argument *alt_start_aa* in `search_for_alt_frames`
+| ValueError: Please provide desired type of nucl_acids      | The additional argument *nucl_acids* is missing in `convert_to_nucl_acids`
+| ValueError: Invalid nucl_acids argument                    | The additional argument in `convert_to_nucl_acids` is written incorrectly
 ## Contacts
 Authors:
 

From 33744ad82a6a6ebfe7bcd3ffcb71ff118eddb332 Mon Sep 17 00:00:00 2001
From: EkaterinShitik <144039338+EkaterinShitik@users.noreply.github.com>
Date: Sat, 30 Sep 2023 21:48:06 +0200
Subject: [PATCH 26/36] Complete all parts

---
 HW4_Grigoriants/README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
index 70ad9b2..c41a0e7 100644
--- a/HW4_Grigoriants/README.md
+++ b/HW4_Grigoriants/README.md
@@ -102,8 +102,8 @@ run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'both
 ## Contacts
 Authors:
 
-Vladimir Grigoriants 
+Vladimir Grigoriants (*адрес*)
 
-Tulyavko Vlada 
+Tulyavko Vlada (*адрес*)
 
-Ekaterina Shitik (EkaterinShitik)
+Ekaterina Shitik (shitik.ekaterina@gmail.com)

From 0fbb1848bd7d373792cfe088b6888436645c0cf3 Mon Sep 17 00:00:00 2001
From: EkaterinShitik <144039338+EkaterinShitik@users.noreply.github.com>
Date: Sat, 30 Sep 2023 21:51:22 +0200
Subject: [PATCH 27/36] Add minor changes in 'Options'

---
 HW4_Grigoriants/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
index c41a0e7..227f9eb 100644
--- a/HW4_Grigoriants/README.md
+++ b/HW4_Grigoriants/README.md
@@ -43,7 +43,7 @@ The program has five types of procedures:
  `search_for_alt_frames` 
  
 - The main aim - to look for alternative frames that start with methyonine or other non-canonical start amino acids
-- Ignores the last three amino acids due to the insignicance of proteins of this length
+- Ignores the last three amino acids due to the insignificance of alternative frames of this length
 - An additional argument: alt_start_aa (*str*)
 - Use alt_start_aa **only for non-canonical start amino acids**
 - Without alt_start_aa the procedure find alternative frames that start with methyonine

From d9bdb5072c5cafdab390d4e5b18fc6ac8aad0cc5 Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Sun, 1 Oct 2023 00:08:21 +0300
Subject: [PATCH 28/36] Add docstrings to main function, search_for_motifs
 function, add minor fixes, add dictionaries.py

---
 HW4_Grigoriants/dictionaries.py  | 106 +++++++++++++++
 HW4_Grigoriants/protein_tools.py | 223 ++++++++++++-------------------
 2 files changed, 194 insertions(+), 135 deletions(-)
 create mode 100644 HW4_Grigoriants/dictionaries.py

diff --git a/HW4_Grigoriants/dictionaries.py b/HW4_Grigoriants/dictionaries.py
new file mode 100644
index 0000000..f4a1ada
--- /dev/null
+++ b/HW4_Grigoriants/dictionaries.py
@@ -0,0 +1,106 @@
+amino_acids = {
+    "A": "Ala",
+    "C": "Cys",
+    "D": "Asp",
+    "E": "Glu",
+    "F": "Phe",
+    "G": "Gly",
+    "H": "His",
+    "I": "Ile",
+    "K": "Lys",
+    "L": "Leu",
+    "M": "Met",
+    "N": "Asn",
+    "P": "Pro",
+    "Q": "Gln",
+    "R": "Arg",
+    "S": "Ser",
+    "T": "Thr",
+    "V": "Val",
+    "W": "Trp",
+    "Y": "Tyr",
+    "a": "ala",
+    "c": "cys",
+    "d": "asp",
+    "e": "glu",
+    "f": "phe",
+    "g": "gly",
+    "h": "his",
+    "i": "ile",
+    "k": "lys",
+    "l": "leu",
+    "m": "met",
+    "n": "asn",
+    "p": "pro",
+    "q": "gln",
+    "r": "arg",
+    "s": "ser",
+    "t": "thr",
+    "v": "val",
+    "w": "trp",
+    "y": "tyr",
+}
+translation_rule = {
+    "F": "UUU",
+    "f": "uuu",
+    "L": "CUG",
+    "l": "cug",
+    "I": "AUU",
+    "i": "auu",
+    "M": "AUG",
+    "m": "aug",
+    "V": "GUG",
+    "v": "gug",
+    "P": "CCG",
+    "p": "ccg",
+    "T": "ACC",
+    "t": "acc",
+    "A": "GCG",
+    "a": "gcg",
+    "Y": "UAU",
+    "y": "uau",
+    "H": "CAU",
+    "h": "cau",
+    "Q": "CAG",
+    "q": "cag",
+    "N": "AAC",
+    "n": "aac",
+    "K": "AAA",
+    "k": "aaa",
+    "D": "GAU",
+    "d": "gau",
+    "E": "GAA",
+    "e": "gaa",
+    "C": "UGC",
+    "c": "ugc",
+    "W": "UGG",
+    "w": "ugg",
+    "R": "CGU",
+    "r": "cgu",
+    "S": "AGC",
+    "s": "agc",
+    "G": "GGC",
+    "g": "ggc",
+}
+amino_acid_weights = {
+    "A": 89.09,
+    "C": 121.16,
+    "D": 133.10,
+    "E": 147.13,
+    "F": 165.19,
+    "G": 75.07,
+    "H": 155.16,
+    "I": 131.17,
+    "K": 146.19,
+    "L": 131.17,
+    "M": 149.21,
+    "N": 132.12,
+    "P": 115.13,
+    "Q": 146.15,
+    "R": 174.20,
+    "S": 105.09,
+    "T": 119.12,
+    "V": 117.15,
+    "W": 204.23,
+    "Y": 181.19,
+}
diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index d7621df..9d2290c 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -1,53 +1,60 @@
-def three_one_letter_code(sequences):
+import dictionaries
+
+
+def three_one_letter_code(sequences) -> list:
     inversed_sequences = []
     for sequence in sequences:
         inversed_sequence = ""
         if "-" not in sequence:
             for letter in sequence:
-                inversed_sequence += amino_acids[letter] + "-"
+                inversed_sequence += dictionaries.amino_acids[letter] + "-"
             inversed_sequence = inversed_sequence[:-1]
             inversed_sequences.append(inversed_sequence)
         else:
             aa_splitted = sequence.split("-")
             for aa in aa_splitted:
-                inversed_sequence += list(amino_acids.keys())[
-                    list(amino_acids.values()).index(aa)
+                inversed_sequence += list(dictionaries.amino_acids.keys())[
+                    list(dictionaries.amino_acids.values()).index(aa)
                 ]
             inversed_sequences.append(inversed_sequence)
     return inversed_sequences
 
 
-def define_molecular_weight(sequences):
+def define_molecular_weight(sequences) -> dict:
     sequences_weights = []
     for sequence in sequences:
         sequence_weight = 0
         for letter in sequence:
-            sequence_weight += amino_acid_weights[letter]
+            sequence_weight += dictionaries.amino_acid_weights[letter.upper()]
         sequences_weights.append(sequence_weight)
     return sequences_weights
 
 
-def check_for_motifs(sequences, motif, overlapping):
+def search_for_motifs(
+    sequences: (tuple(str) or list(str)), motif: str, overlapping: bool
+) -> dict:
     """
     Search for motifs - conserved amino acids residues in protein sequence
 
     Search for one motif at a time
     Search is letter case sensitive
-    Use one-letter aminoacids code for desired sequences and motifs 
+    Use one-letter aminoacids code for desired sequences and motifs
     Positions of AA in sequences are counted from 0
-    By default, overlapping matches are counted (see )
-    
+    By default, overlapping matches are counted
+
 
     Arguments:
-    - sequences (tuple(str), list(str)): sequences to check for given motif within
-    - motif (str): desired motif to check presense in every given sequence
+    - sequences (tuple(str) or list(str)): sequences to check for given motif within
         Example: sequences = ["AMGAGW", "GAWSGRAGA"]
-                 motif = "GA"
+    - motif (str): desired motif to check presense in every given sequence
+        Example: motif = "GA"
+    - overlapping (bool): count (True) or skip (False) overlapping matches. (Optional)
+        Example: overlapping = False
     Return:
-    - dictionary: sequences as keys (str), starting positions for presented motif (list) as values
+    - dictionary: sequences (str) as keys , starting positions for presented motif (list) as values
         Example: {'AMGAGW': [2], 'GAWSGRAGA': [0, 7]}
     """
-    new_line = "\n"  # used for user-friendly output
+    new_line = "\n"
     all_positions = {}
     for sequence in sequences:
         start = 0
@@ -61,11 +68,11 @@ def check_for_motifs(sequences, motif, overlapping):
                     break
                 positions.append(start)
                 if overlapping:
-                    start += 1 
+                    start += 1
                 else:
                     start += len(motif)
             print_pos = ", ".join(str(x) for x in positions)
-            print_pos = f'{print_pos}{new_line}'
+            print_pos = f"{print_pos}{new_line}"
             print(
                 f"Motif is present in protein sequence starting at positions: {print_pos}"
             )
@@ -75,7 +82,7 @@ def check_for_motifs(sequences, motif, overlapping):
     return all_positions
 
 
-def search_for_alt_frames(sequences: str, alt_start_aa: str):
+def search_for_alt_frames(sequences: str, alt_start_aa: str) -> dict:
     """
     Search for alternative frames in a protein sequences
 
@@ -98,7 +105,7 @@ def search_for_alt_frames(sequences: str, alt_start_aa: str):
     for sequence in sequences:
         alternative_frames[sequence] = []
         for amino_acid in sequence[1:-3]:
-            alt_frame = "" 
+            alt_frame = ""
             num_position += 1
             if amino_acid == alt_start_aa or amino_acid == alt_start_aa.swapcase():
                 alt_frame += sequence[num_position:]
@@ -107,7 +114,7 @@ def search_for_alt_frames(sequences: str, alt_start_aa: str):
     return alternative_frames
 
 
-def convert_to_nucl_acids(sequences: list, nucl_acids: str):
+def convert_to_nucl_acids(sequences: list, nucl_acids: str) -> dict:
     """
     Convert protein sequences to RNA or DNA sequences.
 
@@ -125,7 +132,7 @@ def convert_to_nucl_acids(sequences: list, nucl_acids: str):
     If nucl_acids = 'RNA' or nucl_acids = 'DNA' output a collection of frames
     If nucl_acids = 'both' output the name of a nucleic acid and a collection of frames
     """
-    rule_of_translation = sequences[0].maketrans(translation_rule)
+    rule_of_translation = sequences[0].maketrans(dictionaries.translation_rule)
     rule_of_transcription = sequences[0].maketrans("AaUuCcGg", "TtAaGgCc")
     nucl_acid_seqs = {"RNA": [], "DNA": []}
     for sequence in sequences:
@@ -146,138 +153,45 @@ def convert_to_nucl_acids(sequences: list, nucl_acids: str):
 
 
 procedures_to_functions = {
-    "check_for_motifs": check_for_motifs,
+    "search_for_motifs": search_for_motifs,
     "search_for_alt_frames": search_for_alt_frames,
     "convert_to_nucl_acids": convert_to_nucl_acids,
     "three_one_letter_code": three_one_letter_code,
     "define_molecular_weight": define_molecular_weight,
 }
-amino_acids = {
-    "A": "Ala",
-    "C": "Cys",
-    "D": "Asp",
-    "E": "Glu",
-    "F": "Phe",
-    "G": "Gly",
-    "H": "His",
-    "I": "Ile",
-    "K": "Lys",
-    "L": "Leu",
-    "M": "Met",
-    "N": "Asn",
-    "P": "Pro",
-    "Q": "Gln",
-    "R": "Arg",
-    "S": "Ser",
-    "T": "Thr",
-    "V": "Val",
-    "W": "Trp",
-    "Y": "Tyr",
-    "a": "ala",
-    "c": "cys",
-    "d": "asp",
-    "e": "glu",
-    "f": "phe",
-    "g": "gly",
-    "h": "his",
-    "i": "ile",
-    "k": "lys",
-    "l": "leu",
-    "m": "met",
-    "n": "asn",
-    "p": "pro",
-    "q": "gln",
-    "r": "arg",
-    "s": "ser",
-    "t": "thr",
-    "v": "val",
-    "w": "trp",
-    "y": "tyr",
-}
 
-translation_rule = {
-    "F": "UUU",
-    "f": "uuu",
-    "L": "CUG",
-    "l": "cug",
-    "I": "AUU",
-    "i": "auu",
-    "M": "AUG",
-    "m": "aug",
-    "V": "GUG",
-    "v": "gug",
-    "P": "CCG",
-    "p": "ccg",
-    "T": "ACC",
-    "t": "acc",
-    "A": "GCG",
-    "a": "gcg",
-    "Y": "UAU",
-    "y": "uau",
-    "H": "CAU",
-    "h": "cau",
-    "Q": "CAG",
-    "q": "cag",
-    "N": "AAC",
-    "n": "aac",
-    "K": "AAA",
-    "k": "aaa",
-    "D": "GAU",
-    "d": "gau",
-    "E": "GAA",
-    "e": "gaa",
-    "C": "UGC",
-    "c": "ugc",
-    "W": "UGG",
-    "w": "ugg",
-    "R": "CGU",
-    "r": "cgu",
-    "S": "AGC",
-    "s": "agc",
-    "G": "GGC",
-    "g": "ggc",
-}
 
-amino_acid_weights = {
-    "A": 89.09,
-    "C": 121.16,
-    "D": 133.10,
-    "E": 147.13,
-    "F": 165.19,
-    "G": 75.07,
-    "H": 155.16,
-    "I": 131.17,
-    "K": 146.19,
-    "L": 131.17,
-    "M": 149.21,
-    "N": 132.12,
-    "P": 115.13,
-    "Q": 146.15,
-    "R": 174.20,
-    "S": 105.09,
-    "T": 119.12,
-    "V": 117.15,
-    "W": 204.23,
-    "Y": 181.19,
-}
+def check_and_parse_user_input(
+    sequences: list(str) or tuple(str), **kwargs
+) -> dict and str:
+    """
+    Check if user input can be correctly processed
+    Provide arguments for desired procedures
+    Needed for main function to correctly call desired procedure
 
+    Arguments:
+    - sequences (list(str) or tuple(str)): sequences to process
+    - **kwargs - needed arguments for completion of desired procedure
 
-def check_and_parse_user_input(sequences, **kwargs):
+    Return:
+    - string: procedure name
+    - dictionary: a collection of procedure arguments and their values
+    """
     if len(sequences) == 0:
         raise ValueError("No sequences provided")
     procedure = kwargs["procedure"]
     if procedure not in procedures_to_functions.keys():
         raise ValueError("Wrong procedure")
-    allowed_inputs = set(amino_acids.keys()).union(
-        set(amino_acids.values()).union(set("-"))
+    allowed_inputs = set(dictionaries.amino_acids.keys()).union(
+        set(dictionaries.amino_acids.values()).union(set("-"))
     )
     if procedure != "three_one_letter_code":
         allowed_inputs.remove("-")
-        allowed_inputs -= set(amino_acids.values())
+        allowed_inputs -= set(dictionaries.amino_acids.values())
     for sequence in sequences:
         allowed_inputs_seq = allowed_inputs
         if procedure == "three_one_letter_code" and "-" in sequence:
-            allowed_inputs_seq -= set(amino_acids.keys())
+            allowed_inputs_seq -= set(dictionaries.amino_acids.keys())
             if not all(
                 aminoacids in allowed_inputs_seq for aminoacids in sequence.split("-")
             ):
@@ -286,7 +200,7 @@ def check_and_parse_user_input(sequences, **kwargs):
             if not all(aminoacids in allowed_inputs_seq for aminoacids in sequence):
                 raise ValueError("Invalid sequence given")
     procedure_arguments = {}
-    if procedure == "check_for_motifs":
+    if procedure == "search_for_motifs":
         if "motif" not in kwargs.keys():
             raise ValueError("Please provide desired motif")
         procedure_arguments["motif"] = kwargs["motif"]
@@ -311,6 +225,45 @@ def check_and_parse_user_input(sequences, **kwargs):
     return procedure_arguments, procedure
 
 
-def run_protein_tools(sequences=[], **kwargs):
+def run_protein_tools(sequences: list(str) or tuple(str), **kwargs: str):
+    """
+    Main function to process protein sequence by one of the developed tools.
+    Run one procedure at a time:
+    - Search for conserved amino acids residues in protein sequence
+    - Search for alternative frames in a protein sequences
+    - Convert protein sequences to RNA or DNA sequences
+    -
+
+    All functions are letter case sensitive
+    Provide protein sequence in one letter code.
+    You can obtain one letter code from three letter code with *three_one_letter_code*
+    If more information needed please see Readme or desired dockstring
+
+    Arguments:
+    - sequences (list(str) or tuple(str)): sequences to process
+    - procedure (str): desired procedure:
+        - "search_for_motifs"
+        - "search_for_alt_frames"
+        - "convert_to_nucl_acids"
+        - "three_one_letter_code"
+        - "define_molecular_weight"
+    For "search_for_motif" procedure provide:
+        - motif (str): desired motif to check presense in every given sequence
+            Example: motif = "GA"
+        - overlapping (bool): count (True) or skip (False) overlapping matches. (Optional)
+            Example: overlapping = False
+    For "search_for_alt_frames" procedure provide:
+        - alt_start_aa (str): the name of an amino acid that is encoded by alternative start codon (Optional)
+            Example: alt_start_aa = 'I'
+    For "convert_to_nucl_acids" procedure provide:
+        - nucl_acids (str): the nucleic acid to convert to
+            Example: nucl_acids = 'RNA'
+                     nucl_acids = 'DNA'
+                     nucl_acids = 'both'
+
+    Return:
+    - dict: Dictionary with processed sequences. Depends on desired tool
+            Please see Readme or desired dockstring
+    """
     procedure_arguments, procedure = check_and_parse_user_input(sequences, **kwargs)
     return procedures_to_functions[procedure](**procedure_arguments)

From 78fc1e04de148832c0075f4671c81bff97cd6070 Mon Sep 17 00:00:00 2001
From: Vlada Tuliavko <vlada@DESKTOP-70RJ25E>
Date: Sun, 1 Oct 2023 00:13:31 +0300
Subject: [PATCH 29/36] Add docstrings to three_one_letter_code and
 define_molecular_weight functions

---
 HW4_Grigoriants/protein_tools.py | 44 ++++++++++++++++++++++++++++----
 1 file changed, 39 insertions(+), 5 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index 67366a3..2e8c17d 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -1,4 +1,21 @@
-def three_one_letter_code(sequences):
+def three_one_letter_code(sequences: str) -> list:
+    """
+    Reverse the protein sequences from one-letter to three-letter format and vice-versa
+
+    Case 1: get three-letter sequence
+    Use one-letter amino-acids sequences of any letter case
+        
+    Case 2: get one-letter sequence
+    Use three-letter amino-acid separated by '-' sequences 
+        
+    Arguments:
+    - sequences (tuple(str) or list(str)): protein sequences to convert
+    Example: ['WAG', 'MkqRe', 'msrlk', 'Met-Ala-Gly', 'Met-arg-asn-Trp-Ala-Gly', 'arg-asn-trp']
+
+    Return:
+    - list: one-letter/three-letter protein sequences 
+    Example: ['Met-Ala-Gly', 'Met-arg-asn-Trp-Ala-Gly', 'arg-asn-trp', 'WAG', 'MkqRe', 'rlk']
+    """
     inversed_sequences = []
     for sequence in sequences:
         inversed_sequence = ""
@@ -17,13 +34,30 @@ def three_one_letter_code(sequences):
     return inversed_sequences
 
 
-def define_molecular_weight(sequences):
-    sequences_weights = []
+def define_molecular_weight(sequences: str) -> dict:
+    """
+    Define molecular weight of the protein sequences
+
+    Use one-letter amino-acids sequences of any letter case
+    The molecular weight is:
+    - a sum of masses of each atom constituting a molecule
+    - expressed in units called daltons (Da)
+    - rounded to hundredths
+    
+    Arguments:
+    - sequences (tuple(str) or list(str)): protein sequences to convert
+
+    Return:
+    - dictionary: protein sequences as keys and molecular masses as values
+    Example: {'WAG': 332.39, 'MkqRe': 690.88, 'msrlk': 633.86}
+    """
+    sequences_weights = {}
     for sequence in sequences:
         sequence_weight = 0
         for letter in sequence:
-            sequence_weight += amino_acid_weights[letter]
-        sequences_weights.append(sequence_weight)
+            sequence_weight += amino_acid_weights[letter.upper()]
+        sequence_weight -= (len(sequence) - 1) * 18 #deduct water from peptide bond
+        sequences_weights[sequence] = round(sequence_weight, 2) 
     return sequences_weights
 
 

From fdf4b608d08dedae6ffbd0ca882a88eeb5db2061 Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Sun, 1 Oct 2023 00:13:50 +0300
Subject: [PATCH 30/36] Add minor fixes

---
 HW4_Grigoriants/protein_tools.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index 9d2290c..6662b80 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -237,7 +237,7 @@ def run_protein_tools(sequences: list(str) or tuple(str), **kwargs: str):
     All functions are letter case sensitive
     Provide protein sequence in one letter code.
     You can obtain one letter code from three letter code with *three_one_letter_code*
-    If more information needed please see Readme or desired dockstring
+    If more information needed please see Readme or desired docstring
 
     Arguments:
     - sequences (list(str) or tuple(str)): sequences to process
@@ -263,7 +263,7 @@ def run_protein_tools(sequences: list(str) or tuple(str), **kwargs: str):
 
     Return:
     - dict: Dictionary with processed sequences. Depends on desired tool
-            Please see Readme or desired dockstring
+            Please see Readme or desired docstring
     """
     procedure_arguments, procedure = check_and_parse_user_input(sequences, **kwargs)
     return procedures_to_functions[procedure](**procedure_arguments)

From 6794624aeb6477cac093769d999045bf48e213bd Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Sun, 1 Oct 2023 02:12:49 +0300
Subject: [PATCH 31/36] Add minor fixes to docstrings

---
 HW4_Grigoriants/protein_tools.py | 142 ++++++++++++++++---------------
 1 file changed, 74 insertions(+), 68 deletions(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index 7d9b86b..11e1adc 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -5,19 +5,21 @@ def three_one_letter_code(sequences: str) -> list:
     """
     Reverse the protein sequences from one-letter to three-letter format and vice-versa
 
-    Case 1: get three-letter sequence
+    Case 1: get three-letter sequence\n
     Use one-letter amino-acids sequences of any letter case
-        
-    Case 2: get one-letter sequence
-    Use three-letter amino-acid separated by '-' sequences 
-        
+
+    Case 2: get one-letter sequence\n
+    Use three-letter amino-acid separated by "-" sequences.
+    Please note that sequences without "-" are parsed as one-letter code sequences\n
+    Example: for sequence "Ala" function will return "Ala-leu-ala"
+
     Arguments:
-    - sequences (tuple(str) or list(str)): protein sequences to convert
-    Example: ['WAG', 'MkqRe', 'msrlk', 'Met-Ala-Gly', 'Met-arg-asn-Trp-Ala-Gly', 'arg-asn-trp']
+    - sequences (tuple[str] or list[str]): protein sequences to convert\n
+    Example: ["WAG", "MkqRe", "msrlk", "Met-Ala-Gly", "Met-arg-asn-Trp-Ala-Gly", "arg-asn-trp"]
 
     Return:
-    - list: one-letter/three-letter protein sequences 
-    Example: ['Met-Ala-Gly', 'Met-arg-asn-Trp-Ala-Gly', 'arg-asn-trp', 'WAG', 'MkqRe', 'rlk']
+    - list: one-letter/three-letter protein sequences\n
+    Example: ["Met-Ala-Gly", "Met-arg-asn-Trp-Ala-Gly", "arg-asn-trp", "WAG", "MkqRe", "rlk"]
     """
     inversed_sequences = []
     for sequence in sequences:
@@ -46,47 +48,46 @@ def define_molecular_weight(sequences: str) -> dict:
     - a sum of masses of each atom constituting a molecule
     - expressed in units called daltons (Da)
     - rounded to hundredths
-    
+
     Arguments:
-    - sequences (tuple(str) or list(str)): protein sequences to convert
+    - sequences (tuple[str] or list[str]): protein sequences to convert
 
     Return:
-    - dictionary: protein sequences as keys and molecular masses as values
-    Example: {'WAG': 332.39, 'MkqRe': 690.88, 'msrlk': 633.86}
+    - dictionary: protein sequences as keys and molecular masses as values\n
+    Example: {"WAG": 332.39, "MkqRe": 690.88, "msrlk": 633.86}
     """
     sequences_weights = {}
     for sequence in sequences:
         sequence_weight = 0
         for letter in sequence:
-            sequence_weight += amino_acid_weights[letter.upper()]
-        sequence_weight -= (len(sequence) - 1) * 18 #deduct water from peptide bond
-        sequences_weights[sequence] = round(sequence_weight, 2) 
+            sequence_weight += dictionaries.amino_acid_weights[letter.upper()]
+        sequence_weight -= (len(sequence) - 1) * 18  # deduct water from peptide bond
+        sequences_weights[sequence] = round(sequence_weight, 2)
     return sequences_weights
 
 
 def search_for_motifs(
-    sequences: (tuple(str) or list(str)), motif: str, overlapping: bool
+    sequences: (tuple[str] or list[str]), motif: str, overlapping: bool
 ) -> dict:
     """
     Search for motifs - conserved amino acids residues in protein sequence
 
-    Search for one motif at a time
-    Search is letter case sensitive
-    Use one-letter aminoacids code for desired sequences and motifs
-    Positions of AA in sequences are counted from 0
+    Search for one motif at a time\n
+    Search is letter case sensitive\n
+    Use one-letter aminoacids code for desired sequences and motifs\n
+    Positions of AA in sequences are counted from 0\n
     By default, overlapping matches are counted
 
-
     Arguments:
-    - sequences (tuple(str) or list(str)): sequences to check for given motif within
+    - sequences (tuple[str] or list[str]): sequences to check for given motif within\n
         Example: sequences = ["AMGAGW", "GAWSGRAGA"]
-    - motif (str): desired motif to check presense in every given sequence
+    - motif (str]: desired motif to check presense in every given sequence\n
         Example: motif = "GA"
-    - overlapping (bool): count (True) or skip (False) overlapping matches. (Optional)
+    - overlapping (bool): count (True) or skip (False) overlapping matches. (Optional)\n
         Example: overlapping = False
     Return:
-    - dictionary: sequences (str) as keys , starting positions for presented motif (list) as values
-        Example: {'AMGAGW': [2], 'GAWSGRAGA': [0, 7]}
+    - dictionary: sequences (str] as keys , starting positions for presented motif (list) as values\n
+        Example: {"AMGAGW": [2], "GAWSGRAGA": [0, 7]}
     """
     new_line = "\n"
     all_positions = {}
@@ -116,21 +117,21 @@ def search_for_motifs(
     return all_positions
 
 
-
 def search_for_alt_frames(sequences: str, alt_start_aa: str) -> dict:
     """
     Search for alternative frames in a protein sequences
 
-    Without an alt_start_aa argument search for frames that start with methionine ('M')
-    To search frames with alternative start codon add alt_start_aa argument
+    Search is not letter case sensitive\n
+    Without an alt_start_aa argument search for frames that start with methionine ("M")
+    To search frames with alternative start codon add alt_start_aa argument\n
     In alt_start_aa argument use one-letter code
 
     The function ignores the last three amino acids in sequences
 
     Arguments:
-    - sequences (tuple(str) or list(str)): sequences to check
-    - alt_start_aa (str): the name of an amino acid that is encoded by alternative start codon (Optional)
-    Example: alt_start_aa = 'I'
+    - sequences (tuple[str] or list[str]): sequences to check
+    - alt_start_aa (str]: the name of an amino acid that is encoded by alternative start AA (Optional)\n
+    Example: alt_start_aa = "I"
 
     Return:
     - dictionary: the number of a sequence and a collection of alternative frames
@@ -153,17 +154,17 @@ def convert_to_nucl_acids(sequences: list, nucl_acids: str) -> dict:
     """
     Convert protein sequences to RNA or DNA sequences.
 
-    Use the most frequent codons in human. The source - https://www.genscript.com/tools/codon-frequency-table
-    All nucleic acids (DNA and RNA) are showed in 5'-3' direction
+    Use the most frequent codons in human. The source - https://www.genscript.com/tools/codon-frequency-table\n
+    All nucleic acids (DNA and RNA) are showed in 5"-3" direction
 
     Arguments:
-    - sequences (tuple(str) or list(str)): sequences to convert
-    - nucl_acids (str): the nucleic acid that is prefered
-    Example: nucl_acids = 'RNA' - convert to RNA
-                     nucl_acids = 'DNA' - convert to DNA
-                     nucl_acids = 'both' - convert to RNA and DNA
+    - sequences (tuple[str] or list[str]): sequences to convert
+    - nucl_acids (str]: the nucleic acid that is prefered\n
+    Example: nucl_acids = "RNA" - convert to RNA\n
+                   nucl_acids = "DNA" - convert to DNA\n
+                   nucl_acids = "both" - convert to RNA and DNA
     Return:
-    - dictionary: output the name of nucleic acid and a collection of sequences
+    - dictionary: nucleic acids (str) as keys, collection of sequences (list) as values
     """
     rule_of_translation = sequences[0].maketrans(dictionaries.translation_rule)
     rule_of_transcription = sequences[0].maketrans("AaUuCcGg", "TtAaGgCc")
@@ -195,15 +196,14 @@ def convert_to_nucl_acids(sequences: list, nucl_acids: str) -> dict:
 
 
 def check_and_parse_user_input(
-    sequences: list(str) or tuple(str), **kwargs
+    sequences: list[str] or tuple[str], **kwargs
 ) -> dict and str:
     """
-    Check if user input can be correctly processed
-    Provide arguments for desired procedures
-    Needed for main function to correctly call desired procedure
-
+    Check if user input can be correctly processed\n
+    Parse sequences and arguments for desired procedure
+    
     Arguments:
-    - sequences (list(str) or tuple(str)): sequences to process
+    - sequences (list[str] or tuple[str]): sequences to process
     - **kwargs - needed arguments for completion of desired procedure
 
     Return:
@@ -216,13 +216,13 @@ def check_and_parse_user_input(
     if procedure not in procedures_to_functions.keys():
         raise ValueError("Wrong procedure")
     allowed_inputs = set(dictionaries.amino_acids.keys()).union(
-        set(dictionaries.amino_acids.values()).union(set("-"))
+        set(dictionaries.amino_acids.values())
     )
+    allowed_inputs.add("-")
     if procedure != "three_one_letter_code":
-        allowed_inputs.remove("-")
         allowed_inputs -= set(dictionaries.amino_acids.values())
     for sequence in sequences:
-        allowed_inputs_seq = allowed_inputs
+        allowed_inputs_seq = allowed_inputs.copy()
         if procedure == "three_one_letter_code" and "-" in sequence:
             allowed_inputs_seq -= set(dictionaries.amino_acids.keys())
             if not all(
@@ -230,6 +230,8 @@ def check_and_parse_user_input(
             ):
                 raise ValueError("Invalid sequence given")
         else:
+            allowed_inputs_seq.remove("-")
+            allowed_inputs_seq -= set(dictionaries.amino_acids.values())
             if not all(aminoacids in allowed_inputs_seq for aminoacids in sequence):
                 raise ValueError("Invalid sequence given")
     procedure_arguments = {}
@@ -258,44 +260,48 @@ def check_and_parse_user_input(
     return procedure_arguments, procedure
 
 
-def run_protein_tools(sequences: list(str) or tuple(str), **kwargs: str):
+def run_protein_tools(sequences: list[str] or tuple[str], **kwargs: str):
     """
-    Main function to process protein sequence by one of the developed tools.
+    Main function to process protein sequence by one of the developed tools.\n
     Run one procedure at a time:
     - Search for conserved amino acids residues in protein sequence
     - Search for alternative frames in a protein sequences
     - Convert protein sequences to RNA or DNA sequences
-    -
+    - Reverse the protein sequences from one-letter to three-letter format and vice-versa
+    - Define molecular weight of the protein sequences
 
-    All functions are letter case sensitive
-    Provide protein sequence in one letter code.
-    You can obtain one letter code from three letter code with *three_one_letter_code*
-    If more information needed please see Readme or desired docstring
+    All functions except *search_for_alt_frames* are letter case sensitive\n
+    Provide protein sequence in one letter code.\n
+    You can obtain one letter code from three letter code with *three_one_letter_code*\n
+    If more information needed please see README or desired docstring
 
     Arguments:
-    - sequences (list(str) or tuple(str)): sequences to process
-    - procedure (str): desired procedure:
+    - sequences (list[str] or tuple[str]): sequences to process
+    - procedure (str]: desired procedure:
         - "search_for_motifs"
         - "search_for_alt_frames"
         - "convert_to_nucl_acids"
         - "three_one_letter_code"
         - "define_molecular_weight"
+
     For "search_for_motif" procedure provide:
-        - motif (str): desired motif to check presense in every given sequence
+    - motif (str]: desired motif to check presense in every given sequence\n
             Example: motif = "GA"
-        - overlapping (bool): count (True) or skip (False) overlapping matches. (Optional)
+    - overlapping (bool): count (True) or skip (False) overlapping matches. (Optional)\n
             Example: overlapping = False
+
     For "search_for_alt_frames" procedure provide:
-        - alt_start_aa (str): the name of an amino acid that is encoded by alternative start codon (Optional)
-            Example: alt_start_aa = 'I'
+    - alt_start_aa (str]: the name of an amino acid that is encoded by alternative start codon (Optional)\n
+            Example: alt_start_aa = "I"
+
     For "convert_to_nucl_acids" procedure provide:
-        - nucl_acids (str): the nucleic acid to convert to
-            Example: nucl_acids = 'RNA'
-                     nucl_acids = 'DNA'
-                     nucl_acids = 'both'
+    - nucl_acids (str]: the nucleic acid to convert to\n
+            Example: nucl_acids = "RNA"\n
+                           nucl_acids = "DNA"\n
+                           nucl_acids = "both"
 
     Return:
-    - dict: Dictionary with processed sequences. Depends on desired tool
+    - dict: Dictionary with processed sequences. Depends on desired tool\n
             Please see Readme or desired docstring
     """
     procedure_arguments, procedure = check_and_parse_user_input(sequences, **kwargs)

From d3b21d1aa030fdcd931c5c873528f320f8fcebf5 Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Sun, 1 Oct 2023 02:20:52 +0300
Subject: [PATCH 32/36] Add minor fixes

---
 HW4_Grigoriants/protein_tools.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index 11e1adc..0de1815 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -248,7 +248,7 @@ def check_and_parse_user_input(
             procedure_arguments["alt_start_aa"] = "M"
         else:
             if len(kwargs["alt_start_aa"]) > 1:
-                raise ValueError("Invalid start AA!")
+                raise ValueError("Invalid alternative start AA")
             procedure_arguments["alt_start_aa"] = kwargs["alt_start_aa"]
     elif procedure == "convert_to_nucl_acids":
         if "nucl_acids" not in kwargs.keys():

From 7412e71e4d44ded2e483442d5d630508bbaec282 Mon Sep 17 00:00:00 2001
From: Vladimir Grigoriants <vova.grig2002@gmail.com>
Date: Sun, 1 Oct 2023 04:18:33 +0400
Subject: [PATCH 33/36] Update README.md: add information, pictures, team photo

---
 HW4_Grigoriants/README.md | 185 ++++++++++++++++++++++++++++++--------
 1 file changed, 149 insertions(+), 36 deletions(-)

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
index 227f9eb..3248949 100644
--- a/HW4_Grigoriants/README.md
+++ b/HW4_Grigoriants/README.md
@@ -4,57 +4,160 @@
 *Proteins* are under the constant focus of scientists. Currently, there are an enormous amount of tools to operate with nucleotide sequences, however, the same ones for proteins are extremely rare. 
 
 
-`Protein_tools.py` is an open-source program that facilitates working with protein sequences. 
-
-*В моём представлении здесь должна быть картинка*
+`protein_tools.py` is an open-source program that facilitates working with protein sequences. 
 
 ## Usage
-The programm is based on `run_protein_tools` function that takes the list of **one-letter amino acid sequences**, a name of procedure and a relevant argument. If you have three-letter amino acids sequences you could convert them by using `three_one_letter_code` procedure in advance. Before using this procedure, check the *Options*.
+The programm is based on `run_protein_tools` function that takes the list of **one-letter amino acid sequences** (not only), a name of procedure and a relevant argument. If you have three-letter amino acids sequences you could convert them by using `three_one_letter_code` procedure in advance. Before using this procedure, check the *Options* and *Examples*.
 
 To start with the program run the following command:
 
-`run_protein_tools([sequence_1, sequence_2 ..., sequence_n], procedure, ...)`
+`run_protein_tools(sequences, procedure="procedure", ...)`
 
 Where:
-- [sequence_1, sequence_2 ..., sequence_n] - a list of protein sequences
-- procedure - a type of procedure to use that is inputed in *string* type
-- ... - an additional argument that is to be inputed in *string* type
+- sequences - positional argument, a list of protein sequences
+- procedure - keyword argument, a type of procedure to use that is inputed in *string* type
+- ... - an additional keyword arguments that are to be inputed in *string* type
 
 ## Options
 
-The program has five types of procedures:
+The program has five types of procedures, for more information please see provided docstrings:
 
  `three_one_letter_code`
+ 
+ ![image](https://drive.google.com/uc?export=view&id=1eACjU_CXFbqeu1iW3ekwcg81n-X3WvTG)
 
 - The main aim - to convert three-letter amino acid sequences to one-letter ones and vice-versa
 - In case of three-to-one translation the names of amino acids **must be separated with hyphen**
 - An additional argument: no
+```
+"""
+Reverse the protein sequences from one-letter to three-letter format and vice-versa
+
+Case 1: get three-letter sequence\n
+Use one-letter amino-acids sequences of any letter case
+
+Case 2: get one-letter sequence\n
+Use three-letter amino-acid separated by "-" sequences.
+Please note that sequences without "-" are parsed as one-letter code sequences\n
+Example: for sequence "Ala" function will return "Ala-leu-ala"
+
+Arguments:
+- sequences (tuple[str] or list[str]): protein sequences to convert\n
+Example: ["WAG", "MkqRe", "msrlk", "Met-Ala-Gly", "Met-arg-asn-Trp-Ala-Gly", "arg-asn-trp"]
+
+Return:
+- list: one-letter/three-letter protein sequences\n
+Example: ["Met-Ala-Gly", "Met-arg-asn-Trp-Ala-Gly", "arg-asn-trp", "WAG", "MkqRe", "rlk"]
+"""
+```
 
  `define_molecular_weight` 
+ 
+ ![image](https://drive.google.com/uc?export=view&id=1i9_4ys64XsAxnw-08zbgyBQnGzJoGJfr)
 
 - The main aim - to determine the exact molecular weight of protein sequences
 - An additional argument: no
+```
+"""
+Define molecular weight of the protein sequences
+
+Use one-letter amino-acids sequences of any letter case
+The molecular weight is:
+- a sum of masses of each atom constituting a molecule
+- expressed in units called daltons (Da)
+- rounded to hundredths
+
+Arguments:
+- sequences (tuple[str] or list[str]): protein sequences to convert
+
+Return:
+- dictionary: protein sequences as keys and molecular masses as values\n
+Example: {"WAG": 332.39, "MkqRe": 690.88, "msrlk": 633.86}
+"""
+```
 
- `check_for_motifs` 
+ `search_for_motifs` 
 
-- The main aim - to search for the motif of interest in protein sequences
-- An additional argument: motif (*str*)
+  ![image](https://drive.google.com/uc?export=view&id=1_bVKRn4RblrfukIxoQc0NZ_FXaJliGAH)
 
+- The main aim - to search for the motif of interest in protein sequences
+- An additional arguments: motif (*str*), overlapping (*str*)
+```
+"""
+Search for motifs - conserved amino acids residues in protein sequence
+
+Search for one motif at a time\n
+Search is letter case sensitive\n
+Use one-letter aminoacids code for desired sequences and motifs\n
+Positions of AA in sequences are counted from 0\n
+By default, overlapping matches are counted
+
+Arguments:
+- sequences (tuple[str] or list[str]): sequences to check for given motif within\n
+Example: sequences = ["AMGAGW", "GAWSGRAGA"]
+- motif (str): desired motif to check presence in every given sequence\n
+Example: motif = "GA"
+- overlapping (bool): count (True) or skip (False) overlapping matches. (Optional)\n
+Example: overlapping = False
+Return:
+- dictionary: sequences (str) as keys, starting positions for presented motif (list) as values\n
+Example: {"AMGAGW": [2], "GAWSGRAGA": [0, 7]}
+"""
+```
  `search_for_alt_frames` 
  
+ ![image](https://drive.google.com/uc?export=view&id=1AdXnkRDIRiC_5yiiI2qiAMSMWbZf1RIm)
+
 - The main aim - to look for alternative frames that start with methyonine or other non-canonical start amino acids
 - Ignores the last three amino acids due to the insignicance of alternative frames of this length
 - An additional argument: alt_start_aa (*str*)
 - Use alt_start_aa **only for non-canonical start amino acids**
 - Without alt_start_aa the procedure find alternative frames that start with methyonine
+```
+"""
+Search for alternative frames in a protein sequences
+
+Search is not letter case sensitive\n
+Without an alt_start_aa argument search for frames that start with methionine ("M")
+To search frames with alternative start codon add alt_start_aa argument\n
+In alt_start_aa argument use one-letter code
+
+The function ignores the last three amino acids in sequences
+
+Arguments:
+- sequences (tuple[str] or list[str]): sequences to check
+- alt_start_aa (str): the amino acid that is encoded by the alternative start codon (Optional)\n
+Example: alt_start_aa = "I"
 
+Return:
+- dictionary: the number of a sequence and a collection of alternative frames
+"""
+```
 `convert_to_nucl_acids` 
+ 
+ ![image](https://drive.google.com/uc?export=view&id=1_pZJ0Gc-EVcR1zddpDW4Ok3w8t65fW_z)
 
 - The main aim - to convert protein sequences to DNA, RNA or both nucleic acid sequences
 - The program use the most frequent codons in human that could be found [here](https://www.genscript.com/tools/codon-frequency-table)
 - An additional argument: nucl_acids (*str*)
 - Use as nucl_acids only DNA, RNA or both (for more detailes, check *Examples*)
-  
+```
+"""
+Convert protein sequences to RNA or DNA sequences.
+
+Use the most frequent codons in human. The source - https://www.genscript.com/tools/codon-frequency-table\n
+All nucleic acids (DNA and RNA) are shown in 5'-3' direction
+
+Arguments:
+- sequences (tuple[str] or list[str]): sequences to convert
+- nucl_acids (str): the nucleic acid that is preferred\n
+Example: nucl_acids = "RNA" - convert to RNA\n
+               nucl_acids = "DNA" - convert to DNA\n
+               nucl_acids = "both" - convert to RNA and DNA
+Return:
+- dictionary: nucleic acids (str) as keys, collection of sequences (list) as values
+"""
+```
 
 ## Examples
 ```python
@@ -62,27 +165,33 @@ The program has five types of procedures:
 run_protein_tools(['met-Asn-Tyr', 'Ile-Ala-Ala'], procedure='three_one_letter_code')  # ['mNY', 'IAA']
 run_protein_tools(['mNY','IAA'], procedure='three_one_letter_code')  # ['met-Asn-Tyr', 'Ile-Ala-Ala']
 
+
 # define_molecular_weight
-run_protein_tools(['MNY','IAA'], procedure='define_molecular_weight')  # [462.52000000000004, 309.35]
+run_protein_tools(['MNY','IAA'], procedure='define_molecular_weight')  # {'MNY': 426.52, 'IAA': 273.35}
+
 
 # check_for_motifs
-run_protein_tools(['mNY','IAA'], procedure='check_for_motifs', motif='NY')
-# Sequence: mNY
-# Motif: NY
-# Motif is present in protein sequence starting at positions: 1
-# Sequence: IAA
-# Motif: NY
-# Motif is not present in protein sequence
-# {'mNY': [1], 'IAA': []}
+run_protein_tools(['mNY','IAA'], procedure='search_for_motifs', motif='NY')
+#Sequence: mNY
+#Motif: NY
+#Motif is present in protein sequence starting at positions: 1
+
+#Sequence: IAA
+#Motif: NY
+#Motif is not present in protein sequence
+
+{'mNY': [1], 'IAA': []}
+
 
 # search_for_alt_frames
 run_protein_tools(['mNYQTMSPYYDMId'], procedure='search_for_alt_frames')  # {'mNYQTMSPYYDMId': ['MSPYYDMId']}
 run_protein_tools(['mNYTQTSP'], procedure='search_for_alt_frames', alt_start_aa='T')  # {'mNYTQTSP': ['TQTSP']}
 
+
 # convert_to_nucl_acids
 run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'RNA')  # {'RNA': ['AUGAACUAU']}
-run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'DNA')  # {'DNA': ['ATGAACTAT']}
-run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'both') # {'RNA': ['AUGAACUAU'], 'DNA': ['ATGAACTAT']}
+run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'DNA')  # {'DNA': ['ATGAACTAT']}
+run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'both') # {'RNA': ['AUGAACUAU'], 'DNA': ['ATGAACTAT']}
 
 ```
 
@@ -91,19 +200,23 @@ run_protein_tools(['MNY'], procedure='convert_to_nucl_acids', nucl_acids = 'both
 |  Type of the problem                                             |  Probable cause
 | ------------------------------------------------------------ |--------------------
 | Output does not correspond the expected resultes             | The name of procedure is wrong. You see the results of another procedure
-| ValueError: No sequences provided                          | A list of sequences are not inputed
-| ValueError: Wrong procedure                                | The procedure does not exist in this program
+| ValueError: No sequences provided                            | A list of sequences are not inputed
+| ValueError: Wrong procedure                                  | The procedure does not exist in this program
 | TypeError: takes from 0 to 1 positional arguments but n were given  | Sequences are not collected into the list type
-| ValueError: Invalid sequence given                         | The sequences do not correspond to standard amino acid code
-| ValueError: Please provide desired motif                   | There are no an additional argument *motif* in `check_for_motifs`
-| ValueError: Invalid start AA!                              | There is more than one letter in an additional argument *alt_start_aa* in `search_for_alt_frames`
-| ValueError: Please provide desired type of nucl_acids      | There are no an additional argument *nucl_acids* in `convert_to_nucl_acids`
-| ValueError: Invalid nucl_acids argument                    | An additional argument in `convert_to_nucl_acids` is written incorrectly
-## Contacts
-Authors:
+| ValueError: Invalid sequence given                           | The sequences do not correspond to standard amino acid code
+| ValueError: Please provide desired motif                     | The additional argument *motif* is missing in `search_for_motifs`
+| ValueError: Invalid alternative start AA                     | There is more than one letter in the additional argument *alt_start_aa* in `search_for_alt_frames`
+| ValueError: Please provide desired type of nucl_acids        | The additional argument *nucl_acids* is missing in `convert_to_nucl_acids`
+| ValueError: Invalid nucl_acids argument                      | The additional argument in `convert_to_nucl_acids` is written incorrectly
+## Contacts 
+Vladimir Grigoriants (vova.grig2002@gmail.com)
+Team-leader. Bioinformatician, immunologist, MiLaboratory inc. TCR-libraries QC developer 
 
-Vladimir Grigoriants (*адрес*)
+Ekaterina Shitik (shitik.ekaterina@gmail.com)
+Doctor of medicine, molecular biologist with the main interests on gene engineering, AAV vectors and CRISPR/Cas9 technologies
 
-Tulyavko Vlada (*адрес*)
+Vlada Tuliavko (vladislavi27@gmail.com)
+MiLaboratory inc. manager&designer, immunologist
 
-Ekaterina Shitik (shitik.ekaterina@gmail.com)
+## Our team
+![image](https://drive.google.com/uc?export=view&id=1tdSGpNl6GorFPZIqweB0PaGxQW5wK5Oo)

From 4d23561a812b379525787727124c5c55d4d10cb9 Mon Sep 17 00:00:00 2001
From: Vladimir Grigoriants <vova.grig2002@gmail.com>
Date: Sun, 1 Oct 2023 04:28:35 +0400
Subject: [PATCH 34/36] Update README.md

---
 HW4_Grigoriants/README.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
index 3248949..aff897e 100644
--- a/HW4_Grigoriants/README.md
+++ b/HW4_Grigoriants/README.md
@@ -7,7 +7,7 @@
 `protein_tools.py` is an open-source program that facilitates working with protein sequences. 
 
 ## Usage
-The programm is based on `run_protein_tools` function that takes the list of **one-letter amino acid sequences** (not only), a name of procedure and a relevant argument. If you have three-letter amino acids sequences you could convert them by using `three_one_letter_code` procedure in advance. Before using this procedure, check the *Options* and *Examples*.
+The program is based on the `run_protein_tools` function, which takes a list of **one-letter amino acid sequences**, the name of a procedure and the relevant arguments. If you have three-letter amino acid sequences, convert them with the `three_one_letter_code` procedure before using any other procedure on them.
 
 To start with the program run the following command:
 
@@ -17,7 +17,8 @@ Where:
 - sequences - positional argument, a list of protein sequences
 - procedure - keyword argument, a type of procedure to use that is inputed in *string* type
 - ... - an additional keyword arguments that are to be inputed in *string* type
-
+
+Before you start, check the *Options* and *Examples*.
 ## Options
 
 The program has five types of procedures, for more information please see provided docstrings:
@@ -81,7 +82,7 @@ Example: {"WAG": 332.39, "MkqRe": 690.88, "msrlk": 633.86}
   ![image](https://drive.google.com/uc?export=view&id=1_bVKRn4RblrfukIxoQc0NZ_FXaJliGAH)
 
 - The main aim - to search for the motif of interest in protein sequences
-- An additional arguments: motif (*str*), overlapping (*str*)
+- An additional arguments: motif (*str*), overlapping (*bool*)
 ```
 """
 Search for motifs - conserved amino acids residues in protein sequence

From dd6f4a63cf6de7178085abdc5ba043d916759ff4 Mon Sep 17 00:00:00 2001
From: Vladimir Grigoriants <vova.grig2002@gmail.com>
Date: Sun, 1 Oct 2023 04:30:13 +0400
Subject: [PATCH 35/36] Update README.md

---
 HW4_Grigoriants/README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HW4_Grigoriants/README.md b/HW4_Grigoriants/README.md
index aff897e..c2c6801 100644
--- a/HW4_Grigoriants/README.md
+++ b/HW4_Grigoriants/README.md
@@ -216,7 +216,7 @@ Team-leader. Bioinformatician, immunologist, MiLaborary inc. TCR-libraries QC de
 Ekaterina Shitik (shitik.ekaterina@gmail.com)
 Doctor of medicine, molecular biologist with the main interests on gene engineering, AAV vectors and CRISPR/Cas9 technologies
 
-Vlada Tuliavko (vladislavi27@gmail.com)
+Vlada Tuliavko (vladislavi2742@gmail.com)
 MiLaboratory inc. manager&designer, immunologist
 
 ## Our team

From a3bec1bea04a01ce355a1452401ed6bc8ee31b3b Mon Sep 17 00:00:00 2001
From: VovaGrig <vova.grig2002@gmail.com>
Date: Sat, 14 Oct 2023 22:09:43 +0300
Subject: [PATCH 36/36] Add fixes based on feedback to dictionaries.py and
 protein_tools.py

---
 HW4_Grigoriants/dictionaries.py  |  46 +--------
 HW4_Grigoriants/protein_tools.py | 157 ++++++++++++++-----------------
 2 files changed, 72 insertions(+), 131 deletions(-)

diff --git a/HW4_Grigoriants/dictionaries.py b/HW4_Grigoriants/dictionaries.py
index f4a1ada..c5725d1 100644
--- a/HW4_Grigoriants/dictionaries.py
+++ b/HW4_Grigoriants/dictionaries.py
@@ -1,4 +1,4 @@
-amino_acids = {
+AMINO_ACIDS = {
     "A": "Ala",
     "C": "Cys",
     "D": "Asp",
@@ -19,70 +19,30 @@
     "V": "Val",
     "W": "Trp",
     "Y": "Tyr",
-    "a": "ala",
-    "c": "cys",
-    "d": "asp",
-    "e": "glu",
-    "f": "phe",
-    "g": "gly",
-    "h": "his",
-    "i": "ile",
-    "k": "lys",
-    "l": "leu",
-    "m": "met",
-    "n": "asn",
-    "p": "pro",
-    "q": "gln",
-    "r": "arg",
-    "s": "ser",
-    "t": "thr",
-    "v": "val",
-    "w": "trp",
-    "y": "tyr",
 }
-translation_rule = {
+TRANSLATION_RULE = {
     "F": "UUU",
-    "f": "uuu",
     "L": "CUG",
-    "l": "cug",
     "I": "AUU",
-    "i": "auu",
     "M": "AUG",
-    "m": "aug",
     "V": "GUG",
-    "v": "gug",
     "P": "CCG",
-    "p": "ccg",
     "T": "ACC",
-    "t": "acc",
     "A": "GCG",
-    "a": "gcg",
     "Y": "UAU",
-    "y": "uau",
     "H": "CAU",
-    "h": "cau",
     "Q": "CAG",
-    "q": "cag",
     "N": "AAC",
-    "n": "aac",
     "K": "AAA",
-    "k": "aaa",
     "D": "GAU",
-    "d": "gau",
     "E": "GAA",
-    "e": "gaa",
     "C": "UGC",
-    "c": "ugc",
     "W": "UGG",
-    "w": "ugg",
     "R": "CGU",
-    "r": "cgu",
     "S": "AGC",
-    "s": "agc",
     "G": "GGC",
-    "g": "ggc",
 }
-amino_acid_weights = {
+AMINO_ACID_WEIGHTS = {
     "A": 89.09,
     "C": 121.16,
     "D": 133.10,
diff --git a/HW4_Grigoriants/protein_tools.py b/HW4_Grigoriants/protein_tools.py
index 0de1815..df92cef 100644
--- a/HW4_Grigoriants/protein_tools.py
+++ b/HW4_Grigoriants/protein_tools.py
@@ -1,7 +1,7 @@
 import dictionaries
 
 
-def three_one_letter_code(sequences: str) -> list:
+def three_one_letter_code(sequences: (tuple[str] or list[str])) -> list:
     """
     Reverse the protein sequences from one-letter to three-letter format and vice-versa
 
@@ -23,23 +23,35 @@ def three_one_letter_code(sequences: str) -> list:
     """
     inversed_sequences = []
     for sequence in sequences:
-        inversed_sequence = ""
+        inversed_sequence = []
         if "-" not in sequence:
             for letter in sequence:
-                inversed_sequence += dictionaries.amino_acids[letter] + "-"
-            inversed_sequence = inversed_sequence[:-1]
-            inversed_sequences.append(inversed_sequence)
+                if letter.islower():
+                    inversed_sequence.append(
+                        dictionaries.AMINO_ACIDS[letter.capitalize()].lower()
+                    )
+                else:
+                    inversed_sequence.append(dictionaries.AMINO_ACIDS[letter])
+            inversed_sequences.append("-".join(inversed_sequence))
         else:
             aa_splitted = sequence.split("-")
             for aa in aa_splitted:
-                inversed_sequence += list(dictionaries.amino_acids.keys())[
-                    list(dictionaries.amino_acids.values()).index(aa)
-                ]
-            inversed_sequences.append(inversed_sequence)
+                aa_index = list(dictionaries.AMINO_ACIDS.values()).index(
+                    aa.capitalize()
+                )
+                if aa[0].islower():
+                    inversed_sequence.append(
+                        list(dictionaries.AMINO_ACIDS.keys())[aa_index].lower()
+                    )
+                else:
+                    inversed_sequence.append(
+                        list(dictionaries.AMINO_ACIDS.keys())[aa_index]
+                    )
+            inversed_sequences.append("".join(inversed_sequence))
     return inversed_sequences
 
 
-def define_molecular_weight(sequences: str) -> dict:
+def define_molecular_weight(sequences: (tuple[str] or list[str])) -> dict:
     """
     Define molecular weight of the protein sequences
 
@@ -60,7 +72,7 @@ def define_molecular_weight(sequences: str) -> dict:
     for sequence in sequences:
         sequence_weight = 0
         for letter in sequence:
-            sequence_weight += dictionaries.amino_acid_weights[letter.upper()]
+            sequence_weight += dictionaries.AMINO_ACID_WEIGHTS[letter.upper()]
         sequence_weight -= (len(sequence) - 1) * 18  # deduct water from peptide bond
         sequences_weights[sequence] = round(sequence_weight, 2)
     return sequences_weights
@@ -117,7 +129,9 @@ def search_for_motifs(
     return all_positions
 
 
-def search_for_alt_frames(sequences: str, alt_start_aa: str) -> dict:
+def search_for_alt_frames(
+    sequences: (tuple[str] or list[str]), alt_start_aa: str
+) -> dict:
     """
     Search for alternative frames in a protein sequences
 
@@ -150,7 +164,9 @@ def search_for_alt_frames(sequences: str, alt_start_aa: str) -> dict:
     return alternative_frames
 
 
-def convert_to_nucl_acids(sequences: list, nucl_acids: str) -> dict:
+def convert_to_nucl_acids(
+    sequences: (tuple[str] or list[str]), nucl_acids: str
+) -> dict:
     """
     Convert protein sequences to RNA or DNA sequences.
 
@@ -166,27 +182,35 @@ def convert_to_nucl_acids(sequences: list, nucl_acids: str) -> dict:
     Return:
     - dictionary: nucleic acids (str) as keys, collection of sequences (list) as values
     """
-    rule_of_translation = sequences[0].maketrans(dictionaries.translation_rule)
-    rule_of_transcription = sequences[0].maketrans("AaUuCcGg", "TtAaGgCc")
+    rule_of_translation = str.maketrans(dictionaries.TRANSLATION_RULE)
+    # add lower case pairs, because only upper case pairs are stored in dictionaries
+    rule_of_translation.update(
+        str.maketrans(
+            dict(
+                (k.lower(), v.lower()) for k, v in dictionaries.TRANSLATION_RULE.items()
+            )
+        )
+    )
     nucl_acid_seqs = {"RNA": [], "DNA": []}
     for sequence in sequences:
         rna_seq = sequence.translate(rule_of_translation)
-        dna_seq = rna_seq.translate(rule_of_transcription)
         if nucl_acids == "RNA":
             nucl_acid_seqs["RNA"].append(rna_seq)
-            if sequence == sequences[-1]:
-                del nucl_acid_seqs["DNA"]
-        if nucl_acids == "DNA":
+        elif nucl_acids == "DNA":
+            dna_seq = rna_seq.replace("U", "T").replace("u", "t")
             nucl_acid_seqs["DNA"].append(dna_seq)
-            if sequence == sequences[-1]:
-                del nucl_acid_seqs["RNA"]
-        if nucl_acids == "both":
+        elif nucl_acids == "both":
+            dna_seq = rna_seq.replace("U", "T").replace("u", "t")
             nucl_acid_seqs["RNA"].append(rna_seq)
             nucl_acid_seqs["DNA"].append(dna_seq)
+    if nucl_acids == "RNA":
+        del nucl_acid_seqs["DNA"]
+    if nucl_acids == "DNA":
+        del nucl_acid_seqs["RNA"]
     return nucl_acid_seqs
 
 
-procedures_to_functions = {
+PROTEINS_PROCEDURES_TO_FUNCTIONS = {
     "search_for_motifs": search_for_motifs,
     "search_for_alt_frames": search_for_alt_frames,
     "convert_to_nucl_acids": convert_to_nucl_acids,
@@ -196,12 +220,12 @@ def convert_to_nucl_acids(sequences: list, nucl_acids: str) -> dict:
 
 
 def check_and_parse_user_input(
-    sequences: list[str] or tuple[str], **kwargs
+    sequences: (str, tuple[str] or list[str]), **kwargs
 ) -> dict and str:
     """
     Check if user input can be correctly processed\n
     Parse sequences and arguments for desired procedure
-    
+
     Arguments:
     - sequences (list[str] or tuple[str]): sequences to process
     - **kwargs - needed arguments for completion of desired procedure
@@ -210,29 +234,34 @@ def check_and_parse_user_input(
     - string: procedure name
     - dictionary: a collection of procedure arguments and their values
     """
-    if len(sequences) == 0:
-        raise ValueError("No sequences provided")
+    if isinstance(sequences, str):
+        sequences = sequences.split()
+    if "" in sequences or len(sequences) == 0:
+        raise ValueError("Empty sequence provided")
     procedure = kwargs["procedure"]
-    if procedure not in procedures_to_functions.keys():
+    if procedure not in PROTEINS_PROCEDURES_TO_FUNCTIONS.keys():
         raise ValueError("Wrong procedure")
-    allowed_inputs = set(dictionaries.amino_acids.keys()).union(
-        set(dictionaries.amino_acids.values())
+    allowed_inputs = set(dictionaries.AMINO_ACIDS.keys())
+    allowed_inputs = allowed_inputs.union(
+        set(k.lower() for k in dictionaries.AMINO_ACIDS.keys())
     )
-    allowed_inputs.add("-")
-    if procedure != "three_one_letter_code":
-        allowed_inputs -= set(dictionaries.amino_acids.values())
+    if procedure == "three_one_letter_code":
+        allowed_inputs = allowed_inputs.union(set(dictionaries.AMINO_ACIDS.values()))
+        allowed_inputs = allowed_inputs.union(
+            set(v.lower() for v in dictionaries.AMINO_ACIDS.values())
+        )
     for sequence in sequences:
         allowed_inputs_seq = allowed_inputs.copy()
         if procedure == "three_one_letter_code" and "-" in sequence:
-            allowed_inputs_seq -= set(dictionaries.amino_acids.keys())
-            if not all(
-                aminoacids in allowed_inputs_seq for aminoacids in sequence.split("-")
-            ):
+            allowed_inputs_seq -= set(dictionaries.AMINO_ACIDS.keys())
+            allowed_inputs_seq -= set(
+                k.lower() for k in dictionaries.AMINO_ACIDS.keys()
+            )
+            allowed_inputs_seq.union(set("-"))
+            if not set(sequence.split("-")).issubset(allowed_inputs_seq):
                 raise ValueError("Invalid sequence given")
         else:
-            allowed_inputs_seq.remove("-")
-            allowed_inputs_seq -= set(dictionaries.amino_acids.values())
-            if not all(aminoacids in allowed_inputs_seq for aminoacids in sequence):
+            if not set(sequence).issubset(allowed_inputs_seq):
                 raise ValueError("Invalid sequence given")
     procedure_arguments = {}
     if procedure == "search_for_motifs":
@@ -258,51 +287,3 @@ def check_and_parse_user_input(
         procedure_arguments["nucl_acids"] = kwargs["nucl_acids"]
     procedure_arguments["sequences"] = sequences
     return procedure_arguments, procedure
-
-
-def run_protein_tools(sequences: list[str] or tuple[str], **kwargs: str):
-    """
-    Main function to process protein sequence by one of the developed tools.\n
-    Run one procedure at a time:
-    - Search for conserved amino acids residues in protein sequence
-    - Search for alternative frames in a protein sequences
-    - Convert protein sequences to RNA or DNA sequences
-    - Reverse the protein sequences from one-letter to three-letter format and vice-versa
-    - Define molecular weight of the protein sequences
-
-    All functions except *search_for_alt_frames* are letter case sensitive\n
-    Provide protein sequence in one letter code.\n
-    You can obtain one letter code from three letter code with *three_one_letter_code*\n
-    If more information needed please see README or desired docstring
-
-    Arguments:
-    - sequences (list[str] or tuple[str]): sequences to process
-    - procedure (str]: desired procedure:
-        - "search_for_motifs"
-        - "search_for_alt_frames"
-        - "convert_to_nucl_acids"
-        - "three_one_letter_code"
-        - "define_molecular_weight"
-
-    For "search_for_motif" procedure provide:
-    - motif (str]: desired motif to check presense in every given sequence\n
-            Example: motif = "GA"
-    - overlapping (bool): count (True) or skip (False) overlapping matches. (Optional)\n
-            Example: overlapping = False
-
-    For "search_for_alt_frames" procedure provide:
-    - alt_start_aa (str]: the name of an amino acid that is encoded by alternative start codon (Optional)\n
-            Example: alt_start_aa = "I"
-
-    For "convert_to_nucl_acids" procedure provide:
-    - nucl_acids (str]: the nucleic acid to convert to\n
-            Example: nucl_acids = "RNA"\n
-                           nucl_acids = "DNA"\n
-                           nucl_acids = "both"
-
-    Return:
-    - dict: Dictionary with processed sequences. Depends on desired tool\n
-            Please see Readme or desired docstring
-    """
-    procedure_arguments, procedure = check_and_parse_user_input(sequences, **kwargs)
-    return procedures_to_functions[procedure](**procedure_arguments)