BRCA1 (BRCA1 DNA repair associated) [organism=Homo sapiens (human)] — analysis of the human genomic DNA sequence


In [None]:
from Bio import SeqIO
from Bio.SeqUtils import gc_fraction


# ---------------------------------------------------------------------------
# Configuration
# ---------------------------------------------------------------------------
input_file = "gene.fna"        # Input FASTA file
output_file = "output.fasta"   # Output file for filtered sequences
motif = "AGC"                  # Motif to search
gc_min = 44                    # Minimum GC% for filtering (inclusive)
gc_max = 48                    # Maximum GC% (inclusive)
length_threshold = 200         # Minimum sequence length (inclusive)
preview_length = 60            # Bases of the reverse complement to print


def find_motif_positions(sequence, pattern):
    """Return the 0-based start index of every occurrence of *pattern*.

    Overlapping occurrences are reported (the search resumes one character
    after each hit). The comparison is exact, so callers that want a
    case-insensitive search must normalise both arguments first.

    Args:
        sequence: Plain ``str`` to search in.
        pattern: Plain ``str`` to look for. An empty pattern yields no hits
            rather than matching at every index.

    Returns:
        A list of ints in ascending order; empty when there is no match.
    """
    if not pattern:
        return []
    positions = []
    hit = sequence.find(pattern)
    while hit != -1:
        positions.append(hit)
        # Step forward by one (not by len(pattern)) to keep overlapping hits.
        hit = sequence.find(pattern, hit + 1)
    return positions


# Load sequences from FASTA
sequences = list(SeqIO.parse(input_file, "fasta"))

# Store filtered records
filtered = []

# Normalise the motif once; sequences are upper-cased the same way below so
# that lowercase (soft-masked) bases in the FASTA file are still matched.
motif_upper = motif.upper()

# Analyze each sequence
for record in sequences:
    seq = record.seq
    # gc_fraction returns a fraction; scale to a percentage so it can be
    # compared against gc_min / gc_max.
    gc = gc_fraction(seq) * 100

    # Reverse complement
    rev_comp = seq.reverse_complement()

    # Motif search on a plain string: one pass with str.find instead of
    # slicing the Seq object once per base.
    motif_positions = find_motif_positions(str(seq).upper(), motif_upper)

    # Only show an ellipsis when the preview really is truncated.
    ellipsis = "..." if len(rev_comp) > preview_length else ""

    # Print analysis
    print(f"\nID: {record.id}")
    print(f"Length: {len(seq)}")
    print(f"GC Content: {gc:.2f}%")
    print(f"Motif '{motif}' found at positions: {motif_positions}")
    print(f"Reverse Complement:\n{rev_comp[:preview_length]}{ellipsis}")

    # Filter based on GC% and length. length_threshold is a minimum, so a
    # sequence of exactly that length is kept (inclusive, like the GC bounds).
    if gc_min <= gc <= gc_max and len(seq) >= length_threshold:
        filtered.append(record)

# Write filtered sequences to a new file
SeqIO.write(filtered, output_file, "fasta")
print(f"\n✅ {len(filtered)} sequences saved to '{output_file}'")



ID: NC_000017.11:c43170327-43044295
Length: 126033
GC Content: 44.10%
Motif 'AGC' found at positions: [61, 84, 163, 208, 223, 229, 266, 348, 351, 421, 472, 578, 646, 752, 854, 893, 945, 994, 1017, 1028, 1076, 1101, 1284, 1305, 1458, 1465, 1477, 1511, 1555, 1601, 1695, 1771, 1788, 1799, 1955, 1968, 2476, 2488, 2559, 2724, 2823, 2877, 2949, 2984, 3075, 3104, 3132, 3189, 3195, 3257, 3390, 3697, 3720, 3741, 3852, 3883, 3932, 3977, 4024, 4027, 4060, 4298, 4345, 4969, 5020, 5076, 5098, 5256, 5285, 5495, 5530, 5536, 5584, 5588, 5641, 5738, 5900, 6082, 6257, 6355, 6376, 6400, 6427, 6442, 6518, 6550, 6568, 6584, 6602, 6746, 6755, 6833, 6848, 6977, 7151, 7154, 7160, 7169, 7178, 7294, 7375, 7422, 7488, 7530, 7554, 7579, 7803, 7846, 7928, 8223, 8268, 8355, 8440, 8445, 8452, 8503, 8524, 8569, 8608, 8635, 8775, 8811, 8822, 8931, 8936, 9074, 9107, 9127, 9170, 9188, 9191, 9362, 9465, 9500, 9538, 9702, 9709, 9802, 9856, 10066, 10121, 10153, 10188, 10202, 10213, 10238, 10340, 10383, 10390, 10412, 10560