In [1]:
from collections import defaultdict

# Create a dictionary to store base counts for each RNA type
rna_base_counts = defaultdict(lambda: defaultdict(int))

# Open the input file
with open("Downloads/test/ncbi_dataset/data/GCF_000931575.1/cds_from_genomic.fna", "r") as file:
    current_rna_type = None  # To keep track of the current RNA type
    sequence = ""  # To store the current sequence
    
    for line in file:
        line = line.strip()
        if line.startswith(">"):
            # Parse the header line to determine the RNA type
            if "rRNA" in line:
                current_rna_type = "rRNA"
            elif "tRNA" in line:
                current_rna_type = "tRNA"
            elif "mRNA" in line:
                current_rna_type = "mRNA"
            else:
                current_rna_type = "Unknown"  # If RNA type is not specified
            
            sequence = ""  # Initialize the sequence for the current RNA type
        else:
            # Accumulate the sequence data
            sequence += line
            
            # Calculate base counts
            for base in sequence:
                rna_base_counts[current_rna_type][base] += 1

# Print the results
for rna_type, base_counts in rna_base_counts.items():
    print(f"RNA Type: {rna_type}")
    print(f"A: {base_counts['A']}")
    print(f"T: {base_counts['T']}")
    print(f"G: {base_counts['G']}")
    print(f"C: {base_counts['C']}")
    print()


RNA Type: Unknown
A: 4581344
T: 4357960
G: 3065293
C: 2649685

RNA Type: tRNA
A: 250560
T: 226984
G: 176753
C: 152677

RNA Type: rRNA
A: 53241
T: 48941
G: 35531
C: 29642



In [3]:
# Initialize variables to store base counts
total_A_count = 0
total_T_count = 0
total_G_count = 0
total_C_count = 0

# Open the input file
with open("Downloads/GCA_000931575.1_ASM93157v1_rna_from_genomic.fna/GCA_000931575.1_ASM93157v1_rna_from_genomic.fna", "r") as file:
    in_sequence = False  # To indicate whether we are in a sequence or not
    
    for line in file:
        line = line.strip()
        if line.startswith(">"):
            in_sequence = False  # Start of a new header, so we are not in the sequence
        else:
            in_sequence = True  # We are in the sequence
            
            # Count bases in the sequence
            total_A_count += line.count("A")
            total_T_count += line.count("T")
            total_G_count += line.count("G")
            total_C_count += line.count("C")

# Print the results
print(f"Total A count: {total_A_count}")
print(f"Total T count: {total_T_count}")
print(f"Total G count: {total_G_count}")
print(f"Total C count: {total_C_count}")

# Calculate the total number of bases
total_bases = total_A_count + total_T_count + total_G_count + total_C_count
print(f"Total bases: {total_bases}")


Total A count: 8628
Total T count: 7026
Total G count: 9867
Total C count: 6858
Total bases: 32379


# Biomass Objective Function

In [None]:
0.419109 ala_L[c] + 0.230561 arg_L[c] + 0.254642 asn_L[c] + 0.260278 asp_L[c] + 0.052260 cys_L[c] + 
0.240808 gln_L[c] + 0.335594 glu_L[c] + 0.338156 gly[c] + 0.105033 his_L[c] + 0.363262 ile_L[c] + 
0.533364 leu_L[c] + 0.325347 lys_L[c] + 0.121941 met_L[c] + 0.227487 phe_L[c] + 0.189060 pro_L[c] + 
0.301779 ser_L[c] + 0.266426 thr_L[c] + 0.058409 trp_L[c] + 0.161393 tyr_L[c] + 0.339693 val_L[c]+ 
0.034337 datp[c] + 0.021189 dctp[c] + 0.021189 dgtp[c] + 0.034337 dttp[c] + 0.158909 ctp[c] + 
0.110449 gtp[c] + 0.138955 utp[c] + 54.063155 atp[c] + 0.000223 10fthf[c] + 0.000223 2dmmql8[c] + 
0.000223 2fe2s[c] + 0.000223 4fe4s[c] + 0.000223 5mthf[c] + 0.0002791 accoa[c] +  0.000223 amet[c] +
0.000223 bmocogdp[c] + 2e-006 btn[c] + 0.00477859 ca2[c] + 0.000223 chor[c] + 0.00477859 cl[c] +
0.000168 coa[c] + 0.0031857 cobalt2[c] + 0.0109330 loshinf[c]  + 0.0031857 cu2[c]  + 0.000223 fad[c] +
0.0071678 fe2[c] + 0.0071678 fe3[c]  + 0.000223 gthrd[c]  + 48.825398 h2o[c] + 0.1791974 k[c]  +
0.000223 lipopb[c] + 0.000031266 malcoa[c] + 0.00796433 mg2[c] + 0.000223 mlthf[c] + 0.0031857 mn2[c] +
0.0031857 mobd[c] + 0.000223 mocogdp[c] + 0.001493436 murein3p3p[p] + 0.00074362 murein3px4p[p] +0.00597374 murein4p4p[p] +
0.006692636 murein4px4p[p] + 0.000057202 murein4px4px4p[p] + 0.0017866 nad[c] + 0.00004466 nadh[c] + 0.00011166 nadp[c] +
0.000335 nadph[c] + 0.01194649 nh4[c] + 0.00318573 ni2[c] + 0.03945960 pe140[p] + 0.02916579 pe160[p] +
0.0943599 pe161[p] + 0.0085781 pe180[p] + 0.0233950 pg160[p] + 0.0068808 pg180[p]  + 0.000223 pheme[c]  +
0.0332704 ptrc[c] +0.000223 pydx5p[c] + 0.000223 ribflv[c]  + 0.0039821 so4[c] + 0.00674427 spmd[c] + 
0.00009826 succoa[c] + 0.000223 thf[c] + 0.000223 thmpp[c] + 0.000055352 udcpdp[c]  + 0.00318573 zn2[c] -> 
53.9500 adp[c] + 53.9500 h[c] + 53.9461 pi[c] + 0.6121 ppi[c]

# hi467

In [2]:
# Initialize variables to store base counts
total_A_count = 0
total_T_count = 0
total_G_count = 0
total_C_count = 0

# Open the input file
with open("Downloads/GCA_001975845.1_ASM197584v1_rna_from_genomic.fna/hi467_rna_from_genomic.fna", "r") as file:
    in_sequence = False  # To indicate whether we are in a sequence or not
    
    for line in file:
        line = line.strip()
        if line.startswith(">"):
            in_sequence = False  # Start of a new header, so we are not in the sequence
        else:
            in_sequence = True  # We are in the sequence
            
            # Count bases in the sequence
            total_A_count += line.count("A")
            total_T_count += line.count("T")
            total_G_count += line.count("G")
            total_C_count += line.count("C")

# Print the results
print(f"Total A count: {total_A_count}")
print(f"Total T count: {total_T_count}")
print(f"Total G count: {total_G_count}")
print(f"Total C count: {total_C_count}")

# Calculate the total number of bases
total_bases = total_A_count + total_T_count + total_G_count + total_C_count
print(f"Total bases: {total_bases}")


Total A count: 2376
Total T count: 2126
Total G count: 2904
Total C count: 2216
Total bases: 9622


# Rdkw20

In [4]:
# Initialize variables to store base counts
total_A_count = 0
total_T_count = 0
total_G_count = 0
total_C_count = 0

# Open the input file
with open("Downloads/GCA_000027305.1_ASM2730v1_rna_from_genomic.fna/Rdkw20_rna_from_genomic.fna", "r") as file:
    in_sequence = False  # To indicate whether we are in a sequence or not
    
    for line in file:
        line = line.strip()
        if line.startswith(">"):
            in_sequence = False  # Start of a new header, so we are not in the sequence
        else:
            in_sequence = True  # We are in the sequence
            
            # Count bases in the sequence
            total_A_count += line.count("A")
            total_T_count += line.count("T")
            total_G_count += line.count("G")
            total_C_count += line.count("C")

# Print the results
print(f"Total A count: {total_A_count}")
print(f"Total T count: {total_T_count}")
print(f"Total G count: {total_G_count}")
print(f"Total C count: {total_C_count}")

# Calculate the total number of bases
total_bases = total_A_count + total_T_count + total_G_count + total_C_count
print(f"Total bases: {total_bases}")


Total A count: 7800
Total T count: 6223
Total G count: 8791
Total C count: 5859
Total bases: 28673


# KR494

In [5]:
# Initialize variables to store base counts
total_A_count = 0
total_T_count = 0
total_G_count = 0
total_C_count = 0

# Open the input file
with open("Downloads/GCA_000465255.1_ASM46525v1_rna_from_genomic.fna/KR494_rna_from_genomic.fna", "r") as file:
    in_sequence = False  # To indicate whether we are in a sequence or not
    
    for line in file:
        line = line.strip()
        if line.startswith(">"):
            in_sequence = False  # Start of a new header, so we are not in the sequence
        else:
            in_sequence = True  # We are in the sequence
            
            # Count bases in the sequence
            total_A_count += line.count("A")
            total_T_count += line.count("T")
            total_G_count += line.count("G")
            total_C_count += line.count("C")

# Print the results
print(f"Total A count: {total_A_count}")
print(f"Total T count: {total_T_count}")
print(f"Total G count: {total_G_count}")
print(f"Total C count: {total_C_count}")

# Calculate the total number of bases
total_bases = total_A_count + total_T_count + total_G_count + total_C_count
print(f"Total bases: {total_bases}")


Total A count: 8508
Total T count: 6931
Total G count: 9825
Total C count: 6745
Total bases: 32009


# M12125

In [7]:
# Initialize variables to store base counts
total_A_count = 0
total_T_count = 0
total_G_count = 0
total_C_count = 0

# Open the input file
with open("Downloads/GCF_003351605.1_ASM335160v1_rna_from_genomic.fna/M12125_rna_from_genomic.fna", "r") as file:
    in_sequence = False  # To indicate whether we are in a sequence or not
    
    for line in file:
        line = line.strip()
        if line.startswith(">"):
            in_sequence = False  # Start of a new header, so we are not in the sequence
        else:
            in_sequence = True  # We are in the sequence
            
            # Count bases in the sequence
            total_A_count += line.count("A")
            total_T_count += line.count("T")
            total_G_count += line.count("G")
            total_C_count += line.count("C")

# Print the results
print(f"Total A count: {total_A_count}")
print(f"Total T count: {total_T_count}")
print(f"Total G count: {total_G_count}")
print(f"Total C count: {total_C_count}")

# Calculate the total number of bases
total_bases = total_A_count + total_T_count + total_G_count + total_C_count
print(f"Total bases: {total_bases}")


Total A count: 9080
Total T count: 7382
Total G count: 10333
Total C count: 7195
Total bases: 33990


# 10211

In [6]:
# Initialize variables to store base counts
total_A_count = 0
total_T_count = 0
total_G_count = 0
total_C_count = 0

# Open the input file
with open("Downloads/GCA_001997355.1_ASM199735v1_rna_from_genomic.fna/10211_rna_from_genomic.fna", "r") as file:
    in_sequence = False  # To indicate whether we are in a sequence or not
    
    for line in file:
        line = line.strip()
        if line.startswith(">"):
            in_sequence = False  # Start of a new header, so we are not in the sequence
        else:
            in_sequence = True  # We are in the sequence
            
            # Count bases in the sequence
            total_A_count += line.count("A")
            total_T_count += line.count("T")
            total_G_count += line.count("G")
            total_C_count += line.count("C")

# Print the results
print(f"Total A count: {total_A_count}")
print(f"Total T count: {total_T_count}")
print(f"Total G count: {total_G_count}")
print(f"Total C count: {total_C_count}")

# Calculate the total number of bases
total_bases = total_A_count + total_T_count + total_G_count + total_C_count
print(f"Total bases: {total_bases}")


Total A count: 2524
Total T count: 2251
Total G count: 3085
Total C count: 2371
Total bases: 10231
