In [None]:
# Import the SeqIO module from Biopython for reading sequence files
from Bio import SeqIO

def gc_content(seq):
    """Return the GC content of a nucleotide sequence as a percentage.

    Both uppercase and lowercase bases are counted, so soft-masked
    (lowercase) FASTA sequences are handled the same as uppercase ones.

    Args:
        seq: The sequence to analyse. Any object supporting ``len()`` and
            ``.count()`` with single-character arguments works (e.g. ``str``).

    Returns:
        The share of 'G'/'C' bases in ``seq`` as a float between 0 and 100.
        An empty sequence yields ``0.0`` instead of raising
        ``ZeroDivisionError``.
    """
    total = len(seq)
    # Guard against empty records: there is nothing to divide by.
    if total == 0:
        return 0.0
    # Count each case explicitly rather than upper-casing, so the function
    # keeps working for every type the plain ``.count()`` calls worked on.
    gc_count = sum(seq.count(base) for base in ("G", "C", "g", "c"))
    return (gc_count / total) * 100

# Report the GC percentage of every record in the FASTA file, one per line.
fasta_records = SeqIO.parse("ls_orchid.fasta.fasta", "fasta")
for record in fasta_records:
    # gc_content is handed the plain-string form of the record's sequence.
    sequence_text = str(record.seq)
    gc = gc_content(sequence_text)
    # Output format: "<record id>: <GC percentage, two decimals>% GC".
    print(f"{record.id}: {gc:.2f}% GC")

gi|2765658|emb|Z78533.1|CIZ78533: 59.59% GC
gi|2765657|emb|Z78532.1|CCZ78532: 48.47% GC
gi|2765656|emb|Z78531.1|CFZ78531: 57.09% GC
gi|2765655|emb|Z78530.1|CMZ78530: 47.58% GC
gi|2765654|emb|Z78529.1|CLZ78529: 47.89% GC
gi|2765652|emb|Z78527.1|CYZ78527: 50.70% GC
gi|2765651|emb|Z78526.1|CGZ78526: 50.41% GC
gi|2765650|emb|Z78525.1|CAZ78525: 50.43% GC
gi|2765649|emb|Z78524.1|CFZ78524: 47.70% GC
gi|2765648|emb|Z78523.1|CHZ78523: 50.35% GC
gi|2765647|emb|Z78522.1|CMZ78522: 49.86% GC
gi|2765646|emb|Z78521.1|CCZ78521: 49.04% GC
gi|2765645|emb|Z78520.1|CSZ78520: 49.54% GC
gi|2765644|emb|Z78519.1|CPZ78519: 49.07% GC
gi|2765643|emb|Z78518.1|CRZ78518: 51.52% GC
gi|2765642|emb|Z78517.1|CFZ78517: 49.73% GC
gi|2765641|emb|Z78516.1|CPZ78516: 49.17% GC
gi|2765640|emb|Z78515.1|MXZ78515: 53.73% GC
gi|2765639|emb|Z78514.1|PSZ78514: 56.03% GC
gi|2765638|emb|Z78513.1|PBZ78513: 55.93% GC
gi|2765637|emb|Z78512.1|PWZ78512: 56.17% GC
gi|2765636|emb|Z78511.1|PEZ78511: 56.24% GC
gi|2765635|emb|Z78510.1|PCZ78510

In [None]:
# Import the pandas library for data manipulation and saving to CSV
import pandas as pd

# Build one {"ID", "GC Content"} row per FASTA record in a single pass.
results = [
    {"ID": entry.id, "GC Content": gc_content(str(entry.seq))}
    for entry in SeqIO.parse("ls_orchid.fasta.fasta", "fasta")
]

# Tabulate the rows; the dictionary keys become the column names.
df = pd.DataFrame(results)

# Write the table to 'gc_content_results.csv', leaving out the row index.
df.to_csv("gc_content_results.csv", index=False)