In [None]:
!pip install biopython #Running on Colab, install biopython on Colab
import matplotlib.pyplot as plt
from Bio import SeqIO
from collections import Counter

In [None]:
# Draw the ATCG distribution

# Parse the FASTA file and keep every record in memory
records = [*SeqIO.parse("Suspect.fasta", "fasta")]

# Join the text of all records into one string, then upper-case it in a
# single pass (a one-record file simply yields that record's sequence)
sequence = "".join(str(rec.seq) for rec in records).upper()

# Tally every symbol, then pick out the four bases in plotting order;
# indexing a Counter with an absent key yields 0 without inserting the key
base_counts = Counter(sequence)
bases = list("ATGC")
counts = [base_counts[symbol] for symbol in bases]

# 1. Histogram
# Bar chart of the raw count per base, drawn through an explicit
# Figure/Axes pair instead of the implicit pyplot state machine.
bar_colors = ["#66c2a5", "#fc8d62", "#8da0cb", "#e78ac3"]
hist_fig, hist_ax = plt.subplots(figsize=(6, 4))
hist_ax.bar(bases, counts, color=bar_colors)
hist_ax.set_title("Nucleotide Frequency Histogram")
hist_ax.set_xlabel("Base")
hist_ax.set_ylabel("Count")
# Horizontal guide lines only, so bar heights are easy to read off
hist_ax.grid(True, axis='y', linestyle='--', alpha=0.7)
hist_fig.tight_layout()
plt.show()

# 2. Pie Chart
# Share of each base among the counted A/T/G/C symbols.
# plt.pie sizes every wedge as x / sum(x), so all-zero counts (an empty
# FASTA file, or one containing no A/T/G/C) cannot be drawn; skip the chart
# in that case rather than dividing by zero inside matplotlib.
if sum(counts) > 0:
    plt.figure(figsize=(5, 5))
    plt.pie(counts, labels=bases, autopct='%1.1f%%', colors=["#66c2a5", "#fc8d62", "#8da0cb", "#e78ac3"])
    plt.title("Nucleotide Composition Pie Chart")
    plt.tight_layout()
    plt.show()
else:
    print("No A/T/G/C bases found; skipping the pie chart.")

# 3. GC content and other information (optional)
# `total` is the number of A/T/G/C symbols only; anything else in the
# sequence (e.g. N) is excluded from the GC denominator.
total = sum(counts)
gc_bases = base_counts.get('G', 0) + base_counts.get('C', 0)

# Guard the division: with no A/T/G/C counted (empty file, or only other
# symbols) the ratio is undefined, so report 0.0 instead of raising
# ZeroDivisionError.
gc_content = gc_bases / total * 100 if total else 0.0

# Report the real sequence length; it differs from `total` whenever the
# input contains symbols other than A/T/G/C.
print(f"Total Length: {len(sequence)}")
other_symbols = len(sequence) - total
if other_symbols:
    print(f"Non-A/T/G/C symbols (excluded from GC content): {other_symbols}")
print(f"GC Content: {gc_content:.2f}%")