<a href="https://colab.research.google.com/github/Quratulain-12/Bioinformatic-services/blob/main/Untitled4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# FASTA/FASTQ Processor
# Calculates statistics and visualizes sequences
# @title Bioinformatics Sequence Analyzer
# Import libraries
!pip install biopython
from Bio import SeqIO
import matplotlib.pyplot as plt
import pandas as pd
from collections import Counter

print("🦠 FASTA/FASTQ ANALYSIS TOOL")

# @markdown Upload a FASTA/FASTQ file:
from google.colab import files

# Ask the user for a file through the Colab upload widget. The keys of the
# returned mapping are used below as the name of the file to analyse.
uploaded = files.upload()

# A cancelled or empty upload leaves nothing to analyse; fail with a clear
# message instead of an opaque IndexError on the lookup below.
if not uploaded:
    raise ValueError(
        "No file was uploaded. Re-run this cell and choose a FASTA/FASTQ file."
    )

# Only the first uploaded file is analysed; any additional files are ignored.
filename = next(iter(uploaded))

# Process sequences
#
# The parser is chosen from the file extension (case-insensitive). Anything
# that does not carry a known FASTA extension is parsed as FASTQ.
FASTA_EXTENSIONS = ('.fa', '.fasta', '.fas', '.fna')
seq_format = "fasta" if filename.lower().endswith(FASTA_EXTENSIONS) else "fastq"

sequence_ids = []   # record.id of every parsed record, in file order
sequences = []      # upper-cased sequence strings
lengths = []        # sequence lengths in bp
gc_contents = []    # per-sequence GC percentage (0-100)
nucleotide_counts = {'A':0, 'T':0, 'G':0, 'C':0}

# Single pass over the file: IDs are collected here so the file does not have
# to be parsed a second time when the CSV report is built.
for record in SeqIO.parse(filename, seq_format):
    seq = str(record.seq).upper()
    sequence_ids.append(record.id)
    sequences.append(seq)
    lengths.append(len(seq))

    # Count each base once and reuse the counts for both the GC percentage
    # and the overall composition totals.
    base_counts = {nt: seq.count(nt) for nt in nucleotide_counts}
    for nt, count in base_counts.items():
        nucleotide_counts[nt] += count

    # A zero-length record would divide by zero; report it as 0% GC.
    if seq:
        gc = (base_counts['G'] + base_counts['C']) / len(seq) * 100
    else:
        gc = 0.0
    gc_contents.append(gc)

# Without any records the averages below are undefined, so stop with an
# explicit message rather than a ZeroDivisionError.
if not sequences:
    raise ValueError(
        f"No sequences could be read from {filename!r} as {seq_format.upper()}."
    )

# Generate report
# Note: the GC average is the unweighted mean of the per-sequence percentages.
print(f"\n📊 SEQUENCE REPORT ({filename})")
print(f"Total sequences: {len(sequences)}")
print(f"Average length: {sum(lengths)/len(lengths):.1f} bp")
print(f"Average GC content: {sum(gc_contents)/len(gc_contents):.1f}%")

# Visualization
plt.figure(figsize=(15,10))

# Length distribution
plt.subplot(2,2,1)
plt.hist(lengths, bins=30, color='#1b5e20')
plt.title('Sequence Length Distribution')
plt.xlabel('Length (bp)')
plt.ylabel('Count')

# GC content distribution
plt.subplot(2,2,2)
plt.hist(gc_contents, bins=30, color='#388e3c')
plt.title('GC Content Distribution')
plt.xlabel('GC Content (%)')
plt.ylabel('Count')

# Nucleotide composition
plt.subplot(2,2,3)
if any(nucleotide_counts.values()):
    plt.pie(list(nucleotide_counts.values()), labels=list(nucleotide_counts.keys()),
            autopct='%1.1f%%', colors=['#4caf50','#81c784','#2e7d32','#1b5e20'])
else:
    # Input with no A/T/G/C at all would give the pie chart all-zero data, so
    # show a placeholder message in that panel instead.
    plt.text(0.5, 0.5, 'No A/T/G/C bases found', ha='center', va='center')
    plt.axis('off')
plt.title('Overall Nucleotide Composition')

# Avoid overlapping titles/labels and render the figure before the download
# prompt appears.
plt.tight_layout()
plt.show()

# Save results
df = pd.DataFrame({
    'Sequence ID': sequence_ids,
    'Length': lengths,
    'GC Content': gc_contents
})

df.to_csv('sequence_analysis_report.csv', index=False)
files.download('sequence_analysis_report.csv')

print("\n✅ Analysis complete! Report downloaded as CSV")