In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [2]:
dna_seq = 'AGCTTTCGAATTCGA'

In [3]:
# (a)

def is_dna_palindrome(s):
    """Return True if ``s`` is identical to its own reverse complement.

    Each base is compared with the complement of the base at the mirrored
    position (A<->T, G<->C). The comparison is case-insensitive.

    Parameters
    ----------
    s : str
        Candidate sequence.

    Returns
    -------
    bool
        True when every mirrored pair is complementary. An empty sequence
        yields True (no pair fails). An odd-length sequence, or one containing
        a symbol other than A/C/G/T (for example ``N``), yields False.
    """
    complement = {'A': 'T', 'T': 'A', 'G': 'C', 'C': 'G'}

    # The middle base of an odd-length sequence would have to be its own
    # complement, which no base is.
    if len(s) % 2:
        return False

    # Checking the first half is enough: the complement map is symmetric, so
    # pair (i, -(i+1)) matching implies pair (-(i+1), i) matches too.
    # .get() yields None for unknown symbols, which never equals a character,
    # so unrecognised bases give False instead of raising KeyError.
    return all(
        complement.get(s[i].upper()) == s[-(i + 1)].upper()
        for i in range(len(s) // 2)
    )

n = len(dna_seq)

# Every substring of length >= 2 is a candidate; the set removes repeats of
# the same palindrome found at different positions.
palindrome_set = {
    dna_seq[start:stop]
    for start in range(n)
    for stop in range(start + 2, n + 1)
    if is_dna_palindrome(dna_seq[start:stop])
}

# Longest first; equal lengths are ordered alphabetically.
unique_palindromes = sorted(palindrome_set, key=lambda seq: (-len(seq), seq))

print("Unique Palindrome Substrings: ")
for p in unique_palindromes:
    print(f" {p} length: {len(p)}")

Unique Palindrome Substrings: 
 TCGAATTCGA length: 10
 CGAATTCG length: 8
 GAATTC length: 6
 TTCGAA length: 6
 AATT length: 4
 AGCT length: 4
 TCGA length: 4
 AT length: 2
 CG length: 2
 GC length: 2


In [4]:
# (b)

def gc_content(seq):
    """Return the percentage of G and C bases in ``seq``, rounded to 2 decimals.

    Parameters
    ----------
    seq : str
        Nucleotide sequence. Upper- and lowercase G/C are both counted.

    Returns
    -------
    float
        ``100 * (G + C) / len(seq)`` rounded to two decimal places, or ``0.0``
        for an empty sequence (avoids dividing by zero).
    """
    if len(seq) == 0:
        return 0.0
    # Count both cases so lowercase input is not silently reported as 0% GC.
    gc_count = sum(seq.count(base) for base in 'GCgc')
    return round((gc_count / len(seq)) * 100, 2)

# Assemble the header and one fixed-width row per palindrome, then emit the
# whole table in a single print call.
table_header = [
    "GC content for each palindrome: ",
    "Palindrome Sequence | GC %",
    "-" * 30,
]
table_rows = [f" {seq:20} | {gc_content(seq):5.2f}%" for seq in unique_palindromes]
print("\n".join(table_header + table_rows))

GC content for each palindrome: 
Palindrome Sequence | GC %
------------------------------
 TCGAATTCGA           | 40.00%
 CGAATTCG             | 50.00%
 GAATTC               | 33.33%
 TTCGAA               | 33.33%
 AATT                 |  0.00%
 AGCT                 | 50.00%
 TCGA                 | 50.00%
 AT                   |  0.00%
 CG                   | 100.00%
 GC                   | 100.00%


In [5]:
# (c)

# default=0 keeps this cell runnable when the search found no palindromes;
# without it max() raises ValueError on an empty iterable.
longest_length = max((len(p) for p in unique_palindromes), default=0)

# Several palindromes may share the maximum length, so keep all of them.
longest_palindromes = [p for p in unique_palindromes if len(p) == longest_length]

if longest_palindromes:
    print(f"\n Longest Palindromic SubString(s) (Length: {longest_length}):")
    for p in longest_palindromes:
        print(f" {p} - GC: {gc_content(p)}")
else:
    print("\n No palindromic substrings found.")


 Longest Palindromic SubString(s) (Length: 10):
 TCGAATTCGA - GC: 40.0
