In [6]:
import pandas as pd
import numpy as np

def build_markov_transition_matrix(sequence):
    """Estimate first-order nucleotide transition probabilities.

    Every adjacent pair of characters in ``sequence`` is examined
    case-insensitively. A pair contributes one count to the
    (current, next) cell only when both characters are one of
    A, C, G or T; any other pair is skipped.

    Args:
        sequence: Indexable sequence of single-character strings,
            typically a DNA string such as ``"ACGT"``.

    Returns:
        A 4x4 ``pandas.DataFrame`` indexed (rows = current nucleotide,
        columns = next nucleotide) by ``['A', 'C', 'G', 'T']``. Each row
        holds the observed transition frequencies out of that nucleotide;
        a row with no observed outgoing transitions is all zeros.
    """
    nucleotides = ['A', 'C', 'G', 'T']
    position = {base: idx for idx, base in enumerate(nucleotides)}
    size = len(nucleotides)

    # Tally how often each nucleotide is immediately followed by another.
    counts = np.zeros((size, size), dtype=float)
    for offset in range(1, len(sequence)):
        src = position.get(sequence[offset - 1].upper())
        dst = position.get(sequence[offset].upper())
        if src is None or dst is None:
            # At least one side of the pair is not A/C/G/T.
            continue
        counts[src, dst] += 1

    # Row-normalize; rows whose total is zero keep their zero fill so we
    # never divide by zero.
    totals = counts.sum(axis=1, keepdims=True)
    probabilities = np.zeros_like(counts)
    np.divide(counts, totals, out=probabilities, where=totals != 0)

    # Label rows/columns with the nucleotide letters for readability.
    return pd.DataFrame(probabilities, index=nucleotides, columns=nucleotides)

def main():
    """Print the transition matrix computed for a sample DNA sequence."""
    # Placeholder input. To analyse a real sequence, replace the literal
    # below or load it from a file, e.g.:
    #     with open('sequence.txt', 'r') as f:
    #         sequence = f.read().strip()
    dna = "ACGTACGTACGTACGT"

    # Emit the heading and the matrix on consecutive lines.
    print("Markov Transition Matrix:", build_markov_transition_matrix(dna), sep="\n")

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()

Markov Transition Matrix:
     A    C    G    T
A  0.0  1.0  0.0  0.0
C  0.0  0.0  1.0  0.0
G  0.0  0.0  0.0  1.0
T  1.0  0.0  0.0  0.0
