# Code Space for MATH280 Proj. 1 
### Author: Henry Yin

In [1]:
# Import needed python module for file reading
import os

In [2]:
def extract_tones_from_file(file_path):
    """
    Collect the tone digits of a numbered-pinyin text file, line by line.

    Every whitespace-separated word whose last character is a digit
    contributes that digit (as an int); all other words are ignored.

    Returns a list with one inner list of tones per line of the file
    (blank lines yield an empty inner list). If the file does not exist,
    a message is printed and an empty list is returned.
    """
    if not os.path.exists(file_path):
        print(f"File {file_path} not found. Please check the file path and try again.")
        return []

    with open(file_path, 'r', encoding='utf-8') as handle:
        # One inner list per line, keeping only syllables that end in a digit.
        return [
            [int(syllable[-1]) for syllable in row.split() if syllable[-1].isdigit()]
            for row in handle
        ]

## Strategy 1: Log-likelihood Calculation

In [3]:
import sage.all as sa
def construct_markov_matrix(tones_list, num_states=4):
    """
    Create a SageMath Markov matrix for a given list of tone sequences.

    Entry [i, j] is the fraction of transitions leaving tone i+1 that go to
    tone j+1. Rows with no observed transitions are left as all zeros.

    NOTE: The matrix is normalized so that each ROW sums to 1, which differs
    slightly from our definition in class.
    NOTE: a later cell defines a column-normalized function with the same
    name; once that cell has run, it replaces this definition.

    Parameters:
        tones_list: iterable of tone sequences (lists of 1-based tone numbers).
        num_states: number of distinct tones; valid tones are 1..num_states.

    Raises:
        ValueError: if a tone taking part in a transition is outside
            1..num_states. Without this check a tone of 0 would become
            index -1 and silently be counted as the last state.
    """
    # Initialize with zeros
    transition_counts = sa.Matrix(sa.SR, num_states, num_states, 0)

    for tones in tones_list:
        for i in range(len(tones) - 1):
            current_tone = tones[i] - 1  # Adjusting to 0-based index
            next_tone = tones[i + 1] - 1
            if not (0 <= current_tone < num_states and 0 <= next_tone < num_states):
                raise ValueError(
                    f"Tone transition {tones[i]} -> {tones[i + 1]} is outside "
                    f"the supported range 1..{num_states}."
                )
            transition_counts[current_tone, next_tone] += 1

    for i in range(num_states):
        row_sum = sum(transition_counts[i, j] for j in range(num_states))
        if row_sum > 0:  # skip tones that never start a transition
            for j in range(num_states):
                transition_counts[i, j] /= row_sum  # Normalize the row

    return transition_counts

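# Refined version adding Laplace smoothing for better prediction performance.
# It is disabled: the code is kept inside a string so it does not replace the definition above.
# NOTE: the second Matrix(...) call below resets the counts to 0, which cancels the smoothing.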
'''
def construct_markov_matrix(tones_list, num_states=5):
    transition_counts = sa.Matrix(sa.SR, num_states, num_states, 1)  # Laplace smoothing (start with 1)

    # Initialize with zeros
    transition_counts = sa.Matrix(sa.SR, num_states, num_states, 0)  

    for tones in tones_list:
        for i in range(len(tones) - 1):
            current_tone = tones[i] - 1  # Adjusting to 0-based index
            next_tone = tones[i + 1] - 1  # Adjusting to 0-based index
            transition_counts[current_tone, next_tone] += 1

    for i in range(num_states):
        row_sum = sum(transition_counts[i, j] for j in range(num_states))
        if row_sum > 0:
            for j in range(num_states):
                transition_counts[i, j] /= row_sum  # Normalize the row

    return transition_counts
'''

'\ndef construct_markov_matrix(tones_list, num_states=5):\n    transition_counts = sa.Matrix(sa.SR, num_states, num_states, 1)  # Laplace smoothing (start with 1)\n\n    # Initialize with zeros\n    transition_counts = sa.Matrix(sa.SR, num_states, num_states, 0)  \n\n    for tones in tones_list:\n        for i in range(len(tones) - 1):\n            current_tone = tones[i] - 1  # Adjusting to 0-based index\n            next_tone = tones[i + 1] - 1  # Adjusting to 0-based index\n            transition_counts[current_tone, next_tone] += 1\n\n    for i in range(num_states):\n        row_sum = sum(transition_counts[i, j] for j in range(num_states))\n        if row_sum > 0:\n            for j in range(num_states):\n                transition_counts[i, j] /= row_sum  # Normalize the row\n\n    return transition_counts\n'

In [4]:
# Strategy 1 - Calculating the log-likelihood of a tone sequence under a Markov matrix

from sage.all import log
def compute_log_likelihood(matrix, test_tones):
    """
    Sum the log-probabilities of every consecutive tone pair in a sequence.

    `matrix` is indexed as matrix[current, next] with 0-based states, while
    `test_tones` holds 1-based tone numbers. A transition whose probability
    is not positive contributes -infinity. Sequences with fewer than two
    tones give 0.
    """
    total = 0
    for tone_now, tone_after in zip(test_tones, test_tones[1:]):
        step_probability = matrix[tone_now - 1, tone_after - 1]
        # Log(0) is -infinity
        total += log(step_probability) if step_probability > 0 else float('-inf')
    return total


In [5]:
def guess_author(test_tones_list, matrix_zhu, matrix_du):
    """
    Attribute a set of tone sequences to the poet whose matrix explains them better.

    The log-likelihoods of all sequences are summed under each matrix, both
    totals are printed, and the name of the poet with the strictly larger
    total is returned. A tie goes to "Du Fu".
    """
    total_likelihood_zhu = 0
    for tones in test_tones_list:
        total_likelihood_zhu = total_likelihood_zhu + compute_log_likelihood(matrix_zhu, tones)

    total_likelihood_du = 0
    for tones in test_tones_list:
        total_likelihood_du = total_likelihood_du + compute_log_likelihood(matrix_du, tones)

    print(f"Total log-likelihood for Zhu Shuzhen: {total_likelihood_zhu}")
    print(f"Total log-likelihood for Du Fu: {total_likelihood_du}")

    return "Zhu Shuzhen" if total_likelihood_zhu > total_likelihood_du else "Du Fu"

In [6]:
def display_markov_matrix(matrix):
    """
    Print a Markov matrix under a header naming its number of tones.

    The tone count in the header is taken from the matrix itself rather than
    hard-coded, because the matrices in this notebook are built with 4 states
    by default while the old header always said "5-tone".
    """
    try:
        num_states = matrix.nrows()  # SageMath matrix
    except AttributeError:
        num_states = len(matrix)  # nested list or NumPy array
    print(f"Markov Matrix ({num_states}-tone system):")
    print(matrix)

In [7]:
# Test case: build both matrices, then classify the first 20 lines of each corpus.

zsz_file, df_file = "zsz.txt", "df.txt"

zsz_tones = extract_tones_from_file(zsz_file)
df_tones = extract_tones_from_file(df_file)

matrix_zhu = construct_markov_matrix(zsz_tones)
matrix_du = construct_markov_matrix(df_tones)

print("Zhu Shuzhen's Markov Matrix:", matrix_zhu, sep="\n")
print("\nDu Fu's Markov Matrix:", matrix_du, sep="\n")

# The test sequences are slices of the same lists the matrices were built from.
test_tones_zsz, test_tones_df = zsz_tones[:20], df_tones[:20]

# Predict authorship based on multiple test sequences.
# guess_author prints both totals before each verdict line is printed.
verdict_zsz = guess_author(test_tones_zsz, matrix_zhu, matrix_du)
print("\nPredicted author for Zhu Shuzhen's test tones: ", verdict_zsz)
verdict_df = guess_author(test_tones_df, matrix_zhu, matrix_du)
print("Predicted author for Du Fu's test tones: ", verdict_df)

Zhu Shuzhen's Markov Matrix:
[  3/10 47/190 29/190   3/10]
[53/156    1/4  11/78   7/26]
[35/103 27/103 15/103 26/103]
[55/199 56/199 27/199 61/199]

Du Fu's Markov Matrix:
[   1/3 29/103 46/309 73/309]
[88/279 73/279 49/279  23/93]
[ 27/80 41/160   1/10 49/160]
[69/236 37/118 21/118 51/236]
Total log-likelihood for Zhu Shuzhen: 3*log(35/103) + 7*log(53/156) + 6*log(61/199) + 22*log(3/10) + 6*log(56/199) + 15*log(55/199) + 6*log(7/26) + 4*log(27/103) + 6*log(26/103) + 6*log(1/4) + 8*log(47/190) + 4*log(29/190) + 2*log(15/103) + 3*log(11/78) + 4*log(27/199)
Total log-likelihood for Du Fu: 3*log(27/80) + 10*log(1/3) + 7*log(88/279) + 6*log(37/118) + 6*log(49/160) + 15*log(69/236) + 8*log(29/103) + 6*log(73/279) + 4*log(41/160) + 6*log(23/93) + 12*log(73/309) + 6*log(51/236) + 4*log(21/118) + 3*log(49/279) + 4*log(46/309) + 2*log(1/10)

Predicted author for Zhu Shuzhen's test tones:  Zhu Shuzhen
Total log-likelihood for Zhu Shuzhen: 10*log(35/103) + 9*log(53/156) + 5*log(61/199) + 19*log(

In [8]:
# Formal test: build the matrices from the full corpora, then classify the
# separate Zhu Shuzhen test file.
zsz_file, zsz_test_file = "zsz.txt", "zsz-test.txt"
df_file, df_test_file = "df.txt", ""  # df_test_file is empty and not used below

zsz_tones = extract_tones_from_file(zsz_file)
zsz_test_tones = extract_tones_from_file(zsz_test_file)
df_tones = extract_tones_from_file(df_file)

matrix_zhu = construct_markov_matrix(zsz_tones)
matrix_du = construct_markov_matrix(df_tones)

print("Zhu Shuzhen's Markov Matrix:", matrix_zhu, sep="\n")
print("\nDu Fu's Markov Matrix:", matrix_du, sep="\n")

# Predict authorship based on multiple test sequences.
# guess_author prints both totals before the verdict line is printed.
verdict_zsz_test = guess_author(zsz_test_tones, matrix_zhu, matrix_du)
print("\nPredicted author for Zhu Shuzhen's test tones: ", verdict_zsz_test)
# print("Predicted author for Du Fu's test tones: ", guess_author(test_tones_df, matrix_zhu, matrix_du))


Zhu Shuzhen's Markov Matrix:
[  3/10 47/190 29/190   3/10]
[53/156    1/4  11/78   7/26]
[35/103 27/103 15/103 26/103]
[55/199 56/199 27/199 61/199]

Du Fu's Markov Matrix:
[   1/3 29/103 46/309 73/309]
[88/279 73/279 49/279  23/93]
[ 27/80 41/160   1/10 49/160]
[69/236 37/118 21/118 51/236]
Total log-likelihood for Zhu Shuzhen: 3*log(35/103) + 5*log(53/156) + 4*log(61/199) + 3*log(3/10) + 6*log(56/199) + 3*log(55/199) + 2*log(7/26) + 5*log(26/103) + 5*log(1/4) + 3*log(47/190) + 4*log(29/190) + 2*log(15/103) + 2*log(11/78) + log(27/199)
Total log-likelihood for Du Fu: 3*log(27/80) + log(1/3) + 5*log(88/279) + 6*log(37/118) + 5*log(49/160) + 3*log(69/236) + 3*log(29/103) + 5*log(73/279) + 2*log(23/93) + 2*log(73/309) + 4*log(51/236) + log(21/118) + 2*log(49/279) + 4*log(46/309) + 2*log(1/10)

Predicted author for Zhu Shuzhen's test tones:  Zhu Shuzhen


## Strategy 2: Cosine Similarity Calculation

In [9]:
import sage.all as sa

# Markov matrix implementation in which each column sums to 1
def construct_markov_matrix(tones_list, num_states=4):
    """
    Create a SageMath matrix of tone-transition frequencies whose columns sum to 1.

    Transitions are counted at [current tone, next tone] (0-based) and every
    column with at least one count is divided by its column total. Columns
    with no counts stay all zeros.

    NOTE(review): because counts are stored as [current, next] and then
    normalized per column, column j holds the distribution of the PRECEDING
    tone given that the next tone is j+1. If the intended matrix is the usual
    column-stochastic one (entry [i, j] = P(next = i | current = j)), the two
    indices should be swapped -- confirm against the course definition.

    Parameters:
        tones_list: iterable of tone sequences (lists of 1-based tone numbers).
        num_states: number of distinct tones; valid tones are 1..num_states.

    Raises:
        ValueError: if a tone taking part in a transition is outside
            1..num_states. Without this check a tone of 0 would become
            index -1 and silently be counted as the last state.
    """
    transition_counts = sa.Matrix(sa.SR, num_states, num_states)

    for tones in tones_list:
        for i in range(len(tones) - 1):
            current_tone = tones[i] - 1  # Convert tone to zero-based index
            next_tone = tones[i + 1] - 1
            if not (0 <= current_tone < num_states and 0 <= next_tone < num_states):
                raise ValueError(
                    f"Tone transition {tones[i]} -> {tones[i + 1]} is outside "
                    f"the supported range 1..{num_states}."
                )
            transition_counts[current_tone, next_tone] += 1

    # Normalize each column to sum to 1
    for j in range(num_states):
        col_sum = sum(transition_counts[i, j] for i in range(num_states))
        if col_sum > 0:  # leave empty columns as zeros
            for i in range(num_states):
                transition_counts[i, j] /= col_sum

    return transition_counts

# Compute the Equilibrium Vector
import numpy as np

def equilibrium_vector(matrix):
    """
    Return the eigenvector for the eigenvalue nearest 1, scaled to sum to 1.

    The input is converted to a float NumPy array, so the eigen-decomposition
    is numerical rather than symbolic. Only the real part of the chosen
    eigenvector is kept.
    """
    numeric = np.array(matrix, dtype=float)
    eigvals, eigvecs = np.linalg.eig(numeric)

    # Column of eigvecs belonging to the eigenvalue closest to 1.
    nearest_to_one = np.argmin(np.abs(eigvals - 1))
    stationary = np.real(eigvecs[:, nearest_to_one])

    # Rescale so the entries add up to 1.
    return stationary / stationary.sum()

In [10]:
import sage.all as sa

# Define Cosine Similarity
def cosine_similarity(vec1, vec2):
    """Return the dot product of the two vectors divided by the product of their norms."""
    first = sa.vector(vec1)
    second = sa.vector(vec2)
    norm_product = first.norm() * second.norm()
    return first.dot_product(second) / norm_product

# Define Euclidean Distance
def euclidean_distance(vec1, vec2):
    """Return the norm of the difference between the two vectors."""
    difference = sa.vector(vec1) - sa.vector(vec2)
    return difference.norm()

# Define Weighted Score Calculation
# FIXME: abandoned attempt
def weighted_score(cos_sim, dist, alpha=0.5, beta=0.5):
    """
    Blend a cosine similarity and a distance into one score.

    The distance is first mapped to 1 / (1 + dist), so that a smaller
    distance gives a larger term; the result is
    alpha * cos_sim + beta * (1 / (1 + dist)).
    """
    closeness = 1 / (1 + dist)
    return alpha * cos_sim + beta * closeness

In [11]:
# Author Prediction Calculation
def predict_author(test_vector, vec_zhu, vec_du):
    """
    Compare a test vector with each poet's equilibrium vector in two ways.

    Prints the cosine similarities and Euclidean distances to both poets and
    returns a pair (cosine-based prediction, distance-based prediction).
    The cosine prediction picks the strictly larger similarity and the
    distance prediction the strictly smaller distance; ties go to "Du Fu".
    """
    similarity_to_zhu = cosine_similarity(test_vector, vec_zhu)
    similarity_to_du = cosine_similarity(test_vector, vec_du)
    distance_to_zhu = euclidean_distance(test_vector, vec_zhu)
    distance_to_du = euclidean_distance(test_vector, vec_du)

    print(f"Cosine Similarity with Zhu Shuzhen: {similarity_to_zhu}")
    print(f"Cosine Similarity with Du Fu: {similarity_to_du}")
    print(f"Euclidean Distance to Zhu Shuzhen: {distance_to_zhu}")
    print(f"Euclidean Distance to Du Fu: {distance_to_du}")

    if similarity_to_zhu > similarity_to_du:
        by_cosine = "Zhu Shuzhen"
    else:
        by_cosine = "Du Fu"

    if distance_to_zhu < distance_to_du:
        by_distance = "Zhu Shuzhen"
    else:
        by_distance = "Du Fu"

    return by_cosine, by_distance

In [12]:
# Debug Usage Tool Base 

def debug_matrix(matrix, name="Markov Matrix"):
    """
    Print a matrix together with its column sums and eigenvalues.

    The matrix must provide ncols(), column(j) and eigenvalues(). Any
    exception raised while obtaining or printing the eigenvalues is reported
    on stdout instead of being propagated.
    """
    print(f"\n{name}:")
    print(matrix)

    column_totals = []
    for j in range(matrix.ncols()):
        column_totals.append(sum(matrix.column(j)))
    print(f"Column sums: {column_totals}")

    try:
        print(f"Eigenvalues: {matrix.eigenvalues()}")
    except Exception as err:
        print(f"Error computing eigenvalues: {err}")

In [13]:
# Sample test case: build the column-normalized matrices and equilibrium
# vectors for both poets, then classify the first 20 lines of each corpus.
zsz_file = "zsz.txt"
df_file = "df.txt"

zsz_tones = extract_tones_from_file(zsz_file)
df_tones = extract_tones_from_file(df_file)

# Drop lines that produced no tones (e.g. blank lines).
filtered_zsz_tones = [seq for seq in zsz_tones if seq]
filtered_df_tones = [seq for seq in df_tones if seq]

matrix_zhu = construct_markov_matrix(filtered_zsz_tones)
matrix_du = construct_markov_matrix(filtered_df_tones)

debug_matrix(matrix_zhu, "Zhu Shuzhen's Markov Matrix")
debug_matrix(matrix_du, "Du Fu's Markov Matrix")

try:
    vec_zhu = equilibrium_vector(matrix_zhu)
    vec_du = equilibrium_vector(matrix_du)
except ValueError as e:
    print(f"Error computing equilibrium vector: {e}")
    # Re-raise instead of calling exit(): inside a notebook exit() shuts the
    # kernel down, whereas raising only stops this cell and keeps the state.
    raise

# The test vectors come from slices of the same corpora used above.
test_vector_zsz = equilibrium_vector(construct_markov_matrix(zsz_tones[:20]))
test_vector_df = equilibrium_vector(construct_markov_matrix(df_tones[:20]))

print("\nPredicted author for Zhu Shuzhen's test tones:")
cos_pred, dist_pred = predict_author(test_vector_zsz, vec_zhu, vec_du)
print(f"Cosine Similarity Prediction: {cos_pred}")
print(f"Euclidean Distance Prediction: {dist_pred}")

print("\nPredicted author for Du Fu's test tones:")
cos_pred, dist_pred = predict_author(test_vector_df, vec_zhu, vec_du)
print(f"Cosine Similarity Prediction: {cos_pred}")
print(f"Euclidean Distance Prediction: {dist_pred}")

'''
# Paths for input texts
zsz_file = "zsz.txt"
df_file = "df.txt"

# Extract tones from both files
zsz_tones = extract_tones_from_file(zsz_file)
df_tones = extract_tones_from_file(df_file)

filtered_zsz_tones = [seq for seq in zsz_tones if seq]
filtered_df_tones = [seq for seq in df_tones if seq]

matrix_zhu = construct_markov_matrix(filtered_zsz_tones)
matrix_du = construct_markov_matrix(filtered_df_tones)

debug_matrix(matrix_zhu, "Zhu Shuzhen's Markov Matrix")
debug_matrix(matrix_du, "Du Fu's Markov Matrix")

# Compute equilibrium vectors using the NumPy-based method
try:
    vec_zhu = equilibrium_vector(matrix_zhu)
    vec_du = equilibrium_vector(matrix_du)
except ValueError as e:
    print(f"Error computing equilibrium vector: {e}")
    exit()

# Multiple Sequences Prediction
print("\nPredicted author for Zhu Shuzhen's test tones:",
      predict_author(filtered_zsz_tones[:3], vec_zhu, vec_du))
print("Predicted author for Du Fu's test tones:",
      predict_author(filtered_df_tones[:3], vec_zhu, vec_du))

# Single Sequence Prediction
print("\nPredicted author for a single Zhu Shuzhen sequence:",
      predict_author([filtered_zsz_tones[0]], vec_zhu, vec_du))
print("Predicted author for a single Du Fu sequence:",
      predict_author([filtered_df_tones[0]], vec_zhu, vec_du))
'''


Zhu Shuzhen's Markov Matrix:
[57/200 47/169  29/93  19/62]
[53/200   3/13  22/93   7/31]
[  7/40 27/169   5/31  13/93]
[ 11/40 56/169   9/31 61/186]
Column sums: [1, 1, 1, 1]
Eigenvalues: [-1/1450800*(900*sqrt(117109835103492073)*sqrt(130) + 3501358455997)^(1/3)*(I*sqrt(3) + 1) + 41631431/1450800*(-I*sqrt(3) + 1)/(900*sqrt(117109835103492073)*sqrt(130) + 3501358455997)^(1/3) + 1213/725400, -1/1450800*(900*sqrt(117109835103492073)*sqrt(130) + 3501358455997)^(1/3)*(-I*sqrt(3) + 1) + 41631431/1450800*(I*sqrt(3) + 1)/(900*sqrt(117109835103492073)*sqrt(130) + 3501358455997)^(1/3) + 1213/725400, 1/725400*(900*sqrt(117109835103492073)*sqrt(130) + 3501358455997)^(1/3) - 41631431/725400/(900*sqrt(117109835103492073)*sqrt(130) + 3501358455997)^(1/3) + 1213/725400, 1]

Du Fu's Markov Matrix:
[103/314  87/275  46/153  73/242]
[ 44/157  73/275  49/153  69/242]
[ 27/157  41/275  16/153  49/242]
[ 69/314  74/275   14/51  51/242]
Column sums: [1, 1, 1, 1]
Eigenvalues: [-1/871962300*(59451975*sqrt(346

'\n# Paths for input texts\nzsz_file = "zsz.txt"\ndf_file = "df.txt"\n\n# Extract tones from both files\nzsz_tones = extract_tones_from_file(zsz_file)\ndf_tones = extract_tones_from_file(df_file)\n\nfiltered_zsz_tones = [seq for seq in zsz_tones if seq]\nfiltered_df_tones = [seq for seq in df_tones if seq]\n\nmatrix_zhu = construct_markov_matrix(filtered_zsz_tones)\nmatrix_du = construct_markov_matrix(filtered_df_tones)\n\ndebug_matrix(matrix_zhu, "Zhu Shuzhen\'s Markov Matrix")\ndebug_matrix(matrix_du, "Du Fu\'s Markov Matrix")\n\n# Compute equilibrium vectors using the NumPy-based method\ntry:\n    vec_zhu = equilibrium_vector(matrix_zhu)\n    vec_du = equilibrium_vector(matrix_du)\nexcept ValueError as e:\n    print(f"Error computing equilibrium vector: {e}")\n    exit()\n\n# Multiple Sequences Prediction\nprint("\nPredicted author for Zhu Shuzhen\'s test tones:",\n      predict_author(filtered_zsz_tones[:3], vec_zhu, vec_du))\nprint("Predicted author for Du Fu\'s test tones:",\n   

In [14]:
# Show the equilibrium vectors computed in the previous cell.
print("Equilibrium Vector for Zhu Shuzhen's Matrix:", vec_zhu, sep="\n")
print("Equilibrium Vector for Du Fu's Matrix:", vec_du, sep="\n")

Equilibrium Vector for Zhu Shuzhen's Matrix:
[0.29418287 0.24023142 0.15834941 0.3072363 ]
Equilibrium Vector for Du Fu's Matrix:
[0.31394412 0.28370395 0.16190486 0.24044708]


In [15]:
# Show the column-normalized matrices built earlier.
print("Zhu Shuzhen's Markov Matrix:", matrix_zhu, sep="\n")
print("Du Fu's Markov Matrix:", matrix_du, sep="\n")

Zhu Shuzhen's Markov Matrix:
[57/200 47/169  29/93  19/62]
[53/200   3/13  22/93   7/31]
[  7/40 27/169   5/31  13/93]
[ 11/40 56/169   9/31 61/186]
Du Fu's Markov Matrix:
[103/314  87/275  46/153  73/242]
[ 44/157  73/275  49/153  69/242]
[ 27/157  41/275  16/153  49/242]
[ 69/314  74/275   14/51  51/242]


In [16]:
# Formal Test Case Implementation:
zsz_file = "zsz.txt"
zsz_file_test = "zsz-test.txt"
df_file = "df.txt"

zsz_tones = extract_tones_from_file(zsz_file)
zsz_test_tones = extract_tones_from_file(zsz_file_test)
df_tones = extract_tones_from_file(df_file)

# Drop lines that produced no tones (e.g. blank lines).
filtered_zsz_tones = [seq for seq in zsz_tones if seq]
filtered_zsz_tones_test = [seq for seq in zsz_test_tones if seq]
filtered_df_tones = [seq for seq in df_tones if seq]

matrix_zhu = construct_markov_matrix(filtered_zsz_tones)
matrix_du = construct_markov_matrix(filtered_df_tones)

debug_matrix(matrix_zhu, "Zhu Shuzhen's Markov Matrix")
debug_matrix(matrix_du, "Du Fu's Markov Matrix")

try:
    vec_zhu = equilibrium_vector(matrix_zhu)
    vec_du = equilibrium_vector(matrix_du)
except ValueError as e:
    print(f"Error computing equilibrium vector: {e}")
    # Re-raise instead of calling exit(): inside a notebook exit() shuts the
    # kernel down, whereas raising only stops this cell and keeps the state.
    raise

# Build the test matrix from the tones read from the test file. Passing the
# file NAME to construct_markov_matrix would iterate over its characters,
# count no transitions, and yield an all-zero matrix.
test_vector_zsz = equilibrium_vector(construct_markov_matrix(filtered_zsz_tones_test))
# test_vector_df = equilibrium_vector(construct_markov_matrix(df_tones[:20]))

print("\nPredicted author for Zhu Shuzhen's test tones:")
cos_pred, dist_pred = predict_author(test_vector_zsz, vec_zhu, vec_du)
print(f"Cosine Similarity Prediction: {cos_pred}")
print(f"Euclidean Distance Prediction: {dist_pred}")

'''
print("\nPredicted author for Du Fu's test tones:")
cos_pred, dist_pred = predict_author(test_vector_df, vec_zhu, vec_du)
print(f"Cosine Similarity Prediction: {cos_pred}")
print(f"Euclidean Distance Prediction: {dist_pred}")
'''


Zhu Shuzhen's Markov Matrix:
[57/200 47/169  29/93  19/62]
[53/200   3/13  22/93   7/31]
[  7/40 27/169   5/31  13/93]
[ 11/40 56/169   9/31 61/186]
Column sums: [1, 1, 1, 1]
Eigenvalues: [-1/1450800*(900*sqrt(117109835103492073)*sqrt(130) + 3501358455997)^(1/3)*(I*sqrt(3) + 1) + 41631431/1450800*(-I*sqrt(3) + 1)/(900*sqrt(117109835103492073)*sqrt(130) + 3501358455997)^(1/3) + 1213/725400, -1/1450800*(900*sqrt(117109835103492073)*sqrt(130) + 3501358455997)^(1/3)*(-I*sqrt(3) + 1) + 41631431/1450800*(I*sqrt(3) + 1)/(900*sqrt(117109835103492073)*sqrt(130) + 3501358455997)^(1/3) + 1213/725400, 1/725400*(900*sqrt(117109835103492073)*sqrt(130) + 3501358455997)^(1/3) - 41631431/725400/(900*sqrt(117109835103492073)*sqrt(130) + 3501358455997)^(1/3) + 1213/725400, 1]

Du Fu's Markov Matrix:
[103/314  87/275  46/153  73/242]
[ 44/157  73/275  49/153  69/242]
[ 27/157  41/275  16/153  49/242]
[ 69/314  74/275   14/51  51/242]
Column sums: [1, 1, 1, 1]
Eigenvalues: [-1/871962300*(59451975*sqrt(346

'\nprint("\nPredicted author for Du Fu\'s test tones:")\ncos_pred, dist_pred = predict_author(test_vector_df, vec_zhu, vec_du)\nprint(f"Cosine Similarity Prediction: {cos_pred}")\nprint(f"Euclidean Distance Prediction: {dist_pred}")\n'

## Baseline Strategy: Vector Norm Comparison

In [17]:
def baseline_vector_infinity_norm_comparison(test_vector, vec_zhu, vec_du):
    """
    Baseline strategy: pick the poet whose vector is nearest in the infinity norm.

    The largest absolute component-wise difference between the test vector
    and each poet's vector is computed and printed. "Zhu Shuzhen" is returned
    only when her difference is strictly smaller; otherwise "Du Fu".
    """
    # np.array accepts lists as well as existing arrays.
    candidate = np.array(test_vector)
    zhu_reference = np.array(vec_zhu)
    du_reference = np.array(vec_du)

    # Infinity norm of each difference = maximum absolute component.
    norm_diff_zhu = np.abs(candidate - zhu_reference).max()
    norm_diff_du = np.abs(candidate - du_reference).max()

    # Print both differences for verification.
    print(f"Infinity Norm Difference with Zhu Shuzhen: {norm_diff_zhu}")
    print(f"Infinity Norm Difference with Du Fu: {norm_diff_du}")

    if norm_diff_zhu < norm_diff_du:
        return "Zhu Shuzhen"
    return "Du Fu"

In [18]:
# Example test case: Predict using infinity norm-based baseline strategy

zsz_file = "zsz.txt"
zsz_file_test = "zsz-test.txt"
df_file = "df.txt"

zsz_tones = extract_tones_from_file(zsz_file)
df_tones = extract_tones_from_file(df_file)

# Drop lines that produced no tones (e.g. blank lines).
filtered_zsz_tones = [seq for seq in zsz_tones if seq]
filtered_df_tones = [seq for seq in df_tones if seq]

matrix_zhu = construct_markov_matrix(filtered_zsz_tones)
matrix_du = construct_markov_matrix(filtered_df_tones)

# Compute the reference vectors from the matrices built in this cell, so the
# cell does not depend on vec_zhu / vec_du left behind by an earlier cell.
vec_zhu = equilibrium_vector(matrix_zhu)
vec_du = equilibrium_vector(matrix_du)

# The test vectors use only the first 3 lines of each corpus.
test_vector_zsz = equilibrium_vector(construct_markov_matrix(zsz_tones[:3]))
test_vector_df = equilibrium_vector(construct_markov_matrix(df_tones[:3]))

print("\nBaseline Infinity Norm Prediction for Zhu Shuzhen's test tones:")
baseline_infinity_norm_prediction = baseline_vector_infinity_norm_comparison(test_vector_zsz, vec_zhu, vec_du)
print(f"Baseline Infinity Norm Prediction: {baseline_infinity_norm_prediction}")

print("\nBaseline Infinity Norm Prediction for Du Fu's test tones:")
baseline_infinity_norm_prediction = baseline_vector_infinity_norm_comparison(test_vector_df, vec_zhu, vec_du)
print(f"Baseline Infinity Norm Prediction: {baseline_infinity_norm_prediction}")


Baseline Infinity Norm Prediction for Zhu Shuzhen's test tones:
Infinity Norm Difference with Zhu Shuzhen: 0.20931363136794695
Infinity Norm Difference with Du Fu: 0.18955238809187513
Baseline Infinity Norm Prediction: Du Fu

Baseline Infinity Norm Prediction for Du Fu's test tones:
Infinity Norm Difference with Zhu Shuzhen: 0.19672621878053476
Infinity Norm Difference with Du Fu: 0.17696497550446294
Baseline Infinity Norm Prediction: Du Fu


In [19]:
# Formal Test case:
zsz_file = "zsz.txt"
zsz_file_test = "zsz-test.txt"
df_file = "df.txt"

zsz_tones = extract_tones_from_file(zsz_file)
zsz_test_tones = extract_tones_from_file(zsz_file_test)
df_tones = extract_tones_from_file(df_file)

# Drop lines that produced no tones (e.g. blank lines).
filtered_zsz_tones = [seq for seq in zsz_tones if seq]
filtered_zsz_tones_test = [seq for seq in zsz_test_tones if seq]
filtered_df_tones = [seq for seq in df_tones if seq]

matrix_zhu = construct_markov_matrix(filtered_zsz_tones)
matrix_zhu_test = construct_markov_matrix(filtered_zsz_tones_test)
matrix_du = construct_markov_matrix(filtered_df_tones)

# Compute the reference vectors from the matrices built in this cell, so the
# cell does not depend on vec_zhu / vec_du left behind by an earlier cell.
vec_zhu = equilibrium_vector(matrix_zhu)
vec_du = equilibrium_vector(matrix_du)

test_vector_zsz = equilibrium_vector(matrix_zhu_test)

print("\nBaseline Infinity Norm Prediction for Zhu Shuzhen's test tones:")
baseline_infinity_norm_prediction = baseline_vector_infinity_norm_comparison(test_vector_zsz, vec_zhu, vec_du)
print(f"Baseline Infinity Norm Prediction: {baseline_infinity_norm_prediction}")

'''
print("\nBaseline Infinity Norm Prediction for Du Fu's test tones:")
baseline_infinity_norm_prediction = baseline_vector_infinity_norm_comparison(test_vector_df, vec_zhu, vec_du)
print(f"Baseline Infinity Norm Prediction: {baseline_infinity_norm_prediction}")
'''


Baseline Infinity Norm Prediction for Zhu Shuzhen's test tones:
Infinity Norm Difference with Zhu Shuzhen: 0.07692068226362686
Infinity Norm Difference with Du Fu: 0.09668192553969868
Baseline Infinity Norm Prediction: Zhu Shuzhen


'\nprint("\nBaseline Infinity Norm Prediction for Du Fu\'s test tones:")\nbaseline_infinity_norm_prediction = baseline_vector_infinity_norm_comparison(test_vector_df, vec_zhu, vec_du)\nprint(f"Baseline Infinity Norm Prediction: {baseline_infinity_norm_prediction}")\n'