In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Absolute location of the tab-delimited hits log for the first dataset
# (Skoglund 2014 mapping); edit this to match where the file lives locally.
hits_path = '/data/Skoglund2014/skoglund_mapped/renamed/hits.log'

In [None]:
# Step 1: load the hits log. The file is tab-separated and carries no header
# row, so the column labels are supplied here (change `hits_path` if the log
# lives elsewhere).
column_names = [
    'processed',  # fragments processed
    'seq_len',
    'max_l',
    'l',          # position of the aDNA read
    'i',          # index of the substitution type; 7 denotes C>T
    'lsize',
    'diff5p',     # difference in C>T substitution frequency
    'diff3p',
]
df = pd.read_csv(
    hits_path,
    names=column_names,
    header=None,
    sep='\t',
)

In [None]:
# Only the first 100,000 rows of the log are carried forward to the plot.
df = df.iloc[:100_000]

In [None]:
# Step 2: keep the C>T rows (substitution index 7) at positions 0 through 4.
is_c_to_t = df['i'] == 7
in_first_five = df['l'].isin(range(5))
df_ct = df[is_c_to_t & in_first_five]

# Step 3: one convergence line per position, drawn against a log-scaled x-axis.
fig, ax = plt.subplots()
# groupby yields the positions in ascending order, one sub-frame per position.
for pos, subset in df_ct.groupby('l'):
    ax.plot(subset['processed'], subset['diff5p'], label=str(int(pos)))

ax.set_xscale('log')
ax.set_xlabel('Fragments Processed (log scale)')
ax.set_ylabel('Difference of C>T Substitution Frequency\nper 500 Analysed Fragments')
ax.set_title('Convergence of Substitution Frequency by Position\nAncient Human Sample; 5,000BP; ~1.33X Cov; Skoglund, 2014')
ax.legend(title='Fragment Position')
fig.tight_layout()
# Write the figure to disk as SVG before showing it inline.
fig.savefig('convergence_high.svg', format='svg', bbox_inches='tight')
plt.show()

In [None]:
# Absolute location of the hits log for the second dataset plotted below;
# edit this to match where the file lives locally.
hits_path = '/02emp/bams/hits_low_meta.log'

In [None]:
# Step 1: load the hits log. The file is tab-separated and carries no header
# row, so the column labels are supplied here (change `hits_path` if the log
# lives elsewhere).
column_names = [
    'processed',  # fragments processed
    'seq_len',
    'max_l',
    'l',          # position of the aDNA read
    'i',          # index of the substitution type; 7 denotes C>T
    'lsize',
    'diff5p',     # difference in C>T substitution frequency
    'diff3p',
]
df = pd.read_csv(
    hits_path,
    names=column_names,
    header=None,
    sep='\t',
)

In [None]:
# Show the freshly loaded frame as this cell's output for a quick visual check
# before it is truncated and filtered in the following cells.
df

In [None]:
# Only the first 500 rows of the log are carried forward to the plot.
df = df.iloc[:500]

In [None]:
# 2. Filter to only C>T (i == 7) and positions 0–4
df_ct = df[(df['i'] == 7) & (df['l'].isin(range(5)))]

# 3. Plot convergence curves with a logarithmic x-axis: one line per position,
#    fragments processed on x and the 5' frequency difference on y.
plt.figure()
for pos in sorted(df_ct['l'].unique()):
    subset = df_ct[df_ct['l'] == pos]
    plt.plot(
        subset['processed'],
        subset['diff5p'],
        label=f'{int(pos)}'
    )

plt.xscale('log')
plt.xlabel('Fragments Processed (log scale)')
plt.ylabel('Difference of C>T Substitution Frequency\nper 500 Analysed Fragments')
# The species name is italicised through matplotlib mathtext. That fragment is
# a raw string so the backslash in `\it` is passed through literally; in a
# normal string literal `\i` is an invalid escape sequence that newer Python
# versions warn about. The title is assembled by implicit concatenation rather
# than a backslash continuation inside the literal, which had leaked a stray
# leading space into the third title line.
plt.title(
    'Convergence of Substitution Frequency by Position\n'
    r'$\it{Tannerella}$ $\it{forsythia}$'
    ' from Dental Calculus\n'
    'Later Stone Age; <1X Cov; Fellows-Yates, 2021'
)
plt.legend(title='Fragment Position')
plt.tight_layout()
# Write the figure to disk as SVG before showing it inline.
plt.savefig('convergence_low.svg', format='svg', bbox_inches='tight')
plt.show()