# Canid Analyses
## Dependencies
First we will load the necessary packages.

In [1]:
# Import packages.
import numpy as np
import pandas as pd
from scipy.stats import norm
import scipy.stats as stats

Next we define a function to calculate all the introgression metrics for detecting introgression.

In [2]:
# Define a function to calculate D, Danc, and D+.
def canid_detection(abba, baba, baaa, abaa):
    """
    ###########################################################################
    INPUT: ABBA, BABA, BAAA, and ABAA site pattern counts; the callers in this
           notebook pass either single genome-wide counts or NumPy arrays of
           bootstrapped counts, which are evaluated element-wise.
    ---------------------------------------------------------------------------
    OUTPUT: Tuple of (Patterson's D, Danc, D+), where
            D    = (ABBA - BABA) / (ABBA + BABA)
            Danc = (BAAA - ABAA) / (BAAA + ABAA)
            D+   = ((ABBA - BABA) + (BAAA - ABAA))
                   / ((ABBA + BABA) + (BAAA + ABAA))
    ###########################################################################
    """
    # Numerators: the excess of one site pattern over its counterpart.
    derived_diff = abba - baba
    ancestral_diff = baaa - abaa
    # Denominators: the total number of sites in each pattern pair.
    derived_total = abba + baba
    ancestral_total = baaa + abaa
    # Patterson's D only uses the ABBA/BABA pair.
    d = derived_diff / derived_total
    # Danc only uses the BAAA/ABAA pair.
    danc = ancestral_diff / ancestral_total
    # D+ pools both pairs in the numerator and in the denominator.
    dplus = (derived_diff + ancestral_diff) / (derived_total + ancestral_total)
    return d, danc, dplus

Next we define a function to calculate all the introgression metrics for quantifying introgression.

In [3]:
# Define a function to calculate fhom, fanc, and f+.
def canid_quantification(abba, baba, aaba, baaa, abaa, bbaa):
    """
    ###########################################################################
    INPUT: ABBA, BABA, AABA, BAAA, ABAA, and BBAA site pattern counts; the
           callers in this notebook pass either single genome-wide counts or
           NumPy arrays of bootstrapped counts, which are evaluated
           element-wise.
    ---------------------------------------------------------------------------
    OUTPUT: Tuple of (fhom, fanc, f+), where
            fhom = (ABBA - BABA) / (ABBA + AABA)
            fanc = (BAAA - ABAA) / (BAAA + BBAA)
            f+   = ((ABBA - BABA) + (BAAA - ABAA))
                   / ((ABBA + AABA) + (BAAA + BBAA))
    ###########################################################################
    """
    # Numerators: the same site pattern differences used by the D statistics.
    derived_diff = abba - baba
    ancestral_diff = baaa - abaa
    # Denominators: the normalizing site pattern totals for each estimator.
    derived_norm = abba + aaba
    ancestral_norm = baaa + bbaa
    # fhom only uses the derived-allele terms.
    fhom = derived_diff / derived_norm
    # fanc only uses the ancestral-allele terms.
    fanc = ancestral_diff / ancestral_norm
    # f+ pools both sets of terms in the numerator and in the denominator.
    fplus = (derived_diff + ancestral_diff) / (derived_norm + ancestral_norm)
    return fhom, fanc, fplus

## (((Dingo, Basenji), Israeli Wolf), Golden Jackal) Results
First we load the observed and bootstrapped site pattern results.

In [4]:
# Read the observed site pattern counts (comma-delimited integers); the cells
# below index this array with a single position per site pattern.
obs_site_patterns = np.loadtxt('./canid_site_pattern_counts.csv', dtype=int, delimiter=',')
# Read the bootstrapped site pattern counts (comma-delimited integers); the
# cells below index this array as [:, column], i.e., one column per site pattern.
bs_site_patterns = np.loadtxt('./canid_bootstrapped_site_patterns.csv', dtype=int, delimiter=',')

Next we calculate all introgression metrics and assess significance.

In [5]:
# Map each function argument to the position of its site pattern in the
# observed (1D) and bootstrapped (2D, one column per pattern) count arrays.
detection_cols = {'abba': 0, 'baba': 1, 'baaa': 3, 'abaa': 4}
quantification_cols = {
    'abba': 0, 'baba': 1, 'aaba': 5, 'baaa': 3, 'abaa': 4, 'bbaa': 2,
}
# Observed D, Danc, and D+.
obs_d, obs_danc, obs_dplus = canid_detection(
    **{pattern: obs_site_patterns[col] for pattern, col in detection_cols.items()}
)
# Observed fhom, fanc, and f+.
obs_fhom, obs_fanc, obs_fplus = canid_quantification(
    **{pattern: obs_site_patterns[col] for pattern, col in quantification_cols.items()}
)
# Bootstrapped distributions of D, Danc, and D+ (one value per replicate).
bs_d, bs_danc, bs_dplus = canid_detection(
    **{pattern: bs_site_patterns[:, col] for pattern, col in detection_cols.items()}
)
# Bootstrapped distributions of fhom, fanc, and f+ (one value per replicate).
bs_fhom, bs_fanc, bs_fplus = canid_quantification(
    **{pattern: bs_site_patterns[:, col] for pattern, col in quantification_cols.items()}
)
# Standard deviation of every bootstrapped distribution.
bs_d_std, bs_danc_std, bs_dplus_std = (
    np.std(dist) for dist in (bs_d, bs_danc, bs_dplus)
)
bs_fhom_std, bs_fanc_std, bs_fplus_std = (
    np.std(dist) for dist in (bs_fhom, bs_fanc, bs_fplus)
)
# P-values for the detection metrics: the upper-tail probability of the
# absolute observed value under a normal distribution centered at zero whose
# scale is the bootstrapped standard deviation.
d_pval = norm.sf(abs(obs_d), loc=0, scale=bs_d_std)
danc_pval = norm.sf(abs(obs_danc), loc=0, scale=bs_danc_std)
dplus_pval = norm.sf(abs(obs_dplus), loc=0, scale=bs_dplus_std)

Next we generate a pandas data frame to visualize the results.

In [6]:
# Pull out the observed counts of the four site patterns shown in the table.
obs_abba, obs_baba = obs_site_patterns[0], obs_site_patterns[1]
obs_baaa, obs_abaa = obs_site_patterns[3], obs_site_patterns[4]
# Site pattern differences (the numerators of D and Danc).
obs_abba_baba = obs_abba - obs_baba
obs_baaa_abaa = obs_baaa - obs_abaa
# Column label and value pairs, listed in the order they should be displayed.
summary_columns = (
    (r'$ABBA$', obs_abba),
    (r'$BABA$', obs_baba),
    (r'$BAAA$', obs_baaa),
    (r'$ABAA$', obs_abaa),
    (r'$ABBA-BABA$', obs_abba_baba),
    (r'$BAAA-ABAA$', obs_baaa_abaa),
    (r'$D$', obs_d),
    (r'$D \;(p-value)$', d_pval),
    (r'$D_{anc}$', obs_danc),
    (r'$D_{anc} \;(p-value)$', danc_pval),
    (r'$D+$', obs_dplus),
    (r'$D+ \;(p-value)$', dplus_pval),
    (r'$f_{hom}$', obs_fhom),
    (r'$f_{hom} \;(\sigma)$', bs_fhom_std),
    (r'$f_{anc}$', obs_fanc),
    (r'$f_{anc} \;(\sigma)$', bs_fanc_std),
    (r'$f+$', obs_fplus),
    (r'$f+ \;(\sigma)$', bs_fplus_std),
)
# Build a single-row data frame with one column per label.
results_df = pd.DataFrame(
    data={label: [value] for label, value in summary_columns},
)
# Display the data frame as the cell output.
results_df

Unnamed: 0,$ABBA$,$BABA$,$BAAA$,$ABAA$,$ABBA-BABA$,$BAAA-ABAA$,$D$,$D \;(p-value)$,$D_{anc}$,$D_{anc} \;(p-value)$,$D+$,$D+ \;(p-value)$,$f_{hom}$,$f_{hom} \;(\sigma)$,$f_{anc}$,$f_{anc} \;(\sigma)$,$f+$,$f+ \;(\sigma)$
0,186412,154645,413597,370899,31767,42698,0.093143,1.205284e-43,0.054427,9.033022e-40,0.066159,5.049422e-49,0.050855,0.003773,0.064633,0.004862,0.057936,0.003965


Lastly we export the pandas data frame to a .tex file.

In [7]:
# Initialize the row labels: one trio per row of the results data frame.
trio_array = np.array(['(((Dingo, Basenji), Israeli Wolf), Golden Jackal)'])
# Initialize the table header, one list entry per line of LaTeX.
header_lines = [
    r'\begin{tabular}{@{}lcccccccccccccccccc@{}}',
    r'\toprule',
    r' & \multicolumn{4}{c}{Site Pattern Counts} & \multicolumn{2}{c}{Site Pattern Differences} & \multicolumn{6}{c}{Detection Metrics} & \multicolumn{6}{c}{Quantification Metrics} \\',
    r'\cmidrule(lr){2-5} \cmidrule(lr){6-7}  \cmidrule(lr){8-13} \cmidrule(lr){14-19}',
    (
        r' & $ABBA$ & $BABA$ & $BAAA$ & $ABAA$ & $ABBA-BABA$ & $BAAA-ABAA$ '
        r'& $D$ & $p-value$ & $D_{anc}$ & $p-value$ & $D+$ & $p-value$ '
        r'& $f_{hom}$ & $\sigma$ & $f_{anc}$ & $\sigma$ & $f+$ & $\sigma$ \\'
    ),
    r'\midrule',
]
header = '\n'.join(header_lines) + '\n'
# Initialize the table footer.
footer = r'\bottomrule' + '\n' + r'\end{tabular}' + '\n'
# Initialize a matrix view of the results data frame (one row per trio).
df_mat = results_df.to_numpy()
# Write the table; the context manager guarantees the file is closed even if
# a write fails part way through.
with open('../../manuscript/tables/canid_metrics_summary.tex', 'w') as table:
    # Write the header.
    table.write(header)
    # For every trio.
    for trio, row in zip(trio_array, df_mat):
        # NOTE(review): every value is rounded to three decimals, so anything
        # smaller than 0.0005 in magnitude (e.g., the p-values displayed above
        # for this trio) is written as 0.0 -- confirm this is intended for the
        # manuscript table.
        cells = [format(trio)] + [format(round(value, 3)) for value in row]
        # Join the cells with LaTeX column separators and end the table row.
        table.write(' & '.join(cells) + r' \\' + '\n')
    # Write the footer.
    table.write(footer)