# Human Analyses
## Dependencies
First we will load the necessary packages.

In [2]:
# Import packages.
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import norm
import scipy.stats as stats

Next, we define a function to consolidate all of the site pattern results for the TGP trios.

In [3]:
def build_trio_diccs():
    """
    ###########################################################################
    INPUT: N/A
    ---------------------------------------------------------------------------
    OUTPUT: Genome-wide site pattern and introgression metrics for all trios,
            returned as the tuple (trio_dicc, mean_dicc, std_dicc). Each one
            is a nested dictionary indexed as dicc[chrom][pop][metric], where
            chrom is an integer from 1 to 22, pop is one of the population
            labels listed in tgp_pop_array, and metric is one of 'ABBA',
            'BABA', 'BAAA', 'ABAA', 'ABBA-BABA', 'BAAA-ABAA', or 'DIFF'.
              - trio_dicc: array with one value per csv row of that population.
              - mean_dicc: np.mean of the corresponding trio_dicc array.
              - std_dicc: np.std of the corresponding trio_dicc array.
    ---------------------------------------------------------------------------
    NOTE: Reads ./tgp_trios/tgp_altai_trios_chr_{1..22}_site_pattern_counts.csv
          relative to the current working directory.
    ###########################################################################
    """
    # Column names for the per-chromosome csv files; because names are passed
    # explicitly, pandas treats the first line of each file as data.
    col_names = ['POP', 'IND', 'CHR', 'ABBA', 'BABA', 'BBAA', 'BAAA', 'ABAA', 'AABA']
    # Site pattern columns that are carried into the output dictionaries.
    count_cols = ['ABBA', 'BABA', 'BAAA', 'ABAA']
    # Initialize the population order.
    tgp_pop_array = np.array([
        'CEU', 'FIN', 'GBR', 'IBS', 'TSI',
        'CHB', 'CHS', 'CDX', 'JPT', 'KHV',
        'BEB', 'GIH', 'ITU', 'PJL', 'STU',
        'CLM', 'MXL', 'PEL', 'PUR',
    ])
    # Initialize dictionaries to store the results.
    trio_dicc = {}
    mean_dicc = {}
    std_dicc = {}
    # For every chromosome.
    for chrom in range(1, 23):
        # Load the site pattern results for this chromosome.
        df = pd.read_csv(
            './tgp_trios/tgp_altai_trios_chr_{0}_site_pattern_counts.csv'.format(chrom),
            names=col_names,
        )
        trio_dicc[chrom] = {}
        mean_dicc[chrom] = {}
        std_dicc[chrom] = {}
        # For every population.
        for pop in tgp_pop_array:
            # Subset the rows for this population once and reuse the subset.
            pop_df = df[df['POP'] == pop]
            # Per-row site pattern counts.
            metrics = {col: pop_df[col].values for col in count_cols}
            # Per-row differences; 'DIFF' is the difference of the two differences.
            metrics['ABBA-BABA'] = metrics['ABBA'] - metrics['BABA']
            metrics['BAAA-ABAA'] = metrics['BAAA'] - metrics['ABAA']
            metrics['DIFF'] = metrics['ABBA-BABA'] - metrics['BAAA-ABAA']
            # Fill the dictionaries.
            trio_dicc[chrom][pop] = metrics
            mean_dicc[chrom][pop] = {key: np.mean(vals) for key, vals in metrics.items()}
            std_dicc[chrom][pop] = {key: np.std(vals) for key, vals in metrics.items()}
    return trio_dicc, mean_dicc, std_dicc