# Confidence interval estimation using bootstrapping

---

Mathieu Bourdenx - October 2020

[Github](https://github.com/MathieuBo)

---

### Description

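**As input**
Provide the Excel file exported from the Motif counter (`./Output/KFERQ_analysis_export.xlsx`), containing one column per quantification method (`Method1` to `Method4`) with one motif type per row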

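**Output**
Excel document (`./Output/bootstrap_results.xlsx`) giving, for each method and motif type, the bootstrap mean percentage, the lower and upper bounds of the 95% confidence interval, and the standard deviation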

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from tqdm import tqdm

In [2]:
# Load the Excel file exported from the Motif counter into a DataFrame.
# The bootstrap below looks up each name listed in `methods` as a column
# of this table.

motifs_df = pd.read_excel('./Output/KFERQ_analysis_export.xlsx')

In [3]:
# Choose which quantification methods to analyse.
# Each name is used as a column label when indexing the motif DataFrame,
# and is reported in the 'Method' column of the results.

methods = ['Method1', 'Method2', 'Method3', 'Method4']

In [4]:
def perform_bootstrap(input_data=None, methods=None, n_iterations=100000,
                      seed=None):
    """Estimate motif-type percentages and 95% CIs by bootstrapping.

    For every method column, the column is resampled with replacement
    ``n_iterations`` times (each resample has the same length as the
    column). The percentage of each motif type is computed in every
    resample, and the distribution of those percentages is summarised.

    A motif type that does not occur in a given resample contributes a
    percentage of 0 for that resample. Leaving such resamples out would
    bias the mean and the confidence bounds of rare motif types upwards.

    Parameters
    ----------
    input_data : pandas.DataFrame, optional
        Table with one column per quantification method. When omitted, the
        notebook-level ``motifs_df`` is used (looked up at call time).
    methods : sequence of str, optional
        Column labels of ``input_data`` to analyse. When omitted, the
        notebook-level ``methods`` list is used (looked up at call time).
    n_iterations : int, optional
        Number of bootstrap resamples per method (default 100000).
    seed : int, optional
        Seed for a dedicated random generator, for reproducible results.
        When omitted, NumPy's global random state is used.

    Returns
    -------
    pandas.DataFrame
        One row per (method, motif type) with the columns 'Method',
        'Motif Type', 'Mean', '95CI lower', '95CI upper' and 'STD'.
        All statistics are percentages of the column length.

    Raises
    ------
    ValueError
        If ``n_iterations`` is smaller than 1.
    """
    if n_iterations < 1:
        raise ValueError("n_iterations must be at least 1")

    # Resolve defaults at call time so that re-running the data-loading cell
    # is picked up without having to re-define this function.
    if input_data is None:
        input_data = motifs_df
    if methods is None:
        # The parameter shadows the notebook-level name, hence globals().
        methods = globals()["methods"]

    rng = np.random if seed is None else np.random.default_rng(seed)

    results = []

    for method in methods:

        labels = np.asarray(input_data[method])
        n_obs = len(labels)
        if n_obs == 0:
            # Nothing to resample; an empty column yields no result rows.
            continue

        # Encode every observation as the index of its motif type so that
        # each resample can be tallied with a single bincount call.
        motif_types, codes = np.unique(labels, return_inverse=True)
        n_types = len(motif_types)

        # percentages[k, j]: share (in %) of motif type j in resample k.
        percentages = np.empty((n_iterations, n_types))
        for k in range(n_iterations):
            resample = rng.choice(codes, size=n_obs, replace=True)
            # minlength keeps a slot (count 0) for types missing from
            # this resample.
            counts = np.bincount(resample, minlength=n_types)
            percentages[k] = counts / n_obs * 100

        for j, motif in enumerate(motif_types):
            values = percentages[:, j]
            results.append({
                'Method': method,
                'Motif Type': motif,
                'Mean': np.mean(values),
                '95CI lower': np.percentile(values, 2.5),
                '95CI upper': np.percentile(values, 97.5),
                'STD': np.std(values),
            })

    return pd.DataFrame(results)

In [5]:
# Run the bootstrap on the loaded motif table for every selected method.
# The result is a DataFrame with one row per (method, motif type).
example = perform_bootstrap(motifs_df, methods)

In [6]:
# Display the bootstrap results table.
example

Unnamed: 0,Method,Motif Type,Mean,95CI lower,95CI upper,STD
0,Method1,Canonical,42.295611,34.857143,49.714286,3.73848
1,Method1,No motif,57.704389,50.285714,65.142857,3.73848
2,Method2,Canonical,42.288337,34.857143,49.714286,3.736748
3,Method2,No motif,23.440497,17.142857,29.714286,3.190177
4,Method2,PTM,34.271166,27.428571,41.142857,3.592567
5,Method3,Acetyl.,15.414491,10.285714,21.142857,2.729504
6,Method3,Canonical,42.305069,34.857143,49.714286,3.744083
7,Method3,No motif,23.422983,17.142857,29.714286,3.204078
8,Method3,Phospho.,18.857457,13.142857,24.571429,2.959998
9,Method4,Acetyl.,22.862886,16.571429,29.142857,3.17331


In [8]:
# Save the bootstrap results DataFrame as an Excel document at
# ./Output/bootstrap_results.xlsx
example.to_excel('./Output/bootstrap_results.xlsx')