In [None]:
#######################################
Input: 
animal_groups_to_ref.vcf(VCF file with variant calls per animal group)
    
Function: 
Convert the VCF into a table in which the genotype of each group at each position is given in 0/0, 1/0, etc. notation,
and write this table out to a CSV file.

Output:
CSV file with genotypes per animal group per scaffold position.
#######################################

In [None]:
#Get genotype from VCF per sample and scaffold pos
library(vcfR)
library(dplyr)
library(data.table)

# Read the per-group variant calls. checkFile = FALSE is passed through unchanged
# from the original call; it is spelled out as FALSE because the shorthand `F`
# is an ordinary variable that can be reassigned.
vcf <- read.vcfR("/workspace/hramzr/vcf_file_generation/01.freebayes/animal_groups_to_ref.vcf", checkFile = FALSE)

# Pull the GT (genotype) element out of the VCF.
gt <- extract.gt(vcf, element = "GT")

# Write the genotype table to geno.csv in the current working directory.
# NOTE(review): the ordering cell further down reads geno.csv from
# /workspace/hramzr/Fisher_tests/ - confirm this cell is run from that directory.
write.csv(gt, "geno.csv")

Scanning file to determine attributes.
File attributes:
  meta lines: 2066
  header_line: 2067
  variant count: 16265487
  column count: 22


In [None]:
#######################################
Function: 
Run the cell above on the cluster for the full-size computation, as it was tested on a reduced-size data set.
#######################################

In [1]:
#Run the R genotype-extraction cell above as a cluster job, so that the full-size VCF can be processed.
#Presumably Convert_VCF_to_genotype_table.r holds the code of that cell - verify before submitting.
#Load R 3.4.3 into the current shell environment.
module load R/3.4.3 
#Submit the script to the scheduler; -e sends the job's error output to Rerror.err.
#NOTE(review): -n 8 requests 8 slots/cores, but the R script shown above is not written to run in parallel - confirm this is needed.
bsub -n 8 -e Rerror.err "Rscript Convert_VCF_to_genotype_table.r"

Job <48497> is submitted to default queue <normal>.


In [None]:
#######################################
Input: 
geno.csv(csv file with genotypes, columns not yet ordered by sex)

Function: 
The goal of this cell is to order all columns from male to female and to convert all values to strings, so that the
strings can be turned into unique integers later on, which is needed for the chi-square test.

Output:
ordered_csv.csv(csv file with columns ordered from males to females and with all values in string format)
#######################################

In [1]:
import pandas as pd

# Column order written to ordered_csv.csv: the male groups first, then the female groups.
# The chi-square cell further down relies on this order (it treats the first 7
# genotype columns as male and the rest as female).
MALE_GROUPS = ["01_group", "03_group", "05_group", "09_group",
               "11_group", "12_group", "13_group"]
FEMALE_GROUPS = ["02_group", "04_group", "06_group", "07_group",
                 "08_group", "10_group"]


def order_genotypes_by_sex(genotypes):
    """Return the genotype table with columns ordered male -> female and every value as a string.

    Parameters
    ----------
    genotypes : pandas.DataFrame
        Genotype table containing (at least) all columns named in MALE_GROUPS
        and FEMALE_GROUPS. Any other columns are dropped.

    Returns
    -------
    pandas.DataFrame
        A new frame with the same index, columns in MALE_GROUPS + FEMALE_GROUPS
        order, and each cell converted with ``str`` (so a missing value becomes
        the literal string "nan").

    Raises
    ------
    KeyError
        If one of the expected group columns is absent.
    """
    ordered = genotypes[MALE_GROUPS + FEMALE_GROUPS]
    # Element-wise str() per column. This replaces DataFrame.applymap, which is
    # deprecated in recent pandas, and works on old and new pandas alike.
    return ordered.apply(lambda column: column.map(str))


# __name__ is "__main__" both in a notebook kernel and when the cell is run as a
# script, so the file conversion below still runs when the cell is executed.
if __name__ == "__main__":
    #read csv in python
    df = pd.read_csv("/workspace/hramzr/Fisher_tests/geno.csv", index_col=0)
    #insert order of male to female
    df = order_genotypes_by_sex(df)
    df.to_csv("ordered_csv.csv", sep='\t', encoding='utf-8')

In [None]:
#######################################
Input: 
ordered_csv.csv(file with ordered genotype BAM groups, header included)

Function: 
Remove the header line with the column identifiers, as these are not needed for the factorization step that comes next,
which is done line by line.

output:
loopfile.txt(file ready to be looped line for line with genotypes, with the headers removed)
#######################################

In [1]:
# Copy everything from line 2 onwards (i.e. all rows except the header) into loopfile.txt.
tail -n +2 ordered_csv.csv > loopfile.txt

In [None]:
#######################################
Input:
loopfile.txt(file with genotypes per region across sex groups)

Function: 
1. Read each line with genotypes per region and factorize the strings to unique integers, so that they can be used in the chi-square test.
2. Count each unique integer per sex to build a contingency table whose size depends on the number of genotypes on that line.
3. Run the chi-square test on this table and store the p-value along with the scaffold name and position in a text file called chi_data.

output:
chi_data(file with scaffold name, pos and p_value per region)
#######################################

In [7]:
#Factorize the genotype strings to unique integers and run a chi-square test per variant
import collections

import numpy as np
import scipy.stats as stats

# Number of leading genotype columns that are treated as male groups; every
# remaining genotype column is treated as female.
N_MALE_GROUPS = 7


def factorize_genotypes(genotypes):
    """Map each distinct genotype string to an integer code.

    Codes start at 1 and are handed out in order of first appearance, e.g.
    ["0/0", "0/1", "0/0"] -> [1, 2, 1].
    """
    codes = {}
    return [codes.setdefault(genotype, len(codes) + 1) for genotype in genotypes]


def contingency_table(codes, n_male=N_MALE_GROUPS):
    """Count every genotype code per sex.

    Parameters
    ----------
    codes : list of int
        Factorized genotypes of one variant; the first ``n_male`` entries are
        the male groups, the rest the female groups.
    n_male : int
        Number of leading entries that belong to male groups.

    Returns
    -------
    numpy.ndarray
        One row per genotype code (1..max), columns are [male count, female count].
    """
    male_counts = collections.Counter(codes[:n_male])
    female_counts = collections.Counter(codes[n_male:])
    return np.array([[male_counts[code], female_counts[code]]
                     for code in range(1, max(codes) + 1)])


def chi_square_p_value(genotypes, n_male=N_MALE_GROUPS):
    """Return the chi-square p-value for the genotype distribution of males vs. females.

    A p-value of 1.0 means no difference between the sexes; the lower the
    value, the larger the difference.

    Raises ValueError if ``genotypes`` is empty, or if scipy rejects the table
    (e.g. one sex has no observations at all).
    """
    table = contingency_table(factorize_genotypes(genotypes), n_male)
    # Index 1 of the chi2_contingency result is the p-value.
    return float(stats.chi2_contingency(table)[1])


def split_locus_id(locus_id):
    """Split a "<scaffold>_<position>" identifier into (scaffold, int(position)).

    The split is done on the LAST underscore so that scaffold names which
    themselves contain underscores are handled correctly.
    """
    scaffold, position = locus_id.rsplit("_", 1)
    return scaffold, int(position)


def main(in_path="loopfile.txt", out_path="chi_data"):
    """Stream ``in_path`` line by line and write "'<scaffold>' <pos> <p_value>" lines to ``out_path``.

    Each input line holds a locus identifier followed by one genotype per
    group, separated by whitespace. Blank lines are skipped. Lines are
    processed one at a time so the whole input never has to fit in memory,
    and both files are closed even if a line raises.
    """
    with open(in_path, "r") as loopfile, open(out_path, "w") as chi_out:
        for raw_line in loopfile:
            fields = raw_line.split()
            if not fields:
                continue
            scaffold, position = split_locus_id(fields[0])
            p_val = chi_square_p_value(fields[1:])
            # The scaffold name is written quoted (repr) to keep the existing
            # chi_data line format; p_val is a plain Python float here, so the
            # number is written bare whatever the NumPy version.
            chi_out.write(f"{scaffold!r} {position} {p_val!r}\n")


# __name__ is "__main__" both in a notebook kernel and when this cell is run as
# a script on the cluster, so the analysis still runs when the cell is executed.
if __name__ == "__main__":
    main()
    


'trevally000001' 23 0.9359970601826556

'trevally000001' 69 0.514158251091148

'trevally000001' 75 0.9359970601826556

'trevally000001' 91 0.6764871602580358

'trevally000001' 142 0.514158251091148

'trevally000001' 146 0.43060278747149705

'trevally000001' 227 0.8789011506359462

'trevally000001' 251 0.9359970601826556

'trevally000001' 282 0.5678354775547328

'trevally000001' 332 0.9359970601826556

'trevally000001' 430 0.3631326759586153

'trevally000001' 440 0.9359970601826556

'trevally000001' 516 0.9359970601826556

'trevally000001' 549 0.6764871602580358

'trevally000001' 561 0.9359970601826556

'trevally000001' 595 0.9359970601826556

'trevally000001' 644 0.6285839333986252

'trevally000001' 696 0.3702944864013925

'trevally000001' 714 0.3736783079498954

'trevally000001' 718 0.9359970601826556

'trevally000001' 723 0.9359970601826556

'trevally000001' 734 0.514158251091148

'trevally000001' 785 0.9359970601826556

'trevally000001' 786 0.3631326759586153

'trevally000001' 865 0


'trevally000003' 957 0.054191247144972116

'trevally000003' 960 1.0

'trevally000003' 961 0.24276185249549598

'trevally000003' 991 1.0

'trevally000003' 1010 0.6764871602580358

'trevally000003' 1019 0.9359970601826556

'trevally000003' 1022 0.9359970601826556

'trevally000003' 1027 0.9359970601826556

'trevally000003' 1033 0.6764871602580357

'trevally000003' 1035 0.9359970601826556

'trevally000003' 1049 0.9359970601826556

'trevally000003' 1053 0.9359970601826556

'trevally000003' 1054 0.9359970601826556

'trevally000003' 1057 0.9359970601826556

'trevally000003' 1062 0.9359970601826556

'trevally000003' 1064 0.9359970601826556

'trevally000003' 1065 0.514158251091148

'trevally000003' 1091 0.35566979104428165

'trevally000003' 1677 0.514158251091148

'trevally000003' 1687 0.9359970601826556

'trevally000004' 35 0.8789011506359462

'trevally000004' 37 0.8789011506359462

'trevally000004' 72 0.514158251091148

'trevally000004' 73 0.6764871602580357

'trevally000004' 93 0.9359970601

'trevally000008' 587 0.6285839333986252

'trevally000008' 615 0.15663894451067534

'trevally000008' 618 0.8259364898221998

'trevally000008' 626 0.9359970601826556

'trevally000008' 630 0.8789011506359462

'trevally000008' 637 0.9359970601826556

'trevally000008' 649 0.9359970601826556

'trevally000008' 652 0.6721846346710162

'trevally000008' 660 0.6764871602580358

'trevally000008' 671 0.8789011506359462

'trevally000008' 686 0.9359970601826556

'trevally000008' 693 0.9359970601826556

'trevally000008' 713 0.529017009340737

'trevally000008' 791 0.7638243153154629

'trevally000008' 803 0.9359970601826556

'trevally000008' 806 0.18091091216056798

'trevally000008' 826 0.24276185249549598

'trevally000008' 847 0.9359970601826556

'trevally000008' 877 0.9359970601826556

'trevally000008' 884 0.5563292594457783

'trevally000008' 899 0.9359970601826556

'trevally000008' 907 0.9359970601826556

'trevally000008' 913 0.3214476320388769

'trevally000008' 971 0.6764871602580358

'trevally00000


'trevally000012' 870 0.9359970601826556

'trevally000012' 885 0.6259095498142319

'trevally000012' 905 0.35916579293650647

'trevally000012' 921 0.9359970601826556

'trevally000012' 927 0.6764871602580358

'trevally000012' 1014 0.514158251091148

'trevally000012' 1071 0.6268730492956986

'trevally000012' 1116 0.8789011506359462

'trevally000012' 1119 0.9359970601826556

'trevally000012' 1136 0.32889154391663505

'trevally000012' 1247 0.6835494007988195

'trevally000012' 1285 0.9359970601826556

'trevally000012' 1301 0.29559943537737077

'trevally000012' 1314 0.9359970601826556

'trevally000012' 1321 0.6764871602580357

'trevally000012' 1324 0.11755217329895379

'trevally000012' 1326 0.021983561324250465

'trevally000012' 1349 0.5563292594457783

'trevally000012' 1377 0.5140286938489835

'trevally000012' 1419 0.4650324029036156

'trevally000012' 1422 0.441580530368176

'trevally000012' 1427 0.3736783079498954

'trevally000012' 1432 0.1099272486464637

'trevally000012' 1435 0.3736783079


'trevally000016' 3328 0.6285839333986252

'trevally000016' 3331 0.6285839333986252

'trevally000016' 3332 0.9673806070043549

'trevally000016' 3357 0.8790018699239216

'trevally000016' 3389 0.9399724037096165

'trevally000016' 3462 0.9359970601826556

'trevally000016' 3525 0.1401804633210659

'trevally000016' 3536 0.514158251091148

'trevally000016' 3569 0.8259364898221998

'trevally000016' 3575 0.8790018699239217

'trevally000016' 3665 0.9359970601826556

'trevally000016' 3680 0.8259364898221998

'trevally000016' 3819 0.9359970601826556

'trevally000016' 3862 0.6764871602580358

'trevally000016' 3888 0.6285839333986252

'trevally000016' 3924 0.529017009340737

'trevally000016' 3935 0.8185279603939761

'trevally000016' 3937 0.514158251091148

'trevally000016' 3940 0.3631326759586153

'trevally000016' 3963 0.9359970601826556

'trevally000016' 3976 0.4888137167029959

'trevally000016' 3980 0.9359970601826556

'trevally000016' 3984 0.6764871602580358

'trevally000016' 3992 0.427873787410

'trevally000019' 2680 0.3736783079498954

'trevally000019' 2686 0.514158251091148

'trevally000019' 2699 0.8259364898221998

'trevally000019' 2705 0.7638243153154629

'trevally000019' 2716 0.514158251091148

'trevally000019' 2720 0.8789011506359462

'trevally000019' 2726 0.9359970601826556

'trevally000019' 2731 0.9359970601826556

'trevally000019' 2732 0.6764871602580358

'trevally000019' 2738 0.17531743847654538

'trevally000019' 2747 0.514158251091148

'trevally000019' 2761 0.9359970601826556

'trevally000019' 2763 0.9359970601826556

'trevally000019' 2773 0.8789011506359462

'trevally000019' 2775 0.9359970601826556

'trevally000019' 2812 0.8789011506359462

'trevally000019' 2814 0.3736783079498954

'trevally000019' 2822 0.8259364898221998

'trevally000019' 2831 0.9359970601826556

'trevally000019' 2837 0.3736783079498954

'trevally000019' 2845 0.9399724037096165

'trevally000019' 2860 0.9359970601826556

'trevally000019' 2874 0.8259364898221998

'trevally000019' 2919 0.935997060182


'trevally000022' 1405 0.514158251091148

'trevally000022' 1406 1.0

'trevally000022' 1414 0.514158251091148

'trevally000022' 1415 0.514158251091148

'trevally000022' 1417 1.0

'trevally000022' 1420 0.8259364898221998

'trevally000022' 1436 0.514158251091148

'trevally000022' 1441 1.0

'trevally000022' 1461 0.6764871602580357

'trevally000022' 1468 0.8789011506359462

'trevally000022' 1475 0.8259364898221998

'trevally000022' 1478 0.18091091216056784

'trevally000022' 1480 0.4147624998807684

'trevally000022' 1493 0.6764871602580358

'trevally000022' 1494 0.6764871602580358

'trevally000022' 1501 1.0

'trevally000022' 1504 0.6764871602580358

'trevally000022' 1511 0.9359970601826556

'trevally000022' 1515 0.6764871602580357

'trevally000022' 1519 0.5036709977086684

'trevally000022' 1616 0.4147624998807684

'trevally000022' 1617 1.0

'trevally000022' 1640 0.35566979104428165

'trevally000022' 1641 0.3736783079498954

'trevally000022' 1652 1.0

'trevally000022' 1669 0.8259364898221998


'trevally000023' 2730 0.9359970601826556

'trevally000023' 2761 0.3631326759586153

'trevally000023' 2793 0.9359970601826556

'trevally000023' 2799 0.3631326759586153

'trevally000023' 2823 0.6764871602580358

'trevally000023' 2841 0.9359970601826556

'trevally000023' 2853 0.6764871602580358

'trevally000023' 2857 0.514158251091148

'trevally000023' 2873 0.514158251091148

'trevally000023' 2882 0.3631326759586153

'trevally000023' 2884 0.9359970601826556

'trevally000023' 2886 0.9359970601826556

'trevally000023' 2899 0.9359970601826556

'trevally000023' 2911 0.3631326759586153

'trevally000023' 2931 0.9359970601826556

'trevally000023' 2963 0.9359970601826556

'trevally000023' 2967 0.8789011506359462

'trevally000023' 3001 0.7638243153154629

'trevally000023' 3024 0.1314537067721295

'trevally000023' 3025 0.1314537067721295

'trevally000023' 3039 0.4865284585270381

'trevally000023' 3050 0.8789011506359462

'trevally000023' 3054 0.8789011506359462

'trevally000023' 3067 0.935997060182

'trevally000027' 610 0.529017009340737

'trevally000027' 710 0.9359970601826556

'trevally000027' 718 0.8259364898221998

'trevally000027' 753 0.4888137167029959

'trevally000027' 952 0.514158251091148

'trevally000027' 1000 0.9359970601826556

'trevally000027' 1027 0.3214476320388769

'trevally000027' 1074 0.9359970601826556

'trevally000027' 1085 0.514158251091148

'trevally000027' 1119 0.9359970601826556

'trevally000027' 1120 0.8259364898221998

'trevally000027' 1124 0.8789011506359462

'trevally000027' 1127 0.9399724037096165

'trevally000027' 1128 0.8789011506359462

'trevally000027' 1166 0.7638243153154629

'trevally000027' 1169 0.9359970601826556

'trevally000027' 1175 0.9829512361908932

'trevally000027' 1186 0.5140286938489835

'trevally000027' 1215 0.514158251091148

'trevally000027' 1228 0.9359970601826556

'trevally000027' 1231 0.514158251091148

'trevally000027' 1240 0.20208316663617865

'trevally000027' 1250 0.5026653251776234

'trevally000027' 1262 0.4805490504936726

'

'trevally000033' 1125 0.514158251091148

'trevally000033' 1167 0.9359970601826556

'trevally000033' 1235 0.8789011506359462

'trevally000033' 1339 0.9359970601826556

'trevally000033' 1364 0.9359970601826556

'trevally000033' 1368 0.36313267595861526

'trevally000033' 1382 0.14079524896858192

'trevally000033' 1385 0.14079524896858192

'trevally000033' 1386 0.9359970601826556

'trevally000033' 1389 0.9359970601826556

'trevally000033' 1429 0.8789011506359462

'trevally000033' 1455 0.9359970601826556

'trevally000033' 1468 0.8789011506359462

'trevally000033' 1555 0.8789011506359462

'trevally000033' 1566 0.9359970601826556

'trevally000033' 1570 0.9359970601826556

'trevally000033' 1578 0.6764871602580357

'trevally000033' 1604 0.3437443237526888

'trevally000033' 1636 0.9359970601826556

'trevally000033' 1692 0.9359970601826556

'trevally000033' 1721 0.10674989058557591

'trevally000033' 1814 0.6764871602580357

'trevally000033' 1886 0.9359970601826556

'trevally000033' 1942 0.9359970

'trevally000038' 614 0.9359970601826556

'trevally000038' 617 0.3736783079498954

'trevally000038' 674 0.21393311987927358

'trevally000038' 687 0.6285839333986252

'trevally000038' 742 0.9359970601826556

'trevally000038' 749 0.24276185249549598

'trevally000038' 758 0.1727313214967539

'trevally000038' 767 0.8789011506359462

'trevally000038' 770 0.3788269505078057

'trevally000038' 776 0.6764871602580357

'trevally000038' 820 0.9359970601826556

'trevally000038' 822 0.7638243153154629

'trevally000038' 837 0.8259364898221998

'trevally000038' 844 0.6764871602580357

'trevally000038' 852 0.9359970601826556

'trevally000038' 857 0.8789011506359462

'trevally000038' 859 0.8789011506359462

'trevally000038' 868 0.7638243153154629

'trevally000038' 886 0.10465707307355392

'trevally000038' 899 0.5026653251776234

'trevally000038' 915 0.2922184988617353

'trevally000038' 938 0.6764871602580358

'trevally000038' 942 0.8259364898221998

'trevally000038' 943 0.6764871602580357

'trevally0000

'trevally000042' 2270 0.514158251091148

'trevally000042' 2294 0.8259364898221998

'trevally000042' 2310 0.9359970601826556

'trevally000042' 2321 0.8789011506359462

'trevally000042' 2322 0.8789011506359462

'trevally000042' 2324 0.35566979104428165

'trevally000042' 2330 0.9359970601826556

'trevally000042' 2336 0.3631326759586153

'trevally000042' 2337 1.0

'trevally000042' 2342 0.514158251091148

'trevally000042' 2384 0.514158251091148

'trevally000042' 2417 0.514158251091148

'trevally000042' 2420 0.514158251091148

'trevally000042' 2458 0.32144763203887683

'trevally000042' 2477 0.36313267595861526

'trevally000042' 2508 0.4326078826335553

'trevally000042' 2696 0.9359970601826556

'trevally000042' 2703 0.6721846346710162

'trevally000042' 2716 0.9359970601826556

'trevally000042' 2871 0.514158251091148

'trevally000042' 2892 0.9359970601826556

'trevally000042' 2973 0.9359970601826556

'trevally000042' 2989 0.6764871602580358

'trevally000042' 3043 0.9359970601826556

'trevally0


'trevally000044' 1039 0.14703749964251972

'trevally000044' 1048 0.514158251091148

'trevally000044' 1053 0.4902234467843054

'trevally000044' 1080 0.514158251091148

'trevally000044' 1103 0.9359970601826556

'trevally000044' 1113 0.9359970601826556

'trevally000044' 1115 0.7091623242446933

'trevally000044' 1164 0.8789011506359462

'trevally000044' 1238 0.9359970601826556

'trevally000044' 1241 0.0840628558369375

'trevally000044' 1245 0.514158251091148

'trevally000044' 1249 0.3437443237526888

'trevally000044' 1257 0.514158251091148

'trevally000044' 1278 0.047310623605537734

'trevally000044' 1282 0.9359970601826556

'trevally000044' 1317 0.9359970601826556

'trevally000044' 1371 0.9359970601826556

'trevally000044' 1409 0.5290170093407371

'trevally000044' 1422 0.6285839333986252

'trevally000044' 1424 0.9359970601826556

'trevally000044' 1425 0.9359970601826556

'trevally000044' 1433 0.8789011506359462

'trevally000044' 1463 0.529017009340737

'trevally000044' 1519 0.24276185249


'trevally000047' 1665 0.9359970601826556

'trevally000047' 1668 0.9359970601826556

'trevally000047' 1672 0.1727313214967539

'trevally000047' 1675 0.7037550694162538

'trevally000047' 1686 0.9359970601826556

'trevally000047' 1687 0.6764871602580357

'trevally000047' 1693 0.514158251091148

'trevally000047' 1706 0.329239645449073

'trevally000047' 1725 0.514158251091148

'trevally000047' 1729 0.36313267595861526

'trevally000047' 1742 0.3214476320388769

'trevally000047' 1756 0.43060278747149705

'trevally000047' 1779 0.8790018699239217

'trevally000047' 1780 0.514158251091148

'trevally000047' 1797 0.9399724037096165

'trevally000047' 1805 0.9359970601826556

'trevally000047' 1813 0.8789011506359462

'trevally000047' 1814 0.6764871602580358

'trevally000047' 1839 0.6764871602580358

'trevally000047' 1842 0.8259364898221998

'trevally000047' 1860 0.8789011506359462

'trevally000047' 1861 0.514158251091148

'trevally000047' 1865 0.514158251091148

'trevally000047' 1866 0.5141582510911

'trevally000053' 1469 0.514158251091148

'trevally000053' 1501 0.6764871602580357

'trevally000053' 1520 0.514158251091148

'trevally000053' 1527 0.29559943537737077

'trevally000053' 1540 0.13778764150046086

'trevally000053' 1543 0.3631326759586153

'trevally000053' 1544 0.3631326759586153

'trevally000053' 1545 0.3631326759586153

'trevally000053' 1549 0.5563292594457783

'trevally000053' 1550 0.3066347213170795

'trevally000053' 1551 0.3631326759586153

'trevally000053' 1556 0.690353124574046

'trevally000053' 1561 0.2552476618632109

'trevally000053' 1563 0.3631326759586153

'trevally000053' 1571 0.29559943537737077

'trevally000053' 1575 0.5026653251776234

'trevally000053' 1581 0.26730728476365195

'trevally000053' 1599 0.5026653251776234

'trevally000053' 1609 0.8789011506359462

'trevally000053' 1615 0.8259364898221998

'trevally000053' 1622 0.9359970601826556

'trevally000053' 1623 0.29559943537737077

'trevally000053' 1625 0.11755217329895368

'trevally000053' 1633 0.1175521


'trevally000059' 1289 0.514158251091148

'trevally000059' 1304 0.8259364898221998

'trevally000059' 1305 0.9829512361908932

'trevally000059' 1319 0.5290170093407369

'trevally000059' 1338 0.8789011506359462

'trevally000059' 1339 0.8789011506359462

'trevally000059' 1342 0.9673806070043549

'trevally000059' 1346 0.7638243153154629

'trevally000059' 1365 0.7638243153154629

'trevally000059' 1369 0.8789011506359462

'trevally000059' 1379 0.9359970601826556

'trevally000059' 1381 0.9359970601826556

'trevally000059' 1398 0.8790018699239217

'trevally000059' 1403 0.23260393095626689

'trevally000059' 1438 0.15663894451067534

'trevally000059' 1443 0.514158251091148

'trevally000059' 1477 0.9359970601826556

'trevally000059' 1487 0.9359970601826556

'trevally000059' 1491 0.35566979104428165

'trevally000059' 1536 0.6917845431853027

'trevally000059' 1547 0.29559943537737077

'trevally000059' 1565 0.514158251091148

'trevally000060' 128 0.24581535773822294

'trevally000060' 156 0.139009321

'trevally000064' 2781 0.4495073046443976

'trevally000064' 2792 0.1879783332566671

'trevally000064' 2793 0.1879783332566671

'trevally000064' 2814 0.32144763203887683

'trevally000064' 2815 0.21393311987927355

'trevally000064' 2824 0.5290170093407369

'trevally000064' 2844 0.6285839333986252

'trevally000064' 2848 0.3214476320388769

'trevally000064' 2860 0.6764871602580358

'trevally000064' 2881 0.5026653251776234

'trevally000064' 2892 0.9359970601826556

'trevally000064' 2893 0.9359970601826556

'trevally000064' 2910 0.8789011506359462

'trevally000064' 2914 0.6285839333986252

'trevally000064' 2915 0.9359970601826556

'trevally000064' 2947 0.5026653251776234

'trevally000064' 2948 0.8789011506359462

'trevally000064' 3070 0.9359970601826556

'trevally000064' 3137 0.7638243153154629

'trevally000064' 3139 0.7638243153154629

'trevally000064' 3161 0.514158251091148

'trevally000064' 3182 0.3631326759586153

'trevally000064' 3200 0.4495073046443976

'trevally000064' 3207 0.676487160

'trevally000067' 2574 0.514158251091148

'trevally000067' 2623 0.514158251091148

'trevally000067' 2651 0.9359970601826556

'trevally000067' 2716 0.9359970601826556

'trevally000067' 2744 0.514158251091148

'trevally000067' 2801 0.3736783079498954

'trevally000067' 2802 0.3736783079498954

'trevally000067' 2842 0.9359970601826556

'trevally000067' 2862 0.8789011506359462

'trevally000067' 2869 0.3736783079498954

'trevally000067' 2901 0.6764871602580357

'trevally000067' 2927 0.514158251091148

'trevally000067' 3174 0.9359970601826556

'trevally000067' 3181 0.9359970601826556

'trevally000067' 3187 0.5290170093407371

'trevally000067' 3204 0.2922184988617353

'trevally000067' 3252 0.9359970601826556

'trevally000067' 3257 0.056215012660124526

'trevally000067' 3265 0.9359970601826556

'trevally000067' 3274 0.9359970601826556

'trevally000067' 3302 0.9359970601826556

'trevally000067' 3327 0.9359970601826556

'trevally000067' 3387 0.3736783079498954

'trevally000067' 3397 0.763824315315

'trevally000070' 6386 0.9359970601826556

'trevally000070' 6392 1.0

'trevally000070' 6396 0.9359970601826556

'trevally000070' 6400 1.0

'trevally000070' 6501 0.514158251091148

'trevally000070' 6566 0.6721846346710162

'trevally000070' 6667 0.9359970601826556

'trevally000070' 6739 0.6764871602580357

'trevally000070' 7037 1.0

'trevally000070' 7048 0.9359970601826556

'trevally000070' 7054 0.8259364898221998

'trevally000070' 7063 1.0

'trevally000070' 7068 1.0

'trevally000070' 7075 1.0

'trevally000070' 7089 0.8259364898221998

'trevally000070' 7124 1.0

'trevally000070' 7129 1.0

'trevally000070' 7135 0.6764871602580357

'trevally000070' 7174 1.0

'trevally000070' 7196 1.0

'trevally000070' 7197 1.0

'trevally000070' 7203 1.0

'trevally000070' 7225 1.0

'trevally000070' 7233 1.0

'trevally000070' 7240 1.0

'trevally000070' 7242 1.0

'trevally000070' 7245 1.0

'trevally000070' 7265 1.0

'trevally000070' 7268 1.0

'trevally000070' 7284 0.514158251091148

'trevally000070' 7293 1.0



'trevally000070' 21360 0.10465707307355394

'trevally000070' 21381 0.030741465832772142

'trevally000070' 21491 0.9359970601826556

'trevally000070' 21492 0.514158251091148

'trevally000070' 21549 0.4147624998807684

'trevally000070' 21656 0.9359970601826556

'trevally000070' 21690 0.9359970601826556

'trevally000070' 21743 0.8789011506359462

'trevally000070' 21745 0.8789011506359462

'trevally000070' 21789 0.9359970601826556

'trevally000070' 22015 0.8789011506359462

'trevally000070' 22032 0.3736783079498954

'trevally000070' 22033 0.3736783079498954

'trevally000070' 22037 0.42799421699138807

'trevally000070' 22069 0.43060278747149705

'trevally000070' 22073 0.5290170093407369

'trevally000070' 22077 0.8789011506359462

'trevally000070' 22082 0.6764871602580357

'trevally000070' 22101 0.7638243153154629

'trevally000070' 22105 0.6764871602580358

'trevally000070' 22112 1.0

'trevally000070' 22114 0.8789011506359462

'trevally000070' 22122 0.7638243153154629

'trevally000070' 22130

'trevally000070' 30251 0.24276185249549598

'trevally000070' 30266 0.9359970601826556

'trevally000070' 30276 0.514158251091148

'trevally000070' 30576 0.514158251091148

'trevally000070' 30664 0.5140286938489835

'trevally000070' 30669 0.1727313214967539

'trevally000070' 30697 0.8789011506359462

'trevally000070' 30713 0.3313536057439

'trevally000070' 30914 0.9359970601826556

'trevally000070' 30969 0.8789011506359462

'trevally000070' 30972 0.9359970601826556

'trevally000070' 31071 0.9359970601826556

'trevally000070' 31209 0.514158251091148

'trevally000070' 31250 0.9359970601826556

'trevally000070' 31300 0.9359970601826556

'trevally000070' 31370 0.9359970601826556

'trevally000070' 31377 0.9359970601826556

'trevally000070' 31401 0.9359970601826556

'trevally000070' 31430 0.9359970601826556

'trevally000070' 31456 0.9359970601826556

'trevally000070' 31495 0.8259364898221998

'trevally000070' 31542 0.9359970601826556

'trevally000070' 31587 0.9359970601826556

'trevally000070'

'trevally000070' 45569 0.9359970601826556

'trevally000070' 45583 0.9359970601826556

'trevally000070' 45736 0.42799421699138795

'trevally000070' 45748 0.9359970601826556

'trevally000070' 45803 0.9359970601826556

'trevally000070' 45810 0.6285839333986252

'trevally000070' 45814 0.514158251091148

'trevally000070' 45820 0.514158251091148

'trevally000070' 45879 0.08406285583693758

'trevally000070' 46054 0.8789011506359462

'trevally000070' 46076 0.3005971968216186

'trevally000070' 46093 0.8789011506359462

'trevally000070' 46101 0.514158251091148

'trevally000070' 46102 0.514158251091148

'trevally000070' 46103 0.7638243153154629

'trevally000070' 46132 0.9359970601826556

'trevally000070' 46143 0.514158251091148

'trevally000070' 46144 0.9359970601826556

'trevally000070' 46160 0.8789011506359462

'trevally000070' 46185 0.9359970601826556

'trevally000070' 46214 0.4147624998807684

'trevally000070' 46241 0.8789011506359462

'trevally000070' 46358 0.32144763203887683

'trevally0000


'trevally000070' 58978 0.9359970601826556

'trevally000070' 58984 0.9359970601826556

'trevally000070' 59024 0.1727313214967539

'trevally000070' 59064 0.14079524896858192

'trevally000070' 59091 0.3736783079498954

'trevally000070' 59092 0.3736783079498954

'trevally000070' 59154 0.9359970601826556

'trevally000070' 59205 0.7638243153154629

'trevally000070' 59207 0.9359970601826556

'trevally000070' 59217 0.9359970601826556

'trevally000070' 59255 0.5140286938489835

'trevally000070' 59264 0.7638243153154629

'trevally000070' 59313 0.9359970601826556

'trevally000070' 59397 0.514158251091148

'trevally000070' 59414 0.9359970601826556

'trevally000070' 59417 0.9359970601826556

'trevally000070' 59457 0.36313267595861526

'trevally000070' 59477 0.9359970601826556

'trevally000070' 59492 0.9359970601826556

'trevally000070' 59510 0.24276185249549598

'trevally000070' 59520 0.9359970601826556

'trevally000070' 59537 0.9359970601826556

'trevally000070' 59629 0.32144763203887683

'treval

'trevally000070' 69852 0.9359970601826556

'trevally000070' 69872 0.11755217329895368

'trevally000070' 69928 0.24997140937526238

'trevally000070' 69944 0.35566979104428165

'trevally000070' 69955 0.24276185249549598

'trevally000070' 69993 0.24997140937526238

'trevally000070' 70028 0.9359970601826556

'trevally000070' 70036 0.9359970601826556

'trevally000070' 70047 0.3736783079498954

'trevally000070' 70049 0.9359970601826556

'trevally000070' 70061 0.24997140937526238

'trevally000070' 70075 0.2518836816494942

'trevally000070' 70089 0.1413332710475948

'trevally000070' 70334 0.1879783332566671

'trevally000070' 70345 0.10279690843528638

'trevally000070' 70350 0.10279690843528644

'trevally000070' 70364 0.10279690843528644

'trevally000070' 70379 0.1879783332566671

'trevally000070' 70410 0.10279690843528644

'trevally000070' 70422 0.1879783332566671

'trevally000070' 70440 0.1879783332566671

'trevally000070' 70442 0.1879783332566671

'trevally000070' 70484 0.10279690843528644



'trevally000070' 79210 0.514158251091148

'trevally000070' 79215 0.9359970601826556

'trevally000070' 79224 1.0

'trevally000070' 79230 0.43060278747149705

'trevally000070' 79232 0.9359970601826556

'trevally000070' 79234 0.9359970601826556

'trevally000070' 79239 0.3736783079498954

'trevally000070' 79241 0.9359970601826556

'trevally000070' 79243 0.3736783079498954

'trevally000070' 79250 0.3736783079498954

'trevally000070' 79252 0.3736783079498954

'trevally000070' 79258 0.7638243153154629

'trevally000070' 79260 0.9359970601826556

'trevally000070' 79268 0.2518836816494942

'trevally000070' 79273 0.9359970601826556

'trevally000070' 79282 0.9673806070043549

'trevally000070' 79301 0.6764871602580358

'trevally000070' 79302 0.8789011506359462

'trevally000070' 79310 0.514158251091148

'trevally000070' 79317 0.514158251091148

'trevally000070' 79322 0.3631326759586153

'trevally000070' 79328 0.9359970601826556

'trevally000070' 79329 0.9359970601826556

'trevally000070' 79336 0.935

'trevally000070' 89722 0.9359970601826556

'trevally000070' 89883 0.514158251091148

'trevally000070' 89891 0.046197685439944985

'trevally000070' 89910 0.9359970601826556

'trevally000070' 89941 0.9359970601826556

'trevally000070' 89985 0.9359970601826556

'trevally000070' 90110 0.9359970601826556

'trevally000070' 90148 0.3631326759586153

'trevally000070' 90156 0.3437443237526889

'trevally000070' 90172 0.9359970601826556

'trevally000070' 90181 0.9359970601826556

'trevally000070' 90183 0.9359970601826556

'trevally000070' 90206 0.24276185249549598

'trevally000070' 90208 0.24276185249549598

'trevally000070' 90212 0.10465707307355394

'trevally000070' 90213 0.10465707307355394

'trevally000070' 90222 0.10465707307355394

'trevally000070' 90227 0.10465707307355394

'trevally000070' 90232 0.24276185249549598

'trevally000070' 90240 0.9359970601826556

'trevally000070' 90241 0.9359970601826556

'trevally000070' 90247 0.9359970601826556

'trevally000070' 90261 0.9359970601826556

'tr

'trevally000070' 97911 0.9359970601826556

'trevally000070' 97912 0.514158251091148

'trevally000070' 97913 0.514158251091148

'trevally000070' 97916 0.9359970601826556

'trevally000070' 97940 0.514158251091148

'trevally000070' 97978 0.9359970601826556

'trevally000070' 97987 0.10279690843528638

'trevally000070' 98053 0.6764871602580357

'trevally000070' 98070 0.5290170093407369

'trevally000070' 98108 0.0840628558369375

'trevally000070' 98133 0.0840628558369375

'trevally000070' 98138 0.6764871602580357

'trevally000070' 98148 0.15372053274316938

'trevally000070' 98153 0.9359970601826556

'trevally000070' 98165 0.9359970601826556

'trevally000070' 98211 0.2518836816494942

'trevally000070' 98213 0.2518836816494942

'trevally000070' 98219 0.24581535773822294

'trevally000070' 98235 0.9359970601826556

'trevally000070' 98275 0.38012242588687084

'trevally000070' 98278 0.329239645449073

'trevally000070' 98309 0.9359970601826556

'trevally000070' 98334 0.514158251091148

'trevally000


'trevally000070' 104302 0.8259364898221998

'trevally000070' 104306 0.35566979104428165

'trevally000070' 104310 0.35566979104428165

'trevally000070' 104322 1.0

'trevally000070' 104335 0.9359970601826556

'trevally000070' 104336 0.9359970601826556

'trevally000070' 104337 0.24276185249549598

'trevally000070' 104353 0.514158251091148

'trevally000070' 104361 1.0

'trevally000070' 104375 0.6764871602580358

'trevally000070' 104377 0.9359970601826556

'trevally000070' 104405 0.8789011506359462

'trevally000070' 104409 0.9359970601826556

'trevally000070' 104410 0.8789011506359462

'trevally000070' 104431 0.9359970601826556

'trevally000070' 104445 0.8789011506359462

'trevally000070' 104453 1.0

'trevally000070' 104457 0.9359970601826556

'trevally000070' 104463 0.3736783079498954

'trevally000070' 104464 0.9359970601826556

'trevally000070' 104488 0.7638243153154629

'trevally000070' 104496 0.8789011506359462

'trevally000070' 104498 0.6764871602580357

'trevally000070' 104500 1.0

'

'trevally000070' 110927 0.9359970601826556

'trevally000070' 110928 0.3736783079498954

'trevally000070' 110948 0.10992724864646361

'trevally000070' 110991 0.8789011506359462

'trevally000070' 111019 0.14079524896858192

'trevally000070' 111020 0.9359970601826556

'trevally000070' 111031 0.9359970601826556

'trevally000070' 111048 0.9359970601826556

'trevally000070' 111067 0.2955994353773708

'trevally000070' 111069 0.6917845431853027

'trevally000070' 111120 0.9359970601826556

'trevally000070' 111182 0.6764871602580358

'trevally000070' 111224 0.9359970601826556

'trevally000070' 111244 0.3631326759586153

'trevally000070' 111285 0.9359970601826556

'trevally000070' 111303 0.6764871602580358

'trevally000070' 111312 0.8789011506359462

'trevally000070' 111329 0.6285839333986252

'trevally000070' 111342 0.14079524896858192

'trevally000070' 111358 0.9359970601826556

'trevally000070' 111430 0.529017009340737

'trevally000070' 111480 0.3801224258868708

'trevally000070' 111482 0.3801

In [None]:
#######################################
Function: 
Run the cell above on the cluster for the full-size computation, as it was tested on a reduced-size data set.
#######################################

In [1]:
#run fact and chisq cell above
# Load the pfr-python3/3.6.5 environment module before submitting the job.
module load pfr-python3/3.6.5
# module avail   # uncomment to list the modules available on the cluster
# Submit the script as a batch job with bsub: -n 8 requests 8 job slots and
# -e redirects the job's standard error to pyerror.err.
bsub -n 8 -e pyerror.err "python Factorize_and_chisq.py"

Job <52186> is submitted to default queue <normal>.


In [None]:
#######################################
Input:
chi_data (file with scaffold name, position and p_value per variant position)

Function: 
Average the p_values within each 500-base bin in order to dampen outliers and to keep the values comparable with
those of the other tests.
    
Output:
meansp.txt (tab-separated file with bin start, scaffold name, mean p_value and -log10 of the mean p_value per 500-base bin)
#######################################

In [1]:
# Average the per-position p-values over 500-position bins to dampen outliers.
library(data.table)
library(ggplot2)
library(dplyr)

# chi_data has no header row; its three columns are named right after loading.
df <- read.table("/workspace/hramzr/Fisher_tests/chi_data", header = FALSE)
col_names <- c("SCAFFOLD", "POS", "PVAL")
colnames(df) <- col_names

# bin_start is the lower edge of the 500-wide bin containing POS:
# positions 1..500 map to 0, 501..1000 map to 500, and so on.
# Output columns, in order: bin_start, SCAFFOLD, PVAL (bin mean), minus_log10_P.
bin_means <- df %>%
  mutate(bin_start = (ceiling(POS / 500) * 500) - 500) %>%
  select(-POS) %>%
  group_by(bin_start, SCAFFOLD) %>%
  summarise(PVAL = mean(PVAL)) %>%
  mutate(minus_log10_P = -log10(PVAL))

# Tab-separated, without row names or a header line, so that downstream tools
# can address the columns by position.
write.table(
  bin_means,
  file = "meansp.txt",
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)


Attaching package: 'dplyr'

The following objects are masked from 'package:data.table':

    between, first, last

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



In [None]:
#######################################
Input:
meansp.txt(file with -log10P values and scaffold data)

Function: 
Filter the regions with a -log10P threshold, for plot making and comparative analysis with the other tests.

Output:
var[variable identifier, based on threshold](file with regions that are filtered by a specific -log10P value)
#######################################

In [10]:
#filter out on -log10p threshold
# Column 4 of meansp.txt holds the -log10 of the mean p-value per 500-position bin.
# Write one filtered file per threshold so that every var* file used further on
# can be regenerated from meansp.txt. The output name encodes the threshold with
# "d" in place of the decimal point (1.5 -> var1d5, 1.56 -> var1d56).
for threshold in 1.5 2.0 2.5 2.8 1.56; do
    awk -v min_log10p="$threshold" '$4 > min_log10p' meansp.txt > "var${threshold/./d}"
done

26
/powerplant/workspace/hramzr/Fisher_tests


In [None]:
#######################################
Input:
var[1d5, 2d0, 2d5 and 2d8](filtered files with scaffold names, regions and -log10P values)

Function: 
Plot the scaffolds of threshold filtered files.

Output:

Visualised plots.
#######################################

In [None]:
#scatterplot results
import pandas 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Directory holding the threshold-filtered files (var1d5, var2d0, ...).
FISHER_DIR = "/workspace/hramzr/Fisher_tests/"

# The filtered files are tab-separated without a header. Their columns follow
# meansp.txt: 0 = bin start, 1 = scaffold, 2 = mean p-value,
# 3 = -log10(mean p-value). The y axis is labelled "-log10p", so it has to use
# column 3; column 2 would plot the raw mean p-value under that label.
BIN_START_COL = 0
MINUS_LOG10_P_COL = 3


def scatter_filtered_regions(path):
    """Scatter-plot bin start against -log10(p) for one threshold-filtered file.

    Parameters
    ----------
    path : str
        Path to a tab-separated, header-less file of filtered 500-position bins.

    Returns
    -------
    matplotlib.axes.Axes
        The axes of the newly created 10x10 inch figure.
    """
    regions = pd.read_csv(path, delimiter="\t", header=None)
    ax = regions.plot(kind='scatter', x=BIN_START_COL, y=MINUS_LOG10_P_COL,
                      figsize=(10, 10))
    ax.set_ylabel("-log10p")
    ax.set_xlabel("start")
    ax.set_title("Scatterplot position versus confidence rate")
    return ax


# One figure per -log10p threshold: 1.5, 2.0, 2.5 and 2.8 (in that order).
plots = {}
for filtered_name in ("var1d5", "var2d0", "var2d5", "var2d8"):
    plots[filtered_name] = scatter_filtered_regions(FISHER_DIR + filtered_name)