-
Notifications
You must be signed in to change notification settings - Fork 20
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
何伟明(Weiming He)
committed
Jun 20, 2023
1 parent
f358b52
commit 1523483
Showing
18 changed files
with
3,632 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
|
||
VCF2Dis: A new simple and efficient software to calculate p-distance matrix based Variant Call Format | ||
|
||
|
||
1) Introduction | ||
------------ | ||
|
||
This software relies on one external library package: [zlib] | ||
|
||
---------------------- zlib information ---------------------------- | ||
If the [zlib] library does not work, | ||
you can download it from this website and install it: | ||
http://www.zlib.net/ | ||
|
||
|
||
2) linux/Unix/MacOS INSTALL | ||
-------------------------------------- | ||
|
||
Just execute as follows : | ||
tar -zxvf VCF2DisXXX.tar.gz | ||
cd VCF2DisXXX; | ||
sh make.sh | ||
./bin/VCF2Dis | ||
|
||
#Note: If linking fails, try re-installing the zlib library | ||
#If linking still does not work, re-install the zlib libraries and copy them to the library directory: | ||
|
||
VCF2Dis-xx/src/include/zlib | ||
|
||
|
||
#step3 : | ||
sh make.sh # or [make && make clean] | ||
|
||
4) Contact | ||
email: hewm2008@gmail.com / hewm2008@qq.com | ||
join the QQ Group : 125293663 | ||
|
||
|
||
|
||
######################swimming in the sky and flying in the sea ########################### ## | ||
|
||
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
|
||
## 安装 PHYLIPNEW 记要 | ||
|
||
要运行 PHYLIPNEW-3.69.650/bin/fconsense 须先安装<b> PHYLIPNEW-3.69.660 </b>,其步骤如下: | ||
|
||
### 第一步:下载 | ||
cd /public/software/ | ||
wget ftp://emboss.open-bio.org/pub/EMBOSS/EMBOSS-6.6.0.tar.gz ./ | ||
wget ftp://emboss.open-bio.org/pub/EMBOSS/PHYLIPNEW-3.69.660.tar.gz ./ | ||
#在 ftp://emboss.open-bio.org/pub/EMBOSS/ 里面下载好 EMBOSS-6.6.0.tar.gz 和 PHYLIPNEW-3.69.660.tar.gz | ||
|
||
|
||
### 第二步:安装EMBOSS ; | ||
tar -xzvf EMBOSS-6.6.0.tar.gz # 解压 | ||
cd EMBOSS-6.6.0/ | ||
./configure --prefix=/public/software/EMBOSS-6.6.0 # [prefix后面接的目录必须是你自己有权限的目录] | ||
make | ||
|
||
## 其中这个软件依赖一些其他包,若系统没有就先安装之,在这打个比方,如报了缺少 libmysqlclient.so.* 库的错 | ||
## 那么请安装之,若有root: 运行: yum install mysql ; yum install mysql-devel 即可安装,若没有root则自行安装 | ||
## 现别人已经安装好在这了: /gss1/App_dir/mariadb-10.1.26/bin/mysql 在 /gss1/App_dir/mariadb-10.1.26/lib/里面存在 | ||
## libmysqlclient.*的,即可以直接加载环境变量就行 export LD_LIBRARY_PATH ; | ||
## 在这运行: export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/gss1/App_dir/mariadb-10.1.26/lib/ [可写入: ~/.bashrc 里面,免得每次都要重新加载] | ||
|
||
make install | ||
|
||
### 第三步:安装好EMBOSS以后,安装PHYLIPNEW | ||
|
||
tar -xzvf PHYLIPNEW-3.69.660.tar.gz # 解压 | ||
cp -r /public/software/EMBOSS-6.6.0/lib/* /public/software/PHYLIPNEW-3.69.650/lib/ | ||
cp -r /public/software/EMBOSS-6.6.0/include/* /public/software/PHYLIPNEW-3.69.650/include/ | ||
# 在安装好EMBOSS后 即在 EMBOSS-6.6.0 里面有include 和lib 目录; | ||
# 将EMBOSS-6.6.0下include里的头文件和lib文件夹copy到 PHYLIPNEW下 [or 加载一些变量 export LD_LIBRARY_PATH ; CFLAGS="-I$prefix/include" ; LDFLAGS="-L$prefix/lib" ] | ||
./configure --prefix=/public/software/PHYLIPNEW-3.69.650 | ||
make | ||
|
||
## 其中这个软件依赖一些其他包,若系统没有就先安装之,在这打个比方:报了缺少 libXaw.so.* 的库 | ||
## 在 https://www.x.org/archive/individual/lib/ 下载里面的 libXaw-1.0.13.tar.gz ; | ||
## #解压 libXaw-1.0.13.tar.gz,./configure --prefix=/public/software/PHYLIPNEW-3.69.650 (可以用这个目录,免得又要重新加载环境变量) | ||
## export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/public/software/PHYLIPNEW-3.69.650/lib; | ||
## make ;make install; | ||
|
||
make install | ||
|
||
### 第四步: 安装好后在 PHYLIPNEW-3.69.660/bin/fconsense | ||
#但运行会报在EMBOSS目录下少了 fneighbor.acd 文件 | ||
# 那么在 PHYLIPNEW-3.69.660 找之(find ./ -name fneighbor.acd),正常应在 PHYLIPNEW-3.69.650/emboss_acd 下 | ||
mkdir -p /public/software/EMBOSS-6.6.0/embassy/phylipnew/ ; | ||
ln -s /public/software/PHYLIPNEW-3.69.650/emboss_acd /public/software/EMBOSS-6.6.0/embassy/phylipnew/emboss_acd | ||
# 创建目录 并链接 or 可以copy | ||
|
||
### 其它信息 | ||
若后面再运行报少了xxx.so\.*的话 把这个xxx.so.\*的路径找到并记录下来,直接在 \~/.bashrc加载之,export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:库所在路径 | ||
如把 <b><i>export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/public/software/PHYLIPNEW-3.69.650/lib; </i></b>写进 <b>\~/.bashrc</b> | ||
免得每次要用都重新加载之 | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
# Install PHYLIPNEW | ||
|
||
</br>To run <b>PHYLIPNEW-3.69.650/bin/fconsense</b> software, <b>PHYLIPNEW-3.69.660</b> must be installed first, and the steps are as follows: | ||
|
||
### Step 1: Download | ||
cd /public/software/ | ||
wget ftp://emboss.open-bio.org/pub/EMBOSS/EMBOSS-6.6.0.tar.gz ./ | ||
wget ftp://emboss.open-bio.org/pub/EMBOSS/PHYLIPNEW-3.69.660.tar.gz ./ | ||
#Download EMBOSS-6.6.0.tar.gz and PHYLIPNEW-3.69.660.tar.gz at [ftp://emboss.open-bio.org/pub/EMBOSS/] | ||
|
||
|
||
### Step 2: Install EMBOSS | ||
tar -xzvf EMBOSS-6.6.0.tar.gz # Unzip package | ||
cd EMBOSS-6.6.0 | ||
./configure --prefix=/public/software/EMBOSS-6.6.0 #[You must have read and write permissions for this Dir] | ||
make | ||
|
||
## This software depends on some other packages. If your system lacks one of them, install it first. For example, if the build reports that the [libmysqlclient.so.*] library is missing, | ||
## install it. If you have root access, run [ yum install mysql; yum install mysql-devel ]; otherwise install it yourself in a directory you own. | ||
## In this example someone else has already installed the MySQL client here: [/gss1/App_dir/mariadb-10.1.26/bin/mysql], and [libmysqlclient.*] exists in [/gss1/App_dir/mariadb-10.1.26/lib/], | ||
## so it is enough to add that directory to the environment variable [LD_LIBRARY_PATH]. | ||
## Run: [export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/gss1/App_dir/mariadb-10.1.26/lib/] [you can also write this line into ~/.bashrc so it does not have to be re-run in every session] | ||
|
||
make install | ||
|
||
|
||
### Step 3: After installing EMBOSS, install PHYLIPNEW | ||
tar -xzvf PHYLIPNEW-3.69.660.tar.gz #Unzip package | ||
cp -r /public/software/EMBOSS-6.6.0/lib/* /public/software/PHYLIPNEW-3.69.650/lib/ | ||
cp -r /public/software/EMBOSS-6.6.0/include/* /public/software/PHYLIPNEW-3.69.650/include/ | ||
# After EMBOSS is installed successfully, there are [include] and [lib] directories in EMBOSS-6.6.0; | ||
# Copy the header files under include and the contents of the lib folder from EMBOSS-6.6.0 to PHYLIPNEW [or set some variables instead: export LD_LIBRARY_PATH; CFLAGS="-I$prefix/include"; LDFLAGS="-L$prefix/lib"] | ||
./configure --prefix=/public/software/PHYLIPNEW-3.69.650 | ||
make | ||
|
||
## This software depends on some other libraries. If the system does not have them, install them first. For example, if the build reports that the [libXaw.so.*] library is missing, | ||
## download [libXaw-1.0.13.tar.gz] from [https://www.x.org/archive/individual/lib/]; | ||
## tar -zxvf libXaw-1.0.13.tar.gz; cd libXaw-1.0.13 ; ./configure --prefix=/public/software/PHYLIPNEW-3.69.650 (using this prefix avoids having to add yet another directory to the environment variables) | ||
## export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/public/software/PHYLIPNEW-3.69.650/lib; | ||
## make ;make install; | ||
|
||
make install | ||
|
||
|
||
### Step 4: Run PHYLIPNEW-3.69.660/bin/fconsense | ||
# After installation, the first run of [PHYLIPNEW-3.69.660/bin/fconsense] will report that the [fneighbor.acd] file is missing in the EMBOSS directory. | ||
# Find it in the PHYLIPNEW-3.69.660 directory [find ./ -name fneighbor.acd]; normally it will be under [PHYLIPNEW-3.69.650/emboss_acd] | ||
mkdir -p /public/software/EMBOSS-6.6.0/embassy/phylipnew/ ; | ||
ln -s /public/software/PHYLIPNEW-3.69.650/emboss_acd /public/software/EMBOSS-6.6.0/embassy/phylipnew/emboss_acd | ||
# Create directory and link or copy | ||
|
||
|
||
### Other Info: | ||
</br> If a missing [xxx.so.\*] is reported when running the software later, find and record the path of this [xxx.so.\*] and load it directly in \~/.bashrc: <i> export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/Dir_Path_Where_This_Library_Is_Located/;</i> | ||
</br> For example, write <b><i>export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/public/software/PHYLIPNEW-3.69.650/lib;</i></b> into the file <b>[\~/.bashrc]</b> to avoid reloading it every time you use the software | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,128 @@ | ||
# VCF2Dis | ||
<b>VCF2Dis: A new simple and efficient software to calculate p-distance matrix based Variant Call Format</b> | ||
|
||
### 1) Install | ||
------------ | ||
The <b>new version</b> will be updated and maintained in <b>[hewm2008/VCF2Dis](https://github.com/hewm2008/VCF2Dis)</b>, please click below Link to download the latest version | ||
</br><p align="center"><b>[hewm2008/VCF2Dis](https://github.com/hewm2008/VCF2Dis)</b></p> | ||
<b> [Download](https://github.com/hewm2008/VCF2Dis/archive/v1.50.tar.gz) </b> | ||
|
||
</br> Just run [make] or [sh make.sh] to compile this software. The final executable can be found at <b>[bin/VCF2Dis]</b> | ||
</br> For <b>linux /Unix </b> and <b>macOS</b> | ||
<pre> | ||
tar -zxvf VCF2DisXXX.tar.gz # if linking fails, try <b>re-installing</b> the [zlib] library | ||
cd VCF2DisXXX; # and copy its files to the library directory: | ||
sh make.sh; # VCF2Dis-xx/src/include/zlib | ||
./bin/VCF2Dis | ||
</pre> | ||
|
||
**Note:** If linking fails, try <b>re-installing</b> the [**_zlib_**](https://zlib.net/) library | ||
|
||
### 2) An example of an NJ tree without bootstrap | ||
------------ | ||
* 1) Parameter description: | ||
```php | ||
Usage: VCF2Dis -InPut <in.vcf> -OutPut <p_dis.mat> | ||
|
||
-InPut <str> Input one or muti GATK VCF genotype File | ||
-OutPut <str> OutPut Sample p-Distance matrix | ||
|
||
-InList <str> Input GATK muti-chr VCF Path List | ||
-SubPop <str> SubGroup SampleList of VCFFile [ALLsample] | ||
-Rand <float> Probability (0-1] for each site to join Calculation [1] | ||
-KeepMF Keep the Middle File diff & Use matrix | ||
|
||
-help Show more help [hewm2008 v1.50] | ||
|
||
``` | ||
|
||
* 2) To Create the p_distance matrix | ||
|
||
``` | ||
# 2.1) To create the p_distance matrix for all samples in the VCF, run VCF2Dis directly | ||
./bin/VCF2Dis -InPut in.vcf.gz -OutPut p_dis.mat | ||
# ./bin/VCF2Dis -InPut in.fa.gz -OutPut p_dis.mat -InFormat FA | ||
# 2.2) To create the p_distance matrix for a subgroup of samples, put their sample names into the file sample.list | ||
./bin/VCF2Dis -InPut chr1.vcf.gz chr2.vcf.gz -OutPut p_dis.mat -SubPop sample.list | ||
``` | ||
|
||
* 3) Construct the NJ tree and display it (requires other software) | ||
|
||
#### method 1 | ||
Choose one of **A/B** | ||
</br> **A.** Upload <b>p_dis.mat</b> to the web service [fneighbor (http://emboss.toulouse.inra.fr/cgi-bin/emboss/fneighbor?_pref_hide_optional=1)](http://emboss.toulouse.inra.fr/cgi-bin/emboss/fneighbor?_pref_hide_optional=1), then click the **Run fneighbor** button. You will then get the output file **datafile.treefile** | ||
</br> **B.** Upload <b>p_dis.mat</b> to the website [fastme (http://www.atgc-montpellier.fr/fastme/)](http://www.atgc-montpellier.fr/fastme/), set <b>Data Type</b> to <b>Distance matrix</b>, and click the <b>execute & email results</b> button at the bottom. You will get <b>p_dis_mat_fastme-tree.nwk</b>; providing an email address is not mandatory. | ||
|
||
|
||
Run MEGA # MEGA (http://www.megasoftware.net/) can be used to display the phylogenetic tree from the file [p_dis_mat_fastme-tree.nwk] | ||
|
||
#### method 2 | ||
Use the PHYLIPNEW to construct nj-tree | ||
</br>How to Install PHYLIPNEW please Click on <b> [here](https://github.com/hewm2008/VCF2Dis/blob/main/Install.NJ.en.md)</b> or Click on <b>[here(Chinese)](https://github.com/hewm2008/VCF2Dis/blob/main/Install.NJ.cn.md)</b> | ||
``` | ||
# 3.1 Run PHYLIP | ||
# After the p_distance matrix is done, PHYLIPNEW 3.69 (http://evolution.genetics.washington.edu/phylip.html) with the neighbor-joining method can be used to construct the phylogenetic tree on the basis of this p_distance matrix; | ||
PHYLIPNEW-3.69.650/bin/fneighbor -datafile p_dis.mat -outfile tree.out1.txt -matrixtype s -treetype n -outtreefile tree.out2.tre | ||
# 3.2 Run MEGA | ||
# The MEGA6 (http://www.megasoftware.net/) was used to present the phylogenetic tree based this file [tree.out2.tre] | ||
``` | ||
* 4) you can see the neighbor-joining tree and save it as PDF format | ||
|
||
|
||
### 3) An example of an NJ tree with bootstrap | ||
|
||
* 1) Run the NJ-tree construction multiple times using sampling with replacement (put-back sampling). | ||
Use a random part of the sites and build the NJ tree as above. Repeat this <b>NN</b> times, X=(1,2....NN); | ||
|
||
``` | ||
./bin/VCF2Dis -InPut in.vcf.gz -OutPut p_dis_X.mat -Rand 0.25 | ||
PHYLIPNEW-3.69.650/bin/fneighbor -datafile p_dis_X.mat -outfile tree.out1_X.txt -matrixtype s -treetype n -outtreefile tree.out2_X.tre | ||
``` | ||
|
||
* 2) Merge all the resampled NJ trees and construct the bootstrap NJ tree. | ||
|
||
``` | ||
cat tree.out2_*.tre > ALLtree_merge.tre | ||
PHYLIPNEW-3.69.650/bin/fconsense -intreefile ALLtree_merge.tre -outfile out -treeprint Y | ||
perl ./bin/percentageboostrapTree.pl ALLtree_merge.treefile NN Final_boostrap.tre | ||
``` | ||
* 3) construct nj-tree and present it (need deal with Other software) | ||
``` | ||
# MEGA6 (http://www.megasoftware.net/) can be used to display the phylogenetic tree from the file [Final_boostrap.tre] | ||
``` | ||
------------ | ||
### 4) Introduction | ||
------------ | ||
VCF2Dis creates the p_distance matrix based on the VCF file. For more information about the p_distance matrix, see <b>[this website.](http://evolution.genetics.washington.edu/phylip/doc/distance.html)</b> | ||
The VCF SNP datasets are used to calculate the p-distance between individuals, using the following formula for the genetic distance between sample i and sample j: | ||
|
||
D_ij=(1/L) * [(sum(d(l)_ij))] | ||
|
||
</br> Where L is the length of regions where SNPs can be identified, and given the alleles at position l are A/C: | ||
``` | ||
d(l)_ij=0.0 if the genotypes of the two individuals were AA and AA; | ||
d(l)_ij=0.5 if the genotypes of the two individuals were AA and AC; | ||
d(l)_ij=0.0 if the genotypes of the two individuals were AC and AC; | ||
d(l)_ij=1.0 if the genotypes of the two individuals were AA and CC; | ||
d(l)_ij=0.0 if the genotypes of the two individuals were CC and CC; | ||
``` | ||
|
||
|
||
|
||
### 5) Results | ||
------------ | ||
Some NJ-tree images that I drew for earlier papers. | ||
|
||
* [50 Rices NBT](http://www.nature.com/nbt/journal/v30/n1/images/nbt.2050-F1.jpg) | ||
* [31 soybeans NG]( http://www.nature.com/ng/journal/v42/n12/images/ng.715-F1.jpg) | ||
|
||
### 6) Discussing | ||
------------ | ||
- [:email:](https://github.com/hewm2008/VCF2Dis) hewm2008@gmail.com / hewm2008@qq.com | ||
- join the <b><i>QQ Group : 125293663</i></b> | ||
|
||
|
||
######################swimming in the sky and flying in the sea ########################### ## |
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
#!/usr/bin/perl
use strict;
use warnings;
use List::Util qw(min);
use Scalar::Util qw(looks_like_number);

# Purpose: read a file of ';'-terminated tree records, rewrite every number
#          that follows a ':' as a percentage of <RepeatTime>
#          (value * 100 / RepeatTime, one decimal place) and write one
#          converted record per line to the output file.
# Usage:   perl <this script> <merge.tre> <RepeatTime> <boostrap.tre>
#edit by hewm; Wed Feb 20 11:02:07 HKT 2019
#Version 1.0 hewm@genomics.org.cn

# support_to_percent($tree, $total_repeat)
#   $tree         - one tree record as text (newlines already removed).
#   $total_repeat - non-zero number that the values are divided by.
# Returns the record with every "<text>:<number>" rewritten as
# "<text>:<percent>", where percent = sprintf("%.1f", number*100/total).
# All text that is not a ':' value is copied through unchanged.
sub support_to_percent {
    my ($tree, $total_repeat) = @_;

    my $converted = '';
    my $cursor    = 0;    # start of the text not yet copied to $converted

    while ((my $colon = index($tree, ':', $cursor)) != -1) {
        my $prefix      = substr($tree, $cursor, $colon - $cursor);
        my $value_start = $colon + 1;

        # The value runs up to the nearest ',' or ')' after the colon.
        my @stops = grep { $_ != -1 } (
            index($tree, ',', $value_start),
            index($tree, ')', $value_start),
        );

        my $value_end;
        if (@stops) {
            $value_end = min(@stops);
        }
        else {
            # No ',' or ')' follows (e.g. a value right before the final
            # ';'). The value then ends at the ';' or at end of record.
            # Moving the cursor past the colon here is what keeps the scan
            # from finding the same ':' again and looping forever.
            my $semicolon = index($tree, ';', $value_start);
            $value_end = $semicolon == -1 ? length($tree) : $semicolon;
        }

        my $value = substr($tree, $value_start, $value_end - $value_start);
        $converted .= $prefix . ':'
            . sprintf('%.1f', $value * 100.0 / $total_repeat);
        $cursor = $value_end;
    }

    # Copy whatever follows the last converted value (closing ')' , ';', ...).
    return $converted . substr($tree, $cursor);
}

# main(@args): command-line entry point; expects exactly three arguments
# (input tree file, repeat count, output tree file). Dies on bad usage, on a
# repeat count that is not a positive number, or when a file cannot be
# opened or the output cannot be closed.
sub main {
    my @args = @_;
    die "Version 1.0\t2019-02-20;\nUsage: $0 <merge.tre><RepeatTime><boostrap.tre>\n" unless (@args == 3);

    my ($in_file, $total_repeat, $out_file) = @args;

    # Reject the value before the output file is created/truncated; a zero
    # or non-numeric count would otherwise fail as a division by zero.
    die "RepeatTime must be a positive number, got '$total_repeat'\n"
        unless looks_like_number($total_repeat) && $total_repeat > 0;

    # Three-argument open: the file names are never interpreted as a mode
    # or a command.
    open(my $in,  '<', $in_file)  or die "input file can't open $!";
    open(my $out, '>', $out_file) or die "output file can't open $!";

    # Records are terminated by ';'; localised so the change of the input
    # record separator does not leak out of this sub.
    local $/ = ';';

    while (my $record = <$in>) {
        $record =~ s/\n//g;
        next if ($record eq '');
        print {$out} support_to_percent($record, $total_repeat), "\n";
    }

    close $in;
    # Buffered write errors only surface when the handle is closed.
    close $out or die "output file can't close $!";
    return;
}

# Run only when executed as a script, so the helper can be loaded and
# called on its own.
main(@ARGV) unless caller;

1;

######################swimming in the sky and flying in the sea ###########################
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
#!/bin/sh
#$ -S /bin/sh
#Version1.0 hewm@genomics.org.cn 2017-06-13
# Example run: prints a start timestamp, builds the p-distance matrix
# p_dis.mat from in.vcf.gz with ../bin/VCF2Dis, then prints an end timestamp.
# The paths are relative, so the current directory matters when it is run.
printf '%s\n' 'Start Time :'
date
../bin/VCF2Dis -InPut in.vcf.gz -OutPut p_dis.mat
# Alternative (disabled): same run with -SubPop sample.list added.
#../bin/VCF2Dis -InPut in.vcf.gz -OutPut p_dis.mat -SubPop sample.list
printf '%s\n' 'End Time :'
date
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
S010 | ||
S033 | ||
S186 | ||
S123 | ||
S124 | ||
S011 |
Oops, something went wrong.