/
split.c
104 lines (99 loc) · 2.82 KB
/
split.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
/* bwtool_split - splitting the bigWig into useful pieces */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <jkweb/common.h>
#include <jkweb/linefile.h>
#include <jkweb/hash.h>
#include <jkweb/options.h>
#include <jkweb/sqlNum.h>
#include <jkweb/basicBed.h>
#include <jkweb/bigWig.h>
#include <beato/bigs.h>
#include "bwtool.h"
void usage_split()
/* Explain usage of the splitting program and exit. */
{
errAbort(
"bwtool split - split the bigWig into chunks\n"
"usage:\n"
" bwtool split size file.bw[:chr:start-end] prefix\n"
"options:\n"
" -min-gap=g if gap is smaller than this, continue to add it\n"
" to the current piece (default 1)\n"
);
}
struct bed *newBed(char *chrom, int start, int end)
{
struct bed *bed;
AllocVar(bed);
bed->chrom = cloneString(chrom);
bed->chromStart = start;
bed->chromEnd = end;
return bed;
}
void bwtool_split(struct hash *options, char *regions, char *size_s, char *bigfile, char *tmp_dir, char *outputfile)
/* bwtool_split - main for the splitting program */
{
struct metaBig *mb = metaBigOpenWithTmpDir(bigfile, tmp_dir, regions);
FILE *output = mustOpen(outputfile, "w");
struct bed *section;
struct bed *splitList = NULL;
int size = 0;
unsigned min_gap = sqlUnsigned((char *)hashOptionalVal(options, "min_gap", "1"));
unsigned chunk_size = sqlUnsigned(size_s);
char chrom[256] = "";
int start = -1, end = 0;
boolean over_size = FALSE;
int ix = 1;
int gap = 0;
for (section = mb->sections; section != NULL; section = section->next)
{
struct perBaseWig *pbwList = perBaseWigLoadContinue(mb, section->chrom, section->chromStart,
section->chromEnd);
struct perBaseWig *pbw;
for (pbw = pbwList; pbw != NULL; pbw = pbw->next)
{
int length = pbw->chromEnd - pbw->chromStart;
if (end > 0)
gap = pbw->chromStart - end;
if (!sameString(chrom, pbw->chrom))
{
if (!sameString(chrom, ""))
slAddHead(&splitList, newBed(chrom, start, end));
strcpy(chrom, pbw->chrom);
start = pbw->chromStart;
end = pbw->chromEnd;
if (size + length > chunk_size)
size = length;
else
size += length;
}
else
{
if ((size + length + gap > chunk_size) && (gap >= min_gap))
{
slAddHead(&splitList, newBed(chrom, start, end));
start = pbw->chromStart;
end = pbw->chromEnd;
size = length;
}
else
{
size += length + gap;
end = pbw->chromEnd;
}
}
}
perBaseWigFreeList(&pbwList);
}
slAddHead(&splitList, newBed(chrom, start, end));
slReverse(&splitList);
for (section = splitList; section != NULL; section = section->next)
{
fprintf(output, "%s\t%d\t%d\n", section->chrom, section->chromStart, section->chromEnd);
}
carefulClose(&output);
metaBigClose(&mb);
bedFreeList(&splitList);
}