In [156]:
events.tab<-read.table('data/fromGTF.SE.txt',sep='\t',header=T)

up.intron.s<-events.tab$upstreamEE

up.intron.e<-events.tab$exonStart_0base

down.intron.s<-events.tab$exonEnd

down.intron.e<-events.tab$downstreamES

out.tab<-cbind(as.character(events.tab$chr),up.intron.s,up.intron.e,as.character(events.tab$geneSymbol),
               up.intron.e-up.intron.s-1,as.character(events.tab$strand),events.tab$ID)

out.tab<-rbind(out.tab,cbind(as.character(events.tab$chr),down.intron.s,down.intron.e,as.character(events.tab$geneSymbol),
               down.intron.e-down.intron.s-1,as.character(events.tab$strand),events.tab$ID))

write.table(out.tab,'flanking_introns.bed',sep='\t',col.names = F,row.names = F,quote = F)


In [157]:
system('conda install -c bioconda bedtools')

In [158]:
system('bedtools intersect -wa -wb -s -F 1 -a flanking_introns.bed -b repeats.bed > intron_alu_intersections.bed')

all.intron.alu.intr<-read.table('intron_alu_intersections.bed',sep='\t',header = F)

colnames(all.intron.alu.intr)<-c('chr','up_intr_s','up_intr_e','gene','up_intr_l','strand','ID','chr','alu_s','alu_e','alu','blank','strand')


In [159]:
all.intron.alu.intr<-all.intron.alu.intr[grepl('^Alu',all.intron.alu.intr$alu),]

In [160]:
all.intron.alu.intr=all.intron.alu.intr[!duplicated(paste0(all.intron.alu.intr$ID,all.intron.alu.intr$alu)),]

In [161]:
tissue.list=read.csv('data/tissue_list.csv',header=FALSE)

sb.events=c()

all.events=c()

for (tissue in tissue.list$V2)
{
    
    all.tissue.events=read.csv(paste0('data/se_',tissue,'_AS_model_B_sex_as_events.csv'))
  
    tissue.all.event.ids=lapply(lapply(lapply(as.character(rownames(all.tissue.events)),strsplit,split='-'),unlist)
                                ,'[[',2)
    
    all.events=unique(c(all.events,tissue.all.event.ids))
                        
    tissue.sb.events=read.csv(paste0('data/se_',tissue,'_AS_model_B_sex_as_events_refined.csv'))
    
    tissue.sb.event.ids=lapply(lapply(lapply(as.character(rownames(tissue.sb.events)),strsplit,split='-'),unlist)
                               ,'[[',2)
  
    sb.events=unique(c(sb.events,tissue.sb.event.ids))
    
}

In [162]:
alu.enrich=matrix(ncol=7,nrow=0)

colnames(alu.enrich)<-c('Alu','AluASCount','AluSBCount','TotalAS','TotalSB','PVal','FDR')

num.sb.events=length(sb.events) #total number of sex-biased events

num.as.events=length(all.events) #total number of alternative splicing events

all.events.per.alu=table(all.intron.alu.intr$alu[all.intron.alu.intr$ID %in% all.events])   #number of events per Alu family

sb.events.per.alu=table(all.intron.alu.intr$alu[all.intron.alu.intr$ID %in% sb.events]) #number of sex-biased events per Alu family

for (i in (1:length(all.events.per.alu)))    
{
  
  cur.alu<-names(all.events.per.alu)[i]  #get the name of the next Alu family
  
  if (!cur.alu %in% names(sb.events.per.alu) | all.events.per.alu[cur.alu]<10 | !grepl('^Alu',cur.alu))
    
    next
  
  alu.sb<-sb.events.per.alu[cur.alu]  #sex-biased events with this Alu family
  
  alu.as<-all.events.per.alu[cur.alu]  #all AS events with this Alu family
  
  pval<-exp(phyper(q=alu.sb, m=alu.as, n=num.as.events-alu.as, k=num.sb.events,log.p = T,lower.tail = F))
  
  alu.enrich<-rbind(alu.enrich, c(cur.alu,alu.as,alu.sb,num.as.events,num.sb.events,pval,0))
  
}



In [163]:
alu.enrich[order(alu.enrich[,'PVal']),]

Alu,AluASCount,AluSBCount,TotalAS,TotalSB,PVal,FDR
AluYe6,25,6,39970,3948,0.0088504196909708,0
AluYk2,81,12,39970,3948,0.0538096385965136,0
AluYb9,45,7,39970,3948,0.0713761391664266,0
AluYh7,12,2,39970,3948,0.1077257677052,0
AluYf1,180,22,39970,3948,0.120608955681909,0
AluYc3,66,8,39970,3948,0.201043020038783,0
AluYk12,18,2,39970,3948,0.260017900341417,0
AluYk4,148,15,39970,3948,0.390636366257782,0
AluSq10,239,24,39970,3948,0.412244372530931,0
AluYh3,309,31,39970,3948,0.416202006192871,0


In [171]:
esr.tab<-read.table('data/all-byclass-noM-plusminus-hits-uniq.txt',sep=' ',header = T)

chr<-unlist(lapply(lapply(lapply(lapply(as.character(esr.tab$item_coordinates),strsplit,split=':'),'[[',1),'[[',1),'[',1))

start<-unlist(lapply(lapply(lapply(as.character(esr.tab$item_coordinates),strsplit,split=':'),'[[',1),'[',2))

start<-unlist(lapply(lapply(lapply(start,strsplit,split='-'),'[[',1),'[',1))

end<-unlist(lapply(lapply(lapply(as.character(esr.tab$item_coordinates),strsplit,split=':'),'[[',1),'[',2))

end<-unlist(lapply(lapply(lapply(end,strsplit,split='-'),'[[',1),'[',2))

end<-unlist(lapply(lapply(lapply(end,strsplit,split=','),'[[',1),'[',1))

strand<-unlist(lapply(lapply(lapply(as.character(esr.tab$item_coordinates),strsplit,split=':'),'[[',1),'[',2))

strand<-unlist(lapply(lapply(lapply(strand,strsplit,split=','),'[[',1),'[',2))

out.tab<-cbind(chr,start,end,rep('NA',length(chr)),as.integer(end)-as.integer(start)-1,strand,as.character(esr.tab$sequence_name),esr.tab$item_antisense)

write.table(out.tab,'alu_with_esr.bed',sep='\t',col.names = F,row.names = F,quote = F)

system('bedtools intersect -wa -wb -s -F 1 -a flanking_introns.bed -b alu_with_esr.bed > intron_esr_intersections.bed')


In [165]:
rm(out.tab)

rm(esr.tab)

rm(chr)

rm(start)

rm(end)

rm(strand)

In [169]:
alu.esr.tab<-read.table('intron_esr_intersections.bed',sep='\t',header = F)

alu.esr.tab<-alu.esr.tab[grepl('^Alu',alu.esr.tab$V14),]

alu.esr.tab<-alu.esr.tab[alu.esr.tab$V15==TRUE,]

alu.esr.tab<-alu.esr.tab[!duplicated(paste0(alu.esr.tab$V7,alu.esr.tab$V14)),]

alu.esr.tab<-alu.esr.tab[alu.esr.tab$V7 %in% all.events,]

alu.types<-table(as.character(alu.esr.tab$V14))

sb.esr.tab<-table(as.character(alu.esr.tab$V14[alu.esr.tab$V7 %in% sb.events]))

alu.esr.enrich<-matrix(ncol=7,nrow=0)

colnames(alu.esr.enrich)<-c('Alu','CountAS','CountSB','TotalAS','TotalSB','PVal','FDR')

for (i in (1:length(alu.types)))
{
  
  cur.cat<-names(alu.types)[i]  
  
  count.sb<-sb.esr.tab[cur.cat]
  
  count.as<-alu.types[cur.cat]
  
  if (is.na(count.sb) || all.events.per.alu[cur.cat]+sb.events.per.alu[cur.cat]<25 || count.sb<25)
    
    next
          
  pval<-exp(phyper(q=count.sb, m=count.as, n=all.events.per.alu[cur.cat]-count.as, k=sb.events.per.alu[cur.cat],log.p = T,lower.tail = F))
  
  alu.esr.enrich<-rbind(alu.esr.enrich, c(cur.cat,count.as,count.sb,all.events.per.alu[cur.cat],sb.events.per.alu[cur.cat],pval,0))
}



In [173]:
alu.esr.enrich[order(alu.esr.enrich[,'PVal']),]

Alu,CountAS,CountSB,TotalAS,TotalSB,PVal,FDR
AluSx1,4837,447,8758,773,0.0593287357781814,0
AluJo,3292,308,7142,636,0.100540526736884,0
AluY,2895,262,8201,707,0.143748843986385,0
AluSq,853,73,1680,136,0.213178593181609,0
AluSc5,381,32,802,63,0.249417737807356,0
AluJb,5247,448,9921,836,0.322810663420517,0
AluSg,1972,161,3853,310,0.368433669930801,0
AluSz6,2423,186,4784,366,0.451236374231679,0
AluSp,2577,220,4903,421,0.531856367563687,0
AluJr4,1105,95,2410,210,0.544707024875345,0
