# makeconfigs-features
## This notebook parses our input data to measure the similarity between features, and uses that to create the karyotype, links and config files for a Circos (circos.ca) figure

### first load the file into a data structure

In [1]:
import csv
import random

# Input matrix. The first row supplies the column names; the first column and
# the last column are not features (the last column is the foreground flag
# tested below).
inpdat = "../../../3ormoreDATLOW.csv"
# newline='' is what the csv module asks for, so that line breaks inside
# quoted fields are handled by the reader rather than by the file object.
with open(inpdat, 'r', newline='') as f:
    listit = list(csv.reader(f))

# feature names: every header cell except the first and the last
featnames = listit[0][1:-1]

# index of the foreground flag, taken from the width of the header row
labelcol = len(listit[0]) - 1

cimphigh = []  # data set of only foreground
for row in listit:
    if not row:
        # csv.reader yields [] for a blank line; there is nothing to classify
        continue
    # Keep rows whose first cell is empty (presumably just the header row --
    # the sample count below assumes exactly one such row) and rows whose
    # flag column parses to 1.0.
    if row[0] == "" or float(row[labelcol]) == 1.0:
        cimphigh.append(row)


# cimphigh[0] is treated as the header row, so it is not counted as a sample;
# the first and last columns are not counted as features.
numsamps = len(cimphigh)-1
numfeats = len(cimphigh[0])-2
print("number of samples:",numsamps)
print("number of features:",numfeats)
featnames

number of samples: 32
number of features: 17


['BRAF_GRCh37_7:140453136-140453136_Missense-Mutation-SNP-A-A-T_Missense-Mutation-SNP-A-T-T',
 'GRB14_GRCh37_2:165365288-165365296_In-Frame-Del-DEL-TTTTTTTTT----',
 'MBD4_GRCh37_3:129155548-129155557_Frame-Shift-Del-DEL-TTTTTTTTTT----',
 'MSH6_GRCh37_2:48030640-48030647_Frame-Shift-Del-DEL-CCCCCCCC----',
 'PRDM2_GRCh37_1:14108749-14108757_In-Frame-Del-DEL-AAAAAAAAA----',
 'HMMR_GRCh37_5:162917426-162917434_In-Frame-Del-DEL-AAAAAAAAA----',
 'SEC63_GRCh37_6:108214755-108214764_Frame-Shift-Del-DEL-TTTTTTTTTT----',
 'KRAS_GRCh37_12:25398284-25398284_Missense-Mutation-SNP-C-T-T',
 'PIK3CA_GRCh37_3:178952085-178952085_Missense-Mutation-SNP-A-G-G',
 'RAD50_GRCh37_5:131931452-131931460_In-Frame-Del-DEL-AAAAAAAAA----',
 'ACVR2A_GRCh37_2:148683686-148683693_Frame-Shift-Del-DEL-AAAAAAAA----',
 'CASP5_GRCh37_11:104878041-104878050_Frame-Shift-Del-DEL-TTTTTTTTTT----',
 'MSH3_GRCh37_5:79970915-79970922_Frame-Shift-Del-DEL-AAAAAAAA----',
 'ATR_GRCh37_3:142274740-142274749_Frame-Shift-Del-DEL-TTTTTTTT

### next parse through the features and generate "similarity scores" (the number of times two features occur together across CIMP-High samples)

In [2]:
from itertools import combinations

# similarities maps (feature_a, feature_b) -> number of rows of cimphigh
# (excluding the header row) in which both feature columns equal 1.0.
# Each unordered pair of feature names is stored once, keyed in header order.
header = cimphigh[0]
featcols = range(1, len(header) - 1)  # skip the first and the last column

# Parse every feature column once instead of once per pair.
flagged = {
    col: [float(row[col]) == 1.0 for row in cimphigh[1:]]
    for col in featcols
}

similarities = {} # pairwise scores/counts of similarity stored here
for x, y in combinations(featcols, 2):
    namex, namey = header[x], header[y]
    # Skip a feature paired with itself, and any pair already recorded in
    # either orientation (only possible if a feature name is duplicated in
    # the header; the first occurrence wins).
    if namex == namey:
        continue
    if (namex, namey) in similarities or (namey, namex) in similarities:
        continue
    # number of rows where both features are present
    similarities[(namex, namey)] = sum(
        a and b for a, b in zip(flagged[x], flagged[y])
    )

similarities

{('ACVR2A_GRCh37_2:148683686-148683693_Frame-Shift-Del-DEL-AAAAAAAA----',
  'AIM2_GRCh37_1:159032487-159032496_Frame-Shift-Del-DEL-TTTTTTTTTT----'): 5,
 ('ACVR2A_GRCh37_2:148683686-148683693_Frame-Shift-Del-DEL-AAAAAAAA----',
  'ATR_GRCh37_3:142274740-142274749_Frame-Shift-Del-DEL-TTTTTTTTTT----'): 4,
 ('ACVR2A_GRCh37_2:148683686-148683693_Frame-Shift-Del-DEL-AAAAAAAA----',
  'CASP5_GRCh37_11:104878041-104878050_Frame-Shift-Del-DEL-TTTTTTTTTT----'): 3,
 ('ACVR2A_GRCh37_2:148683686-148683693_Frame-Shift-Del-DEL-AAAAAAAA----',
  'MSH3_GRCh37_5:79970915-79970922_Frame-Shift-Del-DEL-AAAAAAAA----'): 5,
 ('ACVR2A_GRCh37_2:148683686-148683693_Frame-Shift-Del-DEL-AAAAAAAA----',
  'TCF7L2_GRCh37_10:114925317-114925325_In-Frame-Del-DEL-AAAAAAAAA----'): 4,
 ('ACVR2A_GRCh37_2:148683686-148683693_Frame-Shift-Del-DEL-AAAAAAAA----',
  'TGFBR2_GRCh37_3:30691872-30691881_Frame-Shift-Del-DEL-AAAAAAAAAA----'): 10,
 ('AIM2_GRCh37_1:159032487-159032496_Frame-Shift-Del-DEL-TTTTTTTTTT----',
  'TCF7L2_GRCh37_

### open and set up a karyotype file.
### start by defining "chromosomes", i.e. the nodes of the Circos plot

In [3]:
# obtain n colors, algorithm written by Vivek Anandan user on Quora
# Start from a random RGB triple and advance every channel by the same fixed
# step (wrapping at 256), producing one colour per foreground sample.
# NOTE(review): `colors` is not used by the karyotype lines written below,
# which all use "green" -- confirm whether it is still needed.
colors = []
r = int(random.random() * 256)
g = int(random.random() * 256)
b = int(random.random() * 256)
# with no samples the loop body never runs, so avoid dividing by zero
step = 256/numsamps if numsamps else 0
for i in range(numsamps):
    r = int(r + step) % 256
    g = int(g + step) % 256
    b = int(b + step) % 256
    colors.append((r,g,b))

# The context manager closes karyotype.txt even if a write fails; calling
# close() on `kary` again afterwards is a harmless no-op.
with open("karyotype.txt","w") as kary:
    kary.write("# initialization of features\n")
    # format: chr - ID LABEL START END COLOR
    for x, feature in enumerate(cimphigh[0][1:-1]):
        # the text before the first "_" is used as both the ID and the label
        gene = feature.split("_")[0]
        # each "chromosome" starts at x*6 and spans numsamps units
        start = x*6
        kary.write("chr - %s %s %d %d green\n" % (gene, gene, start, start+numsamps)) # defines a "chromosome"

### create a file with links (the connections between features)

In [4]:
# close up the karyotype file and make a links file
# (close() is a no-op if the file has already been closed)
kary.close()

# Position of every feature name in featnames, looked up once instead of
# scanning the list for every link. setdefault keeps the first occurrence,
# which is what list.index() would return.
featpos = {}
for pos, name in enumerate(featnames):
    featpos.setdefault(name, pos)

# The context manager closes links.txt even if a write fails; calling close()
# on `links` again afterwards is a harmless no-op.
with open("links.txt","w") as links:
    links.write("# make links\n")
    #iterate over similarity scores and create a link if the score is at least 1
    count = 0
    for (t1, t2), val in similarities.items():
        if val <= 0:
            continue
        count += 1
        # A link is written as two lines sharing the same link id, one per
        # endpoint. Each endpoint starts at the feature's position * 6 and its
        # width is the similarity score.
        for feat in (t1, t2):
            start = featpos[feat]*6
            links.write("link%d %s %d %d z=0,color=blue\n" % (count, feat.split("_")[0], start, start+val))

### next up is creating the config file. load up data and set rules for links

In [5]:
# close up the link file and make a config file
# (close() is a no-op if the file has already been closed)
links.close()

# Every line of circos.conf, in output order. The strings are written
# verbatim; only the final one has no trailing newline.
# NOTE(review): etc/image.conf is included both at the top level and again
# inside the <image> block -- confirm the top-level include is intended.
conflines = (
    #required config lines
    "\n\n\n# required files from circos.ca\n",
    "<<include etc/image.conf>>\n",
    "<<include etc/colors_fonts_patterns.conf>>\n",
    "<<include etc/housekeeping.conf>>\n",

    # link karyotype
    "karyotype = karyotype.txt\n\n",

    # setup links
    "# add links\n",
    "<links>\n",
    "show = yes\n",
    "ribbon = yes\n",
    "flat = yes\n",
    "radius = 0.975r\n",
    "bezier_radius = 0r\n",
    "thickness = 2\n",

    "<link>\n",
    "file = links.txt\n",

    # set rules for color based on KRAS or BRAF
    # note these are completely optional and currently disabled; to enable
    # them, uncomment the entries below
    # "<rules>\n",
    # "<rule>\n",
    # "condition = between(BRAF,KRAS)\n",
    # "color = black\n",
    # "</rule>\n",
    #
    # "<rule>\n",
    # 'condition = var(chr1) eq "KRAS"\n',
    # "color = red\n",
    # "</rule>\n",
    #
    # "<rule>\n",
    # 'condition = var(chr1) eq "BRAF"\n',
    # "color = purple\n",
    # "</rule>\n",
    # "</rules>\n",

    "</link>\n",
    "</links>\n",

    # setup image
    "\n# setup image\n",
    "<image>\n",
    "<<include etc/image.conf>>\n",
    "</image>\n",

    # setup ideogram
    "\n# setup ideogram\n",
    "<ideogram>\n",
    "show_label = yes\n",
    "label_font = default\n",
    "label_radius = dims(ideogram,radius) + 0.075r\n",
    "label_size = 28\n",
    "label_parallel = yes\n",
    "<spacing>\n",
    "default = .07r\n",
    "break = .5r\n",
    "</spacing>\n\n",
    "thickness = 20p\n",
    "fill = yes\n",
    "radius = 0.90r\n",
    "</ideogram>",
)

# The context manager closes circos.conf even if a write fails.
with open("circos.conf","w") as conf:
    conf.writelines(conflines)