## Module import

In [1]:
import sqlite3
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import plotly
import plotly.graph_objects as go
import ipywidgets as widgets

## Module version

In [2]:
print("sqlite3 version:", sqlite3.version)
print("pandas version:", pd.__version__)
print("matplotlib version:", matplotlib.__version__)
print("numpy version:", np.__version__)
print("plotly version:", plotly.__version__)
print("ipywidgets version:", widgets.__version__)

sqlite3 version: 2.6.0
pandas version: 1.2.2
matplotlib version: 3.3.4
numpy version: 1.19.2
plotly version: 4.14.3
ipywidgets version: 7.6.3


# Chromosome visualization

## Functions

Chromosomes data recuperation.

In [3]:
def string_to_list(string):
    return list(string.split(", "))

def get_locus_info(database, info, source, condition, group_by, order_by):
    
    #SQL request
    db_connexion = sqlite3.connect(database)
    
    cursor = db_connexion.cursor()
    
    chrom_sense = cursor.execute("""
    SELECT """ + info + """
    FROM """ + source + """ 
    WHERE Strand == 'W' """ + condition + group_by + """
    ORDER BY """ + order_by)
    
    cursor = db_connexion.cursor()

    chrom_antisense = cursor.execute("""
    SELECT """ + info + """
    FROM """ + source + """ 
    WHERE Strand == 'C'""" + condition + group_by + """
    ORDER BY """ + order_by)
    
    List_info = string_to_list(info)
    
    #pandas dataframe formatting
    chrom_sense = chrom_sense.fetchall()
    chrom_sense = pd.DataFrame(chrom_sense, columns=List_info)
    
    chrom_antisense = chrom_antisense.fetchall()
    chrom_antisense = pd.DataFrame(chrom_antisense, columns=List_info)
    
    return chrom_sense, chrom_antisense

Coordinates formatting.

In [4]:
def format_coordinates(coordinates, y):
    
    genome_data = pd.DataFrame(columns = coordinates.columns)
    row_null = {"Start_coordinate": "none", "Stop_coordinate": "none", "count(SGDID)": "0", "Chromosome": "0"}
    
    for c in range(1, 18):
        chrom = coordinates[coordinates["Chromosome"] == str(c)]
        row_one = chrom.copy()
        row_one.index = range(0, len(chrom)*3, 3)
        row_one = row_one.drop("Stop_coordinate", axis = 1)
        row_one = row_one.transpose()
        
        row_two = chrom.copy()
        row_two.index = range(1, len(chrom)*3, 3)
        row_two["Start_coordinate"] = row_two["Stop_coordinate"]
        row_two = row_two.drop("Stop_coordinate", axis = 1)
        row_two = row_two.transpose()
        
        row_three = chrom.assign(Start_coordinate = "none")
        row_three.index = range(2, len(chrom)*3, 3)
        row_three = row_three.drop("Stop_coordinate", axis = 1)
        row_three = row_three.transpose()
        
        chrom_data = pd.merge(row_one, row_two, how = "outer", left_index = True, right_index = True)
        chrom_data = pd.merge(chrom_data, row_three, how = "outer", left_index = True, right_index = True)
        chrom_data = chrom_data.transpose()
        chrom_data = chrom_data.sort_index()
        chrom_data = chrom_data.assign(Stop_coordinate = y[c-1])    
        
        chrom_data = chrom_data.append(row_null, ignore_index = True)
        
        genome_data = genome_data.append(chrom_data)
        
    return genome_data

Genome drawing.

In [5]:
def genome_drawing(genome_data, parameter, color):
    
    #color = genome_data[color].astype(int)
    #color = np.array(color)
    
    
    chromosomes = format_chromosomes(list(range(0,108,6)),
                                     list(i - 0.4 for i in range(0,108,6)))
    
    fig = go.Figure(data=[go.Scatter(x = chromosomes.x,
                                     y = chromosomes.y,
                                     mode ="lines",
                                     line = {"color": "lightgray", "width": 10},
                                     name = ""),
                          go.Scatter(x = genome_data.Start_coordinate,
                                     y = genome_data.Stop_coordinate,
                                     mode ="lines",
                                     line = {"color": "#569AA7",
                                             "width": 10},
                                     name = "",
                                     customdata = genome_data,
                                     hovertemplate = ("<b>Locus :</b> %{customdata[0]} <br>"
                                                      "<b>Chromosome :</b> %{customdata[4]} <br>"),
                                     hoverlabel = dict(bgcolor = "white", font_size = 16),
                                    )])
    
    fig.update_layout(plot_bgcolor = "white", xaxis_showgrid = False, yaxis_showgrid = False)
    fig.show()


Chromosome shapes.

In [15]:
def get_chromosome_lenght(chrom_number):
    #SQL request
    db_connexion = sqlite3.connect('../SCERE.db')

    cursor = db_connexion.cursor()

    chromosome_length = cursor.execute("""
    SELECT length
    FROM chromosome_length
    """)
    
    chromosome_length = chromosome_length.fetchall()
    chromosome_length = pd.DataFrame(chromosome_length, columns = ["length"], index = list(range(1,18)))
    
    return chromosome_length.loc[chrom_number][0]

def format_chromosomes(y1, y2):
    
    chromosomes = pd.DataFrame(columns = ["x", "y"])
    
    for c in range(1,18):
        chrom_lenght = get_chromosome_lenght(c)
        chromosomes = chromosomes.append({"x": 0, "y": y1[c-1]}, ignore_index = True)
        chromosomes = chromosomes.append({"x": chrom_lenght, "y": y1[c-1]}, ignore_index = True)
        chromosomes = chromosomes.append({"x": "none", "y": "none"}, ignore_index = True)
        
        chromosomes = chromosomes.append({"x": 0, "y": y2[c-1]}, ignore_index = True)
        chromosomes = chromosomes.append({"x": chrom_lenght, "y": y2[c-1]}, ignore_index = True)
        chromosomes = chromosomes.append({"x": "none", "y": "none"}, ignore_index = True)
    
    return chromosomes

## Application

In [16]:
%%time

chrom_sense = get_locus_info("../SCERE.db",
                             """Primary_SGDID, count(SGDID), Start_coordinate, Stop_coordinate, Chromosome""",
                            "gene_literature, SGD_features",
                             """AND (Strand == 'W') AND (SGDID == Primary_SGDID) """, """ GROUP BY SGDID """,
                             """ Start_coordinate"""
                            )[0]

display(chrom_sense)
chrom_sense = format_coordinates(chrom_sense, list(range(0,108,6)))
display(chrom_sense)

chrom_antisense = get_locus_info("../SCERE.db",
                                 """Primary_SGDID, count(SGDID), Start_coordinate, Stop_coordinate, Chromosome""",
                                 "gene_literature, SGD_features",
                                 """AND (Strand == 'C') AND (SGDID == Primary_SGDID) """, """ GROUP BY SGDID """,
                                 """ Start_coordinate"""
                                )[1]

chrom_antisense = format_coordinates(chrom_antisense, list(i - 0.4 for i in range(0,108,6)))

whole_genome = chrom_sense.append(chrom_antisense)

genome_drawing(whole_genome, "Primary_SGDID", "count(SGDID)")


Unnamed: 0,Primary_SGDID,count(SGDID),Start_coordinate,Stop_coordinate,Chromosome
0,S000001826,4,53,535,6
1,S000029654,132,252,1523,2-micron
2,S000002143,1,335,649,1
3,S000001708,3,451,798,11
4,S000028594,1,538,792,1
...,...,...,...,...,...
3956,S000028950,3,1524625,1525089,4
3957,S000028951,2,1525090,1525370,4
3958,S000028949,1,1525371,1525466,4
3959,S000028952,7,1525467,1531933,4


Unnamed: 0,Primary_SGDID,count(SGDID),Start_coordinate,Stop_coordinate,Chromosome
0,S000002143,1,335,0,1
1,S000002143,1,649,0,1
2,S000002143,1,none,0,1
3,S000028594,1,538,0,1
4,S000028594,1,792,0,1
...,...,...,...,...,...
170,S000029023,32,none,96,17
171,S000007284,2,85554,96,17
172,S000007284,2,85709,96,17
173,S000007284,2,none,96,17


CPU times: user 3.05 s, sys: 524 ms, total: 3.57 s
Wall time: 3.57 s
