In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
from tqdm import tqdm
from multiprocessing import pool
import ffmpeg
import os
import os
import json
from data.input import ReadData
from data.processing import FigureCreator
from plots.circle import PlaceCircles, PlotCircles
from multiprocessing import Pool
from time import time
from matplotlib.patches import Rectangle
from bokeh.palettes import Category20_5
from random import randint
import matplotlib.image as image
from matplotlib.offsetbox import ( OffsetImage,AnchoredOffsetbox)
from collections import Counter
import gc

In [2]:
# Read the tab-separated ClinVar export, parse the 'submit date' column into
# datetimes and use the parsed values as the index (named 'submissiondate').
df = (
    pd.read_csv('../ClinVarCharts/ClinVar_2023-03.tsv', sep='\t')
    .assign(submissiondate=lambda table: pd.to_datetime(table['submit date']))
    .set_index('submissiondate')
)
#df_leipzig=df[df['submitter']=='Institute of Human Genetics, University of Leipzig Medical Center']


In [3]:
# Discard rows that carry no value in the 'evaluation' column.
df = df.loc[df['evaluation'].notna()]

In [4]:
# Collect every distinct evaluation label containing "patho" (case-insensitive)
# and keep only the rows carrying one of those labels.
ev = list(filter(lambda label: 'patho' in label.lower(), set(df['evaluation'])))
df = df.loc[df['evaluation'].isin(ev)]

In [5]:
# Number of rows per gene, ordered from the most to the least frequent gene.
xx = dict(Counter(df['gene']).most_common())

# The 400 genes with the highest row counts.
genes = list(xx)[:400]

In [6]:
# Keep only rows of the selected genes, and only the 'gene' column
# (the datetime index is retained).
df = df.loc[df['gene'].isin(genes), ['gene']]

In [7]:
# Build the frame schedule; its FramePlotDict and TimeArray attributes are
# consumed by the rendering loop further down.
x = FigureCreator.FigureScheduler(
    df,
    CatColumn='gene',
    ResolutionFactor=0.01,
    MinAppearance=200,
    GroupingFreq='1M',
)


In [None]:
# Alternative schedule built from the single-submitter subset `df_leipzig`.
# That frame only exists when the commented-out filter line in the loading cell
# is enabled, so the cell is skipped otherwise instead of raising NameError
# (and `x` keeps the schedule built from the full table).
if 'df_leipzig' in globals():
    x = FigureCreator.FigureScheduler(df_leipzig,CatColumn='gene',ResolutionFactor=1)


In [8]:
# Image that the rendering loop overlays on every frame.
LOGO_PATH = 'image10.png'
im = image.imread(LOGO_PATH)

In [9]:
def CreateBarPlot(d,p,ax,ColorDict,lim=30,limdown = -1.5,fontsize=8,heigth=.8):
    """Draw horizontal bars and their labels for one animation frame.

    Parameters
    ----------
    d : dict
        Maps label -> bar length. Entries are processed in the dict's iteration
        order; the position in that order decides whether an entry is drawn.
    p : dict
        Maps label -> y coordinate of the lower edge of the bar.
    ax : object
        Drawing target providing ``add_patch`` and ``text`` (a matplotlib Axes
        in this notebook).
    ColorDict : dict
        Maps label -> colour passed to ``Rectangle``.
    lim : int
        Entries with position < ``lim`` get a bar. The entry at position
        ``lim`` gets a label only. Later entries are ignored.
    limdown : float
        Labels are only written for bars whose y coordinate is >= ``limdown``.
    fontsize : int
        Font size of the labels.
    heigth : float
        Height of each bar (parameter name kept as spelled for compatibility).

    Returns
    -------
    None
        The function only adds artists to ``ax``.
    """
    for n,(key,value) in enumerate(d.items()):
        if n>lim:
            # Positions only grow from here, so nothing further can be drawn.
            break
        y = p[key]

        if n<lim:
            ax.add_patch(Rectangle((0, y), value, heigth,color = ColorDict[key]))

        if y>=limdown:
            # Label is right-aligned at the end of the bar, vertically centred.
            ax.text(value,y+(heigth/2),f'{key}',ha='right',va='center',fontsize=fontsize,fontweight='bold')
        
        
        

In [None]:
# Start from an empty label -> colour mapping.
ColorDict = dict()


In [None]:
# Assign one colour from PlotCircles.randomColor() to every selected gene ...
ColorDict.update({gene: PlotCircles.randomColor() for gene in genes})

# ... and persist the palette so later runs can load the same colours.
with open('colors.json','w') as palette_file:
    json.dump(ColorDict,palette_file)

In [10]:
# Load the stored label -> colour palette from disk.
with open('colors.json') as palette_file:
    ColorDict = json.load(palette_file)

In [11]:
# Render the bar-chart race frame by frame into JPEG files under `outf`.
#
# Phases:
#   1. one image per scheduled frame (already existing files are not re-rendered),
#   2. extra images until every bar has reached its target row,
#   3. `HoldFrames` copies of the final image so the video rests on the end state.
CurrentPositions = {}   # label -> current y coordinate of its bar
NumberOfBars = 25
StepSize = .2           # vertical distance a bar may travel per frame
figsize = (10,8)
stillMoving = 0         # 1 while at least one bar moved in the latest update
outf = 'movingbars_allClin'
tit = 'pathogenic variant submissions ClinVar'
HoldFrames = 96         # number of copies of the final frame


def _frame_path(frameN):
    """Return the image path of frame `frameN` (number zero-padded to five digits)."""
    return f'{outf}//mvBars_{str(frameN).zfill(5)}.jpg'


def _rank_frame(frame):
    """Drop entries with a non-positive value and order the rest from largest to smallest."""
    positive = {k: v for k, v in frame.items() if v > 0}
    return dict(sorted(positive.items(), key=lambda kv: kv[1], reverse=True))


def _advance_positions(frame):
    """Move each bar of `frame` one step towards its target row; return 1 if any bar moved, else 0."""
    moved = 0
    for n, entry in enumerate(frame):
        # The first NumberOfBars+5 ranks get their own row (rank 0 highest);
        # all lower ranks are parked at y=-2, below the plotted y-range.
        wantedPos = float(NumberOfBars - n) if n < NumberOfBars + 5 else -2.0
        if entry not in ColorDict:
            ColorDict[entry] = PlotCircles.randomColor()
        # Unseen labels start at the parking position.
        CurrentPos = CurrentPositions.setdefault(entry, -2.0)

        distance = wantedPos - CurrentPos
        if distance == 0:
            continue
        if abs(distance) <= StepSize:
            # Snap onto the target instead of overshooting, so the settle
            # loop below terminates for any StepSize.
            CurrentPositions[entry] = wantedPos
        else:
            step = StepSize if distance > 0 else -StepSize
            CurrentPositions[entry] = round(CurrentPos + step, 2)
        moved = 1
    return moved


def _draw_frame(frame, maxV, Ti):
    """Build and return the figure for `frame` using the current bar positions."""
    fig, ax = plt.subplots(figsize=figsize)
    CreateBarPlot(frame, CurrentPositions, ax, ColorDict)
    ax.set(ylim=(-1,NumberOfBars+1),xlim=(0,maxV),title=f'{tit}-{Ti.year}-{Ti.month}',xlabel='submissions per gene')
    ax.set_yticks([],[])

    # Semi-transparent image anchored with location code 4, without a visible box.
    imagebox = OffsetImage(im, zoom=.4, alpha=0.2)
    imagebox.image.axes = ax
    ao = AnchoredOffsetbox(4, pad=0.01, borderpad=0, child=imagebox)
    ao.patch.set_alpha(0)
    ax.add_artist(ao)

    fig.subplots_adjust(left=0.05, right=0.99, top=0.95, bottom=0.05)
    return fig


def _save_and_close(fig, *frame_numbers):
    """Write `fig` once per given frame number, then release the figure."""
    for number in frame_numbers:
        fig.savefig(_frame_path(number),facecolor='lavenderblush',dpi=150)
    fig.clf()
    plt.close(fig)
    gc.collect()


# savefig does not create missing directories.
os.makedirs(outf, exist_ok=True)

# Phase 1: scheduled frames.
frame = None
for (frameN,frame),Ti in tqdm(zip(x.FramePlotDict.items(),x.TimeArray),total=len(x.FramePlotDict)):
    frame = _rank_frame(frame)
    # default=0 keeps a frame without any positive value from crashing max().
    maxV = max(frame.values(), default=0)+5

    # Positions are advanced for every frame, even when its image is skipped,
    # so that a resumed run continues from the correct layout.
    stillMoving = _advance_positions(frame)

    if frameN<0 or os.path.exists(_frame_path(frameN)):
        continue
    _save_and_close(_draw_frame(frame, maxV, Ti), frameN)

if frame is None:
    raise ValueError('x.FramePlotDict / x.TimeArray yielded no frames - nothing to render')

# Phase 2: keep the last frame's values and animate until all bars are settled.
while stillMoving == 1:
    frameN = frameN + 1
    stillMoving = _advance_positions(frame)
    _save_and_close(_draw_frame(frame, maxV, Ti), frameN)

# Phase 3: the held frames are identical, so the figure is built once and
# written HoldFrames times under consecutive frame numbers.
_save_and_close(_draw_frame(frame, maxV, Ti), *range(frameN + 1, frameN + 1 + HoldFrames))
frameN = frameN + HoldFrames
    

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 14653/14653 [36:34<00:00,  6.68it/s]


In [None]:
# Show the flag left behind by the rendering cell: it is set to 1 whenever a bar
# changed position in the latest update and reset to 0 before each update.
stillMoving

In [None]:
# Scratch animation: a single rectangle rising by `steps` per frame,
# written as 500 JPEGs into the 'movingbars' directory.
steps = 0.05
os.makedirs('movingbars', exist_ok=True)   # savefig does not create the directory
# The loop variable is `n` only: binding `x` here would overwrite the frame
# scheduler `x` that the main rendering cell reads.
for n in tqdm(range(500)):
    cc=str(n).zfill(5)
    y=n*steps
    fig,ax = plt.subplots()
    ax.add_patch(Rectangle((1, y), 6, 2))
    ax.set(ylim=(0,50),xlim=(0,50))
    fig.savefig(f'movingbars//mvBars_{cc}.jpg',facecolor='lavenderblush',dpi=150)
    plt.close(fig)

In [15]:
# Encode all rendered JPEG frames (matched by glob, 112 frames per second)
# into one MP4. overwrite_output=True lets the cell be re-run when the target
# file already exists instead of stopping at ffmpeg's overwrite check.
(
    ffmpeg
    .input('movingbars_allClin///*.jpg', pattern_type='glob', framerate=112)
    .output('movingbarsAllClin.mp4')
    .run(overwrite_output=True)
)

ffmpeg version 5.1 Copyright (c) 2000-2022 the FFmpeg developers
  built with gcc 10.4.0 (conda-forge gcc 10.4.0-16)
  configuration: --prefix=/home/conda/feedstock_root/build_artifacts/ffmpeg_1660333770726/_h_env_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_placehold_plac --cc=/home/conda/feedstock_root/build_artifacts/ffmpeg_1660333770726/_build_env/bin/x86_64-conda-linux-gnu-cc --cxx=/home/conda/feedstock_root/build_artifacts/ffmpeg_1660333770726/_build_env/bin/x86_64-conda-linux-gnu-c++ --nm=/home/conda/feedstock_root/build_artifacts/ffmpeg_1660333770726/_build_env/bin/x86_64-conda-linux-gnu-nm --ar=/home/conda/feedstock_root/build_artifacts/ffmpeg_1660333770726/_build_env/bin/x86_64-conda-linux-gnu-ar --disable-doc --disable-openssl --enable-demuxer=dash --enable-hardcoded-tables --enable-libfreetype --enable-libfontconfig --enable-libopenh264 --enable-gnutl

(None, None)