In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import math
import numpy as np
from tqdm import tqdm
from multiprocessing import pool
import ffmpeg
import os
import os
import json
from data.input import ReadData
from data.processing import FigureCreator
from plots.circle import PlaceCircles, PlotCircles
from multiprocessing import Pool
from time import time

from bokeh.palettes import Category20_5

In [2]:
# Load the ClinVar export and index it by the parsed submission date.
df = (
    pd.read_csv('../ClinVarCharts/ClinVarJan22_2023.tsv', sep='\t')
    .assign(submissiondate=lambda frame: pd.to_datetime(frame['submit date']))
    .set_index('submissiondate')
)

# Keep only the rows submitted by the Leipzig institute.
is_leipzig = df['submitter'] == 'Institute of Human Genetics, University of Leipzig Medical Center'
df_leipzig = df[is_leipzig]


In [None]:
class CreateCircleVideo():
    """Render a circle animation as numbered JPG frames and encode them to a video.

    On construction the instance builds a frame schedule from ``df``, places
    one circle per category, assigns each category a random colour, derives a
    symmetric axis limit and prepares the per-frame work list.  Call
    ``CreateFrames`` to write the JPGs and ``CreateVideo`` to run ffmpeg.
    """

    def __init__(self,df,
                 CatColumn='gene',
                 ResolutionFactor=1,
                 OutPathJPGs='./jpgs',
                 VideoName='movie.mp4',
                 figsize=(8,8),
                 facecolor='lavenderblush'):
        """Prepare everything needed to plot the frames.

        Parameters
        ----------
        df : data passed to ``FigureCreator.FigureScheduler`` and
            ``ReadData.ReadDataFile`` (presumably a pandas DataFrame indexed
            by submission date -- confirm against those helpers).
        CatColumn : column whose categories become circles.
        ResolutionFactor : forwarded unchanged to ``FigureScheduler``.
        OutPathJPGs : directory the frame JPGs are written to.
        VideoName : stored on the instance; not read by any method here.
        figsize, facecolor : matplotlib figure size and save face colour.
        """
        self.figsize = figsize
        self.facecolor = facecolor
        self.OutPathJPGs = OutPathJPGs
        self.VideoName = VideoName
        self.CatColumn = CatColumn
        self.ResolutionFactor = ResolutionFactor
        self.figScetch = FigureCreator.FigureScheduler(df,CatColumn=CatColumn,ResolutionFactor=ResolutionFactor)
        # Read from the frame that was passed in.  This used to read the
        # notebook global ``df_leipzig``, which silently ignored ``df`` and
        # failed on a fresh kernel where that global does not exist.
        self.CircCoords = ReadData.ReadDataFile(df,CatColumn=CatColumn)
        self.CircCoords.CreateCounterDict()
        self.CircCoordsPlot = PlaceCircles.PaceCircles(self.CircCoords)
        # Each entry of ``coords`` is indexed as entry[0] (category key) and
        # entry[2] (position, read as entry[2][0] / entry[2][1] in GetAxLim).
        self.ColorDict = {entry[0]: PlotCircles.randomColor() for entry in self.CircCoordsPlot.coords}
        self.Plotcoords = {entry[0]: entry[2] for entry in self.CircCoordsPlot.coords}

        self.GetAxLim()
        self.MultiList()

    def GetAxLim(self):
        """Set ``self.AxLim`` to 1.01 times the largest absolute x/y circle position.

        Raises
        ------
        ValueError
            If there are no circle coordinates.
        """
        extent = max(
            (abs(value)
             for entry in self.CircCoordsPlot.coords
             for value in (entry[2][0], entry[2][1])),
            default=None,
        )
        if extent is None:
            raise ValueError('no circle coordinates available to derive the axis limit')
        self.AxLim = extent * 1.01

    @staticmethod
    def _SplitFrames(frames, parts):
        """Split ``frames`` into ``parts`` contiguous chunks whose sizes differ by at most one."""
        base, extra = divmod(len(frames), parts)
        chunks = []
        start = 0
        for index in range(parts):
            stop = start + base + (1 if index < extra else 0)
            chunks.append(frames[start:stop])
            start = stop
        return chunks

    def CreateFrames(self,cpu=4):
        """Plot every scheduled frame to ``OutPathJPGs`` using up to ``cpu`` worker processes."""
        frames = list(self.MultiList)
        # savefig does not create missing directories.
        os.makedirs(self.OutPathJPGs, exist_ok=True)
        print(f'....start plotting in {self.OutPathJPGs}')
        if not frames:
            return

        # Plain list slicing keeps the frame tuples as Python objects instead
        # of routing them through a NumPy array conversion.
        workers = max(1, min(int(cpu), len(frames)))
        chunks = self._SplitFrames(frames, workers)

        with Pool(processes=workers) as p:
            p.map(self.PlotFigures, chunks)

    def CreateVideo(self,outname = 'video.mp4',framerate=12):
        """Encode all ``*.jpg`` files in ``OutPathJPGs`` into ``outname`` with ffmpeg.

        Does nothing except print a message when ``outname`` already exists.
        """
        if os.path.exists(outname):
            print(f'{outname} already exist')
            return
        (
            ffmpeg
            .input(os.path.join(self.OutPathJPGs, '*.jpg'), pattern_type='glob', framerate=framerate)
            .output(outname)
            .run()
            )

    def MultiList(self):
        """Build the per-frame work list of ``(content, date, frame_number)`` tuples.

        ``content`` keeps only the entries with a value greater than zero and
        ``frame_number`` is the 1-based frame index zero-padded to five digits.

        NOTE: the result is stored in ``self.MultiList``, which shadows this
        method on the instance, so it can only be called once per instance.
        The attribute name is kept because ``CreateFrames`` reads it.
        """
        frames = []
        schedule = zip(self.figScetch.FramePlotDict.items(), self.figScetch.TimeArray)
        for number, ((frame, content), date) in enumerate(schedule, start=1):
            visible = {k: v for k, v in content.items() if v > 0}
            frames.append((visible, date, str(number).zfill(5)))
        self.MultiList = frames

    def PlotFigures(self,multilist):
        """Plot and save one JPG per ``(content, date, frame_number)`` tuple in ``multilist``."""
        for content, date, frame in tqdm(multilist):
            fig, ax = plt.subplots(figsize=self.figsize)
            PlotCircles.PlotCircles(content,self.Plotcoords,date,self.ColorDict,ax,self.AxLim)
            fig.tight_layout(rect=(0,0,1,1))
            fig.savefig(os.path.join(self.OutPathJPGs, f'circVis_{frame}.jpg'),
                        facecolor=self.facecolor,dpi=200)
            # Close this specific figure so long runs do not accumulate open figures.
            plt.close(fig)
        
    

In [None]:
# Time the whole pipeline: build the frame schedule, plot the JPGs, encode the video.
xx = time()

x = CreateCircleVideo(
    df_leipzig,
    OutPathJPGs='animation2/',
    ResolutionFactor=.5,
)
x.CreateFrames(cpu=8)
x.CreateVideo(outname='test5.mp4', framerate=24)

elapsed = time() - xx
print(elapsed)

In [None]:
# Scratch inspection: show the per-frame content mappings held by the frame scheduler.
x.figScetch.FramePlotDict.values()

In [None]:
# Scratch inspection: content of the last scheduled frame.
list(x.figScetch.FramePlotDict.values())[-1]

In [None]:
# Single-process frame renderer that skips frames whose JPG already exists.
# NOTE(review): relies on `figScetch`, `CircPlot`, `coords`, `ColorDict` and
# `lim` being defined in the kernel; none of them is defined in the cells
# shown in this notebook -- confirm where they come from.
Pathogenic = []

scheduled_frames = zip(figScetch.FramePlotDict.items(), figScetch.TimeArray)

c = 1
for (frame, content), date in tqdm(scheduled_frames, total=len(figScetch.TimeArray)):
    # Five-digit, zero-padded frame number used in the file name.
    cc = str(c).zfill(5)
    c += 1

    jpg_path = f'animation2/circVis_{cc}.jpg'
    if os.path.exists(jpg_path):
        continue

    fig, ax = plt.subplots(figsize=(8, 8))

    # Only categories with a positive value are drawn.
    content = {k: v for k, v in content.items() if v > 0}
    CircPlot(content, coords, date, ColorDict, ax, lim)
    plt.tight_layout(w_pad=1, h_pad=1.0)
    plt.savefig(jpg_path, facecolor='lavenderblush', dpi=200)
    plt.close()

In [None]:
# Encode every JPG in animation2/ into movie2.mp4 at 12 frames per second.
jpg_stream = ffmpeg.input('animation2/*.jpg', pattern_type='glob', framerate=12)
jpg_stream.output('movie2.mp4').run()

In [None]:
# Scratch check: list.copy() returns a new list (a shallow copy).
[1,2,3].copy()

In [3]:
class CreateCircleVideoWithSidePlot():
    """Render a circle animation with a stacked-area side plot underneath.

    The upper axes show one circle per ``CatColumn1`` category; the lower axes
    show a stack plot of the ``CatColumn2`` values accumulated up to the
    current frame.  Frames are written as numbered JPGs by ``CreateFrames``
    and encoded with ffmpeg by ``CreateVideo``.
    """

    def __init__(self,df,
                 CatColumn1='gene',
                 CatColumn2='evaluation',
                 ResolutionFactor=1,
                 OutPathJPGs='./jpgs',
                 VideoName='movie.mp4',
                 figsize=(8,10),
                 facecolor='lavenderblush'):
        """Prepare the circle layout, both frame schedules and the work list.

        Parameters
        ----------
        df : data passed to ``FigureCreator.FigureScheduler`` and
            ``ReadData.ReadDataFile`` (presumably a pandas DataFrame indexed
            by submission date -- confirm against those helpers).
        CatColumn1 : column whose categories become circles.
        CatColumn2 : column whose categories become the side-plot series.
        ResolutionFactor : forwarded unchanged to both ``FigureScheduler`` calls.
        OutPathJPGs : directory the frame JPGs are written to.
        VideoName : stored on the instance; not read by any method here.
        figsize, facecolor : matplotlib figure size and save face colour.
        """
        self.figsize = figsize
        self.facecolor = facecolor
        self.OutPathJPGs = OutPathJPGs
        self.VideoName = VideoName
        self.CatColumn = CatColumn1
        self.ResolutionFactor = ResolutionFactor
        self.figScetch = FigureCreator.FigureScheduler(df,CatColumn=CatColumn1,ResolutionFactor=ResolutionFactor)
        # Read from the frame that was passed in.  This used to read the
        # notebook global ``df_leipzig``, which silently ignored ``df`` and
        # failed on a fresh kernel where that global does not exist.
        self.CircCoords = ReadData.ReadDataFile(df,CatColumn=CatColumn1)
        self.CircCoords.CreateCounterDict()
        self.CircCoordsPlot = PlaceCircles.PaceCircles(self.CircCoords)
        self.ColorDict = {entry[0]: PlotCircles.randomColor() for entry in self.CircCoordsPlot.coords}
        self.Plotcoords = {entry[0]: entry[2] for entry in self.CircCoordsPlot.coords}

        self.figScetchSidePlot = FigureCreator.FigureScheduler(df,CatColumn=CatColumn2,
                                                               ResolutionFactor=ResolutionFactor)

        # CreateSideDict's dict is replaced by a list in MultiList(); the call
        # is kept so the construction sequence stays as it was.
        self.CreateSideDict()
        self.GetAxLim()
        self.MultiList()

    def GetAxLim(self):
        """Set ``self.AxLim`` to 1.01 times the largest absolute x/y circle position.

        Raises
        ------
        ValueError
            If there are no circle coordinates.
        """
        extent = max(
            (abs(value)
             for entry in self.CircCoordsPlot.coords
             for value in (entry[2][0], entry[2][1])),
            default=None,
        )
        if extent is None:
            raise ValueError('no circle coordinates available to derive the axis limit')
        self.AxLim = extent * 1.01

    @staticmethod
    def _SplitFrames(frames, parts):
        """Split ``frames`` into ``parts`` contiguous chunks whose sizes differ by at most one."""
        base, extra = divmod(len(frames), parts)
        chunks = []
        start = 0
        for index in range(parts):
            stop = start + base + (1 if index < extra else 0)
            chunks.append(frames[start:stop])
            start = stop
        return chunks

    def CreateFrames(self,cpu=4):
        """Plot every scheduled frame to ``OutPathJPGs`` using up to ``cpu`` worker processes."""
        frames = list(self.MultiList)
        # savefig does not create missing directories.
        os.makedirs(self.OutPathJPGs, exist_ok=True)
        print(f'....start plotting in {self.OutPathJPGs}')
        if not frames:
            return

        # The frame tuples mix dicts, nested lists and strings.  Converting
        # such ragged data to a NumPy array (as np.array_split does) is
        # rejected by recent NumPy releases, so split with plain list slicing.
        workers = max(1, min(int(cpu), len(frames)))
        chunks = self._SplitFrames(frames, workers)

        with Pool(processes=workers) as p:
            p.map(self.PlotFigures, chunks)

    def CreateVideo(self,outname = 'video.mp4',framerate=12):
        """Encode all ``*.jpg`` files in ``OutPathJPGs`` into ``outname`` with ffmpeg.

        Does nothing except print a message when ``outname`` already exists.
        """
        if os.path.exists(outname):
            print(f'{outname} already exist')
            return
        (
            ffmpeg
            .input(os.path.join(self.OutPathJPGs, '*.jpg'), pattern_type='glob', framerate=framerate)
            .output(outname)
            .run()
            )

    def MultiList(self):
        """Build the work list of ``(content, sideplotarray, labels, date, frame_number)``.

        * ``content`` -- circle values of the frame, entries <= 0 removed.
        * ``sideplotarray`` -- one list per side-plot series holding the
          values of all frames up to and including this one.
        * ``labels`` -- the side-plot keys of this frame, in iteration order.
        * ``frame_number`` -- 1-based frame index zero-padded to five digits.

        ``self.SideDict`` is rebound to the list of running series.

        NOTE: the result is stored in ``self.MultiList``, which shadows this
        method on the instance, so it can only be called once per instance.
        """
        side_frames = list(self.figScetchSidePlot.FramePlotDict.values())
        last_side_frame = side_frames[-1] if side_frames else {}
        # One running series per key of the last side frame.  Series are
        # matched to keys by position, which assumes every frame lists the
        # same keys in the same order -- TODO confirm against FigureScheduler.
        self.SideDict = [[] for _ in last_side_frame]

        frames = []
        schedule = zip(self.figScetch.FramePlotDict.items(),
                       self.figScetchSidePlot.FramePlotDict.items(),
                       self.figScetch.TimeArray)
        for number, ((frame, content), (frameSide, contentSide), date) in enumerate(schedule, start=1):
            content = {k: v for k, v in content.items() if v > 0}

            labels = []
            for n, (k, v) in enumerate(contentSide.items()):
                self.SideDict[n].append(v)
                labels.append(k)

            # Copy the inner lists as well.  A shallow ``self.SideDict.copy()``
            # shares them with the running series, so every frame would end
            # up holding the complete, final series.
            sideplotarray = [list(series) for series in self.SideDict]
            frames.append((content, sideplotarray, labels, date, str(number).zfill(5)))
        self.MultiList = frames

    def CreateSideDict(self):
        """Initialise ``self.SideDict`` as ``{key: []}`` for each key of the last side frame."""
        side_frames = list(self.figScetchSidePlot.FramePlotDict.values())
        last_side_frame = side_frames[-1] if side_frames else {}
        self.SideDict = {k: [] for k in last_side_frame}

    def PlotFigures(self,multilist):
        """Plot and save one JPG (circle plot above, stack plot below) per tuple in ``multilist``."""
        # Number of scheduled frames; used to keep the side-plot time axis
        # fixed so the stacked areas grow from left to right across frames.
        total_frames = len(self.MultiList)

        for content, contentSide, labels, date, frame in tqdm(multilist):
            fig = plt.figure(figsize=self.figsize)
            gs = fig.add_gridspec(5,4)
            ax1 = fig.add_subplot(gs[:4, :])
            ax2 = fig.add_subplot(gs[4, :])

            if len(contentSide) > 0:
                # The x range follows this frame's own series length.
                x = range(len(contentSide[0]))
                ax2.stackplot(x,contentSide,
                              labels=labels,
                              colors=Category20_5)
                ax2.legend(bbox_to_anchor=(0,1),loc='upper left')

            ax2.set_xlim(0, max(total_frames - 1, 1))
            ax2.set_xticks([],[])
            ax2.set_yticks([],[])

            PlotCircles.PlotCircles(content,self.Plotcoords,date,self.ColorDict,ax1,self.AxLim)
            fig.tight_layout(rect=(0,0,1,1))
            fig.savefig(os.path.join(self.OutPathJPGs, f'circVis_{frame}.jpg'),
                        facecolor=self.facecolor,dpi=200)
            # Close this specific figure so long runs do not accumulate open figures.
            plt.close(fig)
        
    

In [None]:
# Scratch inspection of the side-plot series container.
# NOTE(review): CreateCircleVideoWithSidePlot.MultiList() rebinds SideDict to a
# list during construction, and CreateCircleVideo has no SideDict attribute, so
# this looks like it raises AttributeError on a finished instance -- confirm
# whether this cell is still needed.
x.SideDict.keys()

In [None]:
# Time the side-plot pipeline: build the frame schedule, plot the JPGs, encode the video.
xx = time()

x = CreateCircleVideoWithSidePlot(
    df_leipzig,
    OutPathJPGs='animation2/',
    ResolutionFactor=.1,
)
x.CreateFrames(cpu=8)
x.CreateVideo(outname='test8.mp4', framerate=24)

elapsed = time() - xx
print(elapsed)

 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                                                | 151/199 [00:21<00:18,  2.63it/s]

In [None]:
# Encode the JPGs already present in the output directory under a new file name;
# CreateVideo returns early if test6.mp4 already exists.
x.CreateVideo(outname='test6.mp4',framerate=24)
