In [1]:
import imageio
import cv2
import subprocess
import librosa
import librosa.display
import soundfile as sf
import os
import torch
from torchvision import transforms
from inference.Inferencer import Inferencer
from models.PasticheModel import PasticheModel
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Audio
from PIL import Image
from pathlib import Path
import tqdm.notebook as tq
import pandas as pd
from scipy.ndimage import gaussian_filter

pd.set_option('display.max_rows', 500)
from IPython.core.display import HTML, display
def rm_out_padding(): display(HTML("<style>div.output_subarea { padding:unset;}</style>"))
rm_out_padding()

In [2]:
def get_beats(audio, n_freqs, gauss_size=6, std_window=150):
    audio = np.split(audio, n_freqs, axis=0)
    out = []
    
    def rolling_window(a, window):
        pad = np.ones(len(a.shape), dtype=np.int32)
        pad[-1] = window-1
        pad = list(zip(pad, np.zeros(len(a.shape), dtype=np.int32)))
        a = np.pad(a, pad,mode='reflect')
        shape = a.shape[:-1] + (a.shape[-1] - window + 1, window)
        strides = a.strides + (a.strides[-1],)
        return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides)
    
    for z in audio:
        #print(z.shape)
        filt = gaussian_filter(z, sigma=[0, gauss_size])
        filt = np.var(rolling_window(filt, std_window), axis=-1)[:-1, :]
        #print(filt.shape)
        out.append([z[a, i] for i, a in enumerate(np.argmax(filt, axis=0))])

    return out

def get_transformation(im, pos, strength=1):
    #im = reference frame
    #pos is float from 0 to 1 that interps between the images
    #strength float 0 to 1 that sets blending between ref and transformed image. strength 1 is 100% transformed
    pos = pos - int(pos)
    style_1 = int(pos*16)
    style_2 = style_1 + 1 if style_1 < 15 else 0
    blend = pos*16 - style_1
    out = np.array(inference.eval_image(im, style_1, style_2, 1-blend))
    shape = [1080, 1920]
    styled = cv2.resize(out, dsize=(shape[1], shape[0]), interpolation=cv2.INTER_NEAREST)
    ref = np.array(ref_im, dtype='uint8')
    ref = cv2.resize(ref, dsize=(shape[1], shape[0]), interpolation=cv2.INTER_NEAREST)
    out = np.array(strength * styled + (1-strength) * ref, dtype='uint8')
    return out

def get_multi_transformations(im, pos, masks, strength=1):
    if type(strength) is not list:
        strength = [strength for s in range(len(pos))]
    out = get_transformation(im, pos[0], strength=strength[0]) * masks[0]
    for i, p in enumerate(pos[1:]):
        out += get_transformation(im, pos[i+1], strength=strength[i+1]) * masks[i+1]
    return out

def get_ref_and_masks(folder):
    files = os.listdir(folder)
    ref_im_path = [i for i in files if 'ref' in i][0]
    mask_paths = [i for i in files if 'mask' in i]
    ref_im = Image.open(folder + '\\' + ref_im_path).convert('RGB')
    #try:
    masks = [cv2.resize(np.where(imageio.imread(folder + '\\' + mp, as_gray=False, pilmode="RGB")[:,:,:3]>120,1,0), dsize=(1920, 1080), interpolation=cv2.INTER_NEAREST) for mp in mask_paths]
#     except IndexError:
#         masks = [cv2.resize(np.where(imageio.imread(folder + '\\' + mp, cv2.IMREAD_COLOR)>120,1,0), dsize=(1920, 1080), interpolation=cv2.INTER_NEAREST) for mp in mask_paths]
    # make sure there is no overlap
    for i, m in enumerate(masks[1:]):
        for j in range(i+1):
            m -= m * masks[j]
            
    # add a background mask
    masks.append(np.where(masks[0]==1, 1,0))
    for m in masks[:-1]:
        masks[-1] += m
    masks[-1] = np.where(masks[-1]==0, 1, 0)
    
    return ref_im, masks

#prep images
def prep_transformations(im, inference, ims_per_style, strength=1, num_styles=16):
    transformed_images = []
    tmp_path = r'tmp_images\\'
    shape = np.array(im).shape
    ref = np.array(im)
    ref = cv2.resize(ref, dsize=(1920, 1080), interpolation=cv2.INTER_NEAREST)
    for i in tq.trange(num_styles, desc='Prepping styles'):
        for j in range(ims_per_style):
            fname = 't_style_%d_blend_%d_str_%d.png'%(i, j, strength*100)
            style_2 = i + 1 if i < 15 else 0
            tmp = inference.eval_image(im, i, style_2, 1-(j/ims_per_style))
            tmp = np.array(tmp)
            tmp = cv2.resize(tmp, dsize=(1920, 1080), interpolation=cv2.INTER_NEAREST)
            transformed_images.append(np.array(strength * tmp + (1-strength) * ref, dtype='uint8'))
            #tmp = np.array(strength * tmp + (1-strength) * ref, dtype='uint8')
            #imageio.imsave(tmp_path+fname, tmp)
                           
    return transformed_images

def get_transform_from_prepped_multi(t_ims, pos, mask, str_index, ims_per_style, num_styles=16):
    if type(str_index) is not list:
        str_index = [str_index for s in range(len(pos))]
    #make so last mask is last pos and last str
    str_index[len(mask)-1] = str_index[-1]
    pos[len(mask)-1] = pos[-1]
    
    pos = [p - int(p) for p in pos][:len(mask)]
    
    t_index = int(round(pos[0]*ims_per_style*num_styles))
    t_index = 0 if t_index == len(t_ims[0]) else t_index
    out = mask[0] * t_ims[str_index[0]][t_index]
    for i, p in enumerate(pos[1:]):
        t_index = int(round(p*ims_per_style*num_styles))
        t_index = 0 if t_index == len(t_ims[0]) else t_index
        try:
            out += mask[i+1] * t_ims[str_index[i+1]][t_index]
        except IndexError:
            print(i, len(mask), len(str_index), t_index, len(t_ims[0]))
    return out

def load_model(model_save_dir):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    num_styles = 16
    image_size = 512
    pastichemodel = PasticheModel(num_styles)
    inference = Inferencer(pastichemodel,device,image_size)
    inference.load_model_weights(model_save_dir)
    return inference

def create_music_video(tracks, image_folders, models, ims_per_style=20):
    pos1_floor = 0
    pos2_floor = 0.4
    pos3_floor = 0.65
    pos4 = 0.85
    for ii, t in tracks.reset_index().iterrows():
        # get ref_ims
        ref_im, masks = get_ref_and_masks(image_folders[ii % len(image_folders)])
        
        # load model and transform images
        index = int(ii/len(image_folders)) % len(models)
        inference = load_model(models[index])
        t_ims_str_high = prep_transformations(ref_im, inference, ims_per_style)
        t_ims_str_low = prep_transformations(ref_im, inference, ims_per_style, strength=0.6)
        t_ims = [t_ims_str_high, t_ims_str_low]
        
        # load and analyze audio
        z, sr = librosa.load(t.fpath.replace("'", "&#39;"), offset = 0.0, duration = None)
        out_audio_path = r'C:\deep_style_beats\audio\tmp.wav'
        sf.write(out_audio_path, z, sr)
        _, z = librosa.effects.hpss(z)
        hop_length = 735
        Nfft = 2048
        n_mels = 100
        bass_clip = 3
        z = librosa.feature.melspectrogram(z, sr=sr, n_fft=Nfft, hop_length=hop_length, n_mels=n_mels+bass_clip)
        z = z[bass_clip:, :] #clip lowest bass
        z= librosa.power_to_db(z)
        z = (z - z.min()) / (z.max() - z.min())
        z = get_beats(z, 4)
#         print(len(z))
#         print(len(z[0]))
#         print(z[0][200])
        
        # setup video
        out_video_path = r'C:\deep_style_beats\video\tmp.avi'
        writer = imageio.get_writer(out_video_path, fps=30, mode='I', macro_block_size=1)
        for i in tq.trange(len(z[0]), desc=t.title):
#             print(i)
#             print(len(z))
#             print(len(z[0]))
    #         if i%30 == 0:
    #             print(i/30)
            pos_move = 1/2000 #was 1/1000
            pos1_floor += pos_move + z[0][i]/1200
            pos2_floor += pos_move + z[2][i]/1200
            pos3_floor += pos_move + z[1][i]/1200
            pos1 = pos1_floor + z[0][i]/16
            pos2 = pos2_floor + z[2][i]/16
            pos3 = pos3_floor + z[1][i]/16
            pos4 += z[3][i]/300 + 1/2000
            pos = [pos1, pos2, pos3, pos4]
            str_index = [0 for i in range(len(masks))]
            str_index[-1] = 1
            im = get_transform_from_prepped_multi(t_ims, pos, masks, str_index, ims_per_style)
            writer.append_data(np.array(im, dtype='uint8'))
        writer.close()
        out_path = r'C:\deep_style_beats\composed_video\\' + t.title.replace("'", "&#39;") + '.avi'
        out_path = out_path.replace(' ', '_')
        command = 'ffmpeg -i ' + out_video_path + ' -i ' + out_audio_path + ' -c copy -map 0:v:0 -map 1:a:0 ' + out_path
        #print(command)
        subprocess.call(command, shell=True)

def parse_track_list(list_paths, mode='chillhop'):
    #list_paths should be a text file with track lists
    tl = []
    for lp in list_paths:
        with open(lp, 'r', encoding='ansi') as f:
            try:
                for l in f:
                    if l.strip() == '':
                        continue
                    t = {}
                    try:
                        t['artist'], res = l.split(' - ')
                    except ValueError:
                        print(lp)
                        print(l)
                        print(hello)
                    t['title'] = res.split(' https:')[0]
                    t['link'] = res.split(t['title']+' ')[1].rstrip()
                    t['fpath'] = lp.replace('track_list.txt', t['title'] + '.mp3')
                    tl.append(t)
            except UnicodeDecodeError:
                print(lp)
    return pd.DataFrame.from_dict(tl)

def print_track_list(mode='chillhop'):
    start_time = 0
    for t in tracks:  
        minutes, seconds = divmod(start_time, 60)
        hours, minutes = divmod(minutes, 60)
        print("%02d:%02d"%(minutes,seconds), t.split('.')[0])
        zt, sr = librosa.load(audio_dir+t, offset = 0.0, duration = None, sr=bad_sr)
        start_time += librosa.get_duration(zt, bad_sr)
        
def split_df(df, n, random_state=1):
    #randomly splits the dataframe into n parts
    df = df.sample(frac=1, random_state=random_state)
    return np.split(df, n)

In [3]:

#this_df = this_df[this_df.title != 'Lonely Waves']
#this_df

In [4]:
# get track lists
set_folder = r'C:\deep_style_beats\audio\chillhop\set3'
release_folders = [x[0] for x in os.walk(set_folder)][1:]

list_paths = [r + '\\track_list.txt' for r in release_folders]
df = parse_track_list(list_paths)
df = split_df(df, 14)
#df1 = df_both.sample(frac=0.5, random_state=1)
#df2 = df_both.drop(df1.index).sample(frac=1, random_state=1)

In [5]:
len(df)

14

In [6]:
df[2]

Unnamed: 0,artist,title,link,fpath
304,Swørn,Feeling Lost,https://chll.to/13909f85,C:\deep_style_beats\audio\chillhop\set3\Reflec...
17,SwuM,Aries,https://chll.to/e31fa393,C:\deep_style_beats\audio\chillhop\set3\Aries\...
306,Swørn,Upset (hold it in),https://chll.to/910d700a,C:\deep_style_beats\audio\chillhop\set3\Reflec...
246,Leavv,Lighthouse,https://chll.to/e55fb9f1,C:\deep_style_beats\audio\chillhop\set3\Mind G...
381,Ward Wills,Comfort,https://chll.to/14736651,C:\deep_style_beats\audio\chillhop\set3\When T...
180,"Philanthrope, mommy",everything you say,https://chll.to/b9099ee6,C:\deep_style_beats\audio\chillhop\set3\Inaudi...
343,"Dontcry, Nokiaa",Eastway,https://chll.to/218bde2f,C:\deep_style_beats\audio\chillhop\set3\Stray\...
247,Leavv,Voyage,https://chll.to/d4e44865,C:\deep_style_beats\audio\chillhop\set3\Mind G...
192,fantompower,Endless Ether,https://chll.to/d60e90af,C:\deep_style_beats\audio\chillhop\set3\infini...
174,Sleepy Fish,Fireplace,https://chll.to/63eed366,C:\deep_style_beats\audio\chillhop\set3\In My ...


In [7]:
models = ["model/pastichemodel_style3_3-FINAL.pth",
    "model/pastichemodel_style2_3-FINAL.pth",
         "model/pastichemodel_style1_2-FINAL.pth",
          "model/pastichemodel-FINAL.pth"]
im_and_mask_folders = [r'C:\deep_style_beats\images\Space1',
                       r'C:\deep_style_beats\images\Space2',
                      r'C:\deep_style_beats\images\gegarin']

#path = r'C:\deep_style_beats\images\birds'
#im_and_mask_folders = [f.path for f in os.scandir(path) if f.is_dir()]
create_music_video(df[2], im_and_mask_folders, models, ims_per_style=20)

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Feeling Lost:   0%|          | 0/3799 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Aries:   0%|          | 0/4804 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Upset (hold it in):   0%|          | 0/3186 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Lighthouse:   0%|          | 0/4179 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Comfort:   0%|          | 0/5356 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

everything you say:   0%|          | 0/4756 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Eastway:   0%|          | 0/4401 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Voyage:   0%|          | 0/4179 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Endless Ether:   0%|          | 0/6057 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Fireplace:   0%|          | 0/4766 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Sleep Shop:   0%|          | 0/4151 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Wistful:   0%|          | 0/9125 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Rekindling:   0%|          | 0/6241 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Winter Winter:   0%|          | 0/6346 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Hibiscus Emoji:   0%|          | 0/3545 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Le Sud:   0%|          | 0/5575 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Snug:   0%|          | 0/4956 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Intro:   0%|          | 0/1405 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Searching:   0%|          | 0/6090 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Goodmorning:   0%|          | 0/4411 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Foggy Road:   0%|          | 0/4863 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Fate:   0%|          | 0/7624 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Clocks Forward:   0%|          | 0/4236 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Sofa Stories:   0%|          | 0/5678 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

nomads of the sea:   0%|          | 0/4353 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Driftaway:   0%|          | 0/4216 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Something Warm:   0%|          | 0/6018 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Prepping styles:   0%|          | 0/16 [00:00<?, ?it/s]

Mercadia:   0%|          | 0/3823 [00:00<?, ?it/s]

In [16]:
df[2].loc[110].fpath

"C:\\deep_style_beats\\audio\\chillhop\\set2\\We Must Believe in Spring\\Joey's Gone.mp3"

In [8]:
#combine all composed vidoes into 1 big video
#full_video_fname = 'set3_a.mp4'
#full_video_dir = r'C:\deep_style_beats\full_video\\'
#out_path = full_video_dir + full_video_fname
track_list_path = r'C:\deep_style_beats\video\tmp_track_list.txt'
with open(track_list_path, 'w') as w:
    for ii, t in df[2].reset_index().iterrows():
        print(t.title)
        track_path = r'C:\deep_style_beats\composed_video\\' + t.title.replace(' ', '_') + '.avi'
        w.write("file '"+track_path+"'\n")



Feeling Lost
Aries
Upset (hold it in)
Lighthouse
Comfort
everything you say
Eastway
Voyage
Endless Ether
Fireplace
Sleep Shop
Wistful
Rekindling
Winter Winter
Hibiscus Emoji
Le Sud
Snug
Intro
Searching
Goodmorning
Foggy Road
Fate
Clocks Forward
Sofa Stories
nomads of the sea
Driftaway
Something Warm
Mercadia


In [10]:
track_list_path = r'C:\deep_style_beats\video\tmp_track_list.txt'

full_video_fname = 'set3_c.mp4'
full_video_dir = r'C:\deep_style_beats\full_video\\'
out_path = full_video_dir + full_video_fname
command = 'ffmpeg -f concat -safe 0 -i %s -c:v copy %s -vcodec libx264 -c:a aac' %(track_list_path, out_path)
#   -fflags +genpts    ---- add before -f maybe?
print(command)
subprocess.call(command, shell=True)

ffmpeg -f concat -safe 0 -i C:\deep_style_beats\video\tmp_track_list.txt -c:v copy C:\deep_style_beats\full_video\\set3_c.mp4 -vcodec libx264 -c:a aac


0

In [11]:
#create track list description FROM TRACKLIST
this_df = df[2]
bad_sr = 100
start_time = 0
track_list_path = r'C:\deep_style_beats\video\tmp_track_list.txt'
with open(track_list_path, 'r') as w:
    for l in w:
        title = l.split('\\\\')[1][:-6]
        #print(title)
        t = this_df[this_df['title'] == title.replace('_', ' ')].iloc[0]
        #print(t)
        minutes, seconds = divmod(start_time, 60)
        hours, minutes = divmod(minutes, 60)
        print("[%02d:%02d:%02d]"%(hours,minutes,seconds), end=' ')
        print(t['artist'], '-', t['title'])
        print('    ', t['link'])
        print('')
        zt, sr = librosa.load(t['fpath'], offset = 0.0, duration = None, sr=bad_sr)
        start_time += librosa.get_duration(zt, bad_sr)


[00:00:00] Leavv - Voyage
     https://chll.to/d4e44865

[00:02:19] Swørn - Feeling Lost
     https://chll.to/13909f85

[00:04:25] SwuM - Aries
     https://chll.to/e31fa393

[00:07:06] Swørn - Upset (hold it in)
     https://chll.to/910d700a

[00:08:52] Leavv - Lighthouse
     https://chll.to/e55fb9f1

[00:11:11] Ward Wills - Comfort
     https://chll.to/14736651

[00:14:10] Philanthrope, mommy - everything you say
     https://chll.to/b9099ee6

[00:16:48] Dontcry, Nokiaa - Eastway
     https://chll.to/218bde2f

[00:19:15] fantompower - Endless Ether
     https://chll.to/d60e90af

[00:22:37] Sleepy Fish - Fireplace
     https://chll.to/63eed366

[00:25:15] Cloudchord, Soul Food Horns - Sleep Shop
     https://chll.to/909bb4c4

[00:27:34] Toonorth - Wistful
     https://chll.to/0537dc35

[00:32:38] L'Indécis - Rekindling
     https://chll.to/c2e9382c

[00:36:06] Sleepy Fish - Winter Winter
     https://chll.to/2579a2fa

[00:39:37] Strehlow - Hibiscus Emoji
     https://chll.to/d376fba2

In [12]:
# create thumbnail
inference = load_model(models[0])

In [14]:

path = r'C:\deep_style_beats\thumbnails'
ref_im, masks = get_ref_and_masks(im_and_mask_folders[0])

for i in range(16):
    im = get_multi_transformations(ref_im, [i/16, i/16, i/16], masks, strength=[1,1, 1,0.5])
    imageio.imsave(path+'\\'+str(i)+'.png', im)



In [15]:
path = r'C:\deep_style_beats\thumbnails\final.jpg'
ref_im, masks = get_ref_and_masks(im_and_mask_folders[0])
im = get_multi_transformations(ref_im, [6/16, 3/16, 8/16], masks, strength=[1,1,1,0.5])
imageio.imsave(path, im)

