In [1]:
import os
import pandas as pd
import numpy as np
import neural_style
import download_models
from PIL import Image
import shutil

In [40]:
def choose_classes(labels, n_classes, n_samples):
    """
    Randomly picks up to n_classes classes that each have at least n_samples rows in the labels dataframe.
    Classes are visited in a random order and the first eligible ones are kept.
    parameters:
        labels: pandas dataframe with the columns 'CATEGORY' (class name) and 'ORIG_CATEGORY_FILENAME'
        n_classes: required number of classes
        n_samples: required number of samples in a class
    returns:
        chosen_classes: a list of the names of the classes chosen; it holds fewer than n_classes
                        entries when not enough classes have n_samples samples
    """
    #how many distinct classes there are, and their names
    class_total = labels['CATEGORY'].nunique()
    class_names = labels.CATEGORY.unique()

    assert class_total >= n_classes, "n_classes must be smaller than the number of available classes. (choose_classes function)"

    #random order in which the classes are examined
    visiting_order = np.random.permutation(np.arange(class_total))

    #one row per class with the number of files it contains
    counts = labels.groupby(['CATEGORY'])['ORIG_CATEGORY_FILENAME'].count().reset_index(name='count')

    chosen_classes = []
    for position in visiting_order:
        name = class_names[position]
        #scalar count for this class (first and only matching row)
        available = counts.loc[counts['CATEGORY'] == name, 'count'].iloc[0]
        if available >= n_samples:
            chosen_classes.append(name)
            #stop as soon as the requested number of classes is reached
            if len(chosen_classes) == n_classes:
                break
        #classes with too few samples are skipped

    return chosen_classes

def choose_styles(labels, n_styles):
    """
    Chooses n_styles distinct styles randomly from the labels dataframe.
    parameters:
        labels: pandas dataframe with a 'STYLE' column holding the style name of each file
        n_styles: number of styles to choose
    returns:
        chosen_styles: a list with exactly n_styles style names, in random order
    raises:
        AssertionError: if n_styles is negative or larger than the number of distinct styles
    """
    #distinct style names; missing values are dropped so that the array and its length always agree
    classes = labels['STYLE'].dropna().unique()
    total_classes = len(classes)

    assert n_styles >= 0, "n_styles must not be negative. (choose_styles function)"
    assert total_classes >= n_styles, "n_classes must be smaller than the number of available classes. (choose_styles function)"

    #random order of all style indices
    class_random_sampling = np.random.permutation(np.arange(total_classes))

    #keep only the first n_styles entries of the random order
    #(comparing the list itself with n_styles would never be true and would return every style)
    chosen_styles = [classes[s] for s in class_random_sampling[:n_styles]]
    return chosen_styles
 
def choose_style_image(style_location, style):
    """
    Given a style, looks up its images in the labels.csv found in style_location and returns the filename of a random one.
    parameters:
        style_location: folder containing the labels.csv of the style images
                        (the csv is read with the columns 'FILENAME' and 'STYLE')
        style: name of the style
    returns:
        chosen_image: filename of the chosen style image exactly as listed in labels.csv
                      (a bare filename, not a full path)
    raises:
        IndexError: if labels.csv lists no image for the requested style
    """
    label = os.path.join(style_location, "labels.csv")

    #rename returns a new frame, no in-place mutation needed
    label_csv = pd.read_csv(label).rename(columns={"FILENAME": "ORIG_STYLE_FILENAME"})

    #get all filenames of images of said style
    style_images = label_csv.loc[label_csv['STYLE'] == style, 'ORIG_STYLE_FILENAME']
    if len(style_images) == 0:
        raise IndexError("no images with style %r are listed in %s" % (style, label))

    #choose a random image from this list; rand() without a size yields a plain float,
    #so int() does not have to convert a 1-element array (deprecated in recent NumPy)
    random_index = int(len(style_images) * np.random.rand())
    chosen_image = style_images.iloc[random_index]
    return chosen_image

In [41]:
def create_stylized_dataset(location, location_styles, n_classes, n_styles, 
                            n_samples,
                            output_location,
        #the rest are remaining neural style transfer arguments
        p_style_weight="1e2", p_content_weight="5e0", p_num_iterations="1000", p_learning_rate = "1e0", 
        p_gpu="0", p_image_size="512", p_style_blend_weights="None", p_normalize_weights="False", p_normalize_gradients="False", p_tv_weight="1e-3", p_init='random', p_init_image="None", p_optimizer='lbfgs', 
        p_lbfgs_num_correction="100",
        p_print_iter="0", p_save_iter="0", p_style_scale="1.0", p_original_colors = "0", p_model_file='models/vgg19-d01eb7cb.pth', p_disable_check="False",
        p_backend='nn', p_cudnn_autotune="False", p_pooling='max',
        p_seed="-1", p_content_layers='relu4_2', p_style_layers='relu1_1,relu2_1,relu3_1,relu4_1,relu5_1', p_multidevice_strategy='4,7,29'):
    """
    Given a domain, applies style transfer to n_samples per each class,style group.
    Generates a .csv file which contains the filenames, the classes, and the assigned style to them.
    
    parameters:
        location: the location(s) of the domain(s) to be used; if multiple domains given, choose randomly;
                format of domain folders:
                    location_folder
                        >data
                            >>all images will be in this folder
                        >labels.csv
                            >>this file will contain the columns: filename, label, split
                                >>filename: name of the file
                                >>label: name of the class
                                N/A>>split: test or train; specifying the split of the sample
        location_styles: the location with the styles to be used; format:
            location_styles
                >data
                    >>all style images will be in this folder
                >labels.csv
                    >>this file will contain the columns: filename, label
                        >>filename: name of the style file
                        >>label: name of the style
                        N/A>>split: which split the style is in

        n_classes: amount of classes from the dataset to apply styles to
        n_styles: amount of styles to apply
            (n_classes, n_styles) should be equal?
        n_samples: number of samples per class
        output_location: location where to output the label, data
    output:
        tbd
    """
    if type(location)==str:
        pass
    elif type(location) in (list,tuple):
        location = random.choice(location)
    else:
        #you can't have neither a list, tuple nor a str!!!!
        raise Exception("Please don't do this to me (╥﹏╥)")
    
    data = os.path.join(location, "data")
    label_loc = os.path.join(location, "labels.csv")
    labels = pd.read_csv(label_loc)
    
    #rename filename to orig category name
    labels.rename(columns = {"FILENAME" : "ORIG_CATEGORY_FILENAME"}, inplace=True)
    
    
    n_samples_per_class = n_styles*n_samples #e.g. 40 samples per group, 3 styles means we need 120 samples for a class
    
    chosen_classes = choose_classes(labels, n_classes, n_samples_per_class)
    assert len(chosen_classes) == n_classes, "Likely there aren't enough classes to have at least n_samples samples (too many or too little classes were chosen)"

    style_label_loc = os.path.join(location_styles, "labels.csv")
    style_labels = pd.read_csv(style_label_loc)    
    
    style_labels.rename(columns = {"FILENAME" : "ORIG_STYLE_FILENAME"}, inplace=True)
    
    chosen_styles = choose_styles(style_labels, n_styles)    
    assert len(chosen_styles) == n_styles, "Likely there aren't enough styles offered in the style label.csv file"

    #the assumption is that n_classes, n_styles are equal
    
    groups_style_class = []
    for c in chosen_classes:
        for s in chosen_styles:
            groups_style_class.append([c,s])

    labels_wstyles = pd.DataFrame()


    for g_s_c in groups_style_class:
        #select the class from the g_s_c pair
        c = g_s_c[0]
        #select samples from current class c
        samples = labels[labels['CATEGORY']==c].reset_index(drop=True)

        #get permutations for train,test samples
        random_samples_permutation = np.random.permutation(np.arange(len(samples)))


        #select first n_samples samples from permutation
        chosen_samples = samples.iloc[random_samples_permutation[0:n_samples]][['ORIG_CATEGORY_FILENAME', 'CATEGORY']]
        #reset indices of chosen samples
        chosen_samples = chosen_samples.reset_index(drop=True)

        #get permutations for group samples
        chosen_samples_permutation = np.random.permutation(np.arange(len(chosen_samples)))
        
        chosen_style = g_s_c[1]
        
        chosen_images = chosen_samples_permutation[:n_samples]
        chosen_samples_permutation = chosen_samples_permutation[n_samples:]
        chosen_samples.loc[chosen_images, 'STYLE'] = chosen_style

        labels_wstyles = pd.concat((labels_wstyles, chosen_samples), ignore_index=True)

    #for all entries, choose a style image of given style
    style_location_list = []
    for index,row in labels_wstyles.iterrows():
        style_location_list.append(choose_style_image(location_styles, row['STYLE']))

    style_location_df = pd.DataFrame(style_location_list, columns=['ORIG_STYLE_FILENAME'])
    labels_wstyles['ORIG_STYLE_FILENAME'] = style_location_df
    
    #TODO: add neural style transferoutput_location
    #TODO: formalize output
    output_label = os.path.join(output_location, 'label.csv')
    output_data = os.path.join(output_location, "data")

    #create folders necessary for output_data (and output_label)
    os.makedirs(output_data, exist_ok=True)
    
    labels_wstyles['FILENAME'] = labels_wstyles['ORIG_CATEGORY_FILENAME'] + "_" + labels_wstyles['ORIG_STYLE_FILENAME']
    labels_wstyles.to_csv(output_label)

    #create stylised dataset
    for index,row in labels_wstyles.iterrows():
        location_style_image = os.path.join(location_styles, "data", row['ORIG_STYLE_FILENAME'])
        location_content_image = os.path.join(location, "data", row['ORIG_CATEGORY_FILENAME'])
        location_output_image = os.path.join(output_location, "data", row['FILENAME'])
        
        with Image.open(location_content_image) as img:
            width, height = img.size
            #choosing the smaller value between image size, and the requested p_image_size
            #target_image_size = int(max(width, height))
            target_image_size = min(p_image_size, int(max(width, height)))
            
        
        command = "/kaggle/usr/lib/neural_style/neural_style.py -style_image %s -style_blend_weights %s -content_image %s -image_size %s -gpu %s -content_weight %s -style_weight %s normalize_weights %s -normalize_gradients %s -tv_weight %s -num_iterations %s -init %s -init_image %s -optimizer %s -learning_rate %s -lbfgs_num_correction %s -print_iter %s -save_iter %s -output_image %s -style_scale %s -original_colors %s -pooling %s -model_file %s -disable_check %s -backend %s -cudnn_autotune %s -seed %s -content_layers %s -style_layers %s -multidevice_strategy %s" %(
                location_style_image, p_style_blend_weights, location_content_image, target_image_size, p_gpu, p_content_weight, p_style_weight, p_normalize_weights, p_normalize_gradients, p_tv_weight, p_num_iterations, p_init, p_init_image, p_optimizer, p_learning_rate, p_lbfgs_num_correction, p_print_iter, p_save_iter, location_output_image, p_style_scale, p_original_colors, p_pooling, p_model_file, p_disable_check, p_backend, p_cudnn_autotune, p_seed, p_content_layers, p_style_layers, p_multidevice_strategy)
        !python3 $command
        print("Finished running: %s" %command)
                    
    return 0

In [42]:
#folder in the Kaggle working directory that receives the model files
p_model_location = "/kaggle/working/models"
#presumably downloads the pretrained models into that folder (the cell output reports a successful download)
download_models.main(p_model_location)
#path of the vgg19 checkpoint; passed as p_model_file to create_stylized_dataset in a later cell
p_model_file = os.path.join(p_model_location, "vgg19-d01eb7cb.pth")

All models have been successfully downloaded


In [43]:
#content domain and style collection, both read from Kaggle input datasets
location = "/kaggle/input/ter-set-1/Human_Actions/Human_Actions"
location_styles = "/kaggle/input/ter-set-1/Classified_Style_Dataset/Classified_Style_Dataset"
#3 classes x 3 styles, 40 images per (class, style) group
n_classes = 3
n_styles = 3
n_samples = 40

output_location = "/kaggle/working/output/Human_Actions_Stylized_Experiment_12122022"

#only the neural style transfer arguments that differ from the function defaults are passed;
#all remaining p_* arguments keep the defaults declared in create_stylized_dataset
create_stylized_dataset(location, location_styles, n_classes, n_styles, n_samples, output_location,
                        p_model_file = p_model_file, p_original_colors = "1", p_style_weight=25, p_image_size=512)
#print() is the last statement of the cell, so the 0 returned above is not echoed as the cell result
print()

All models have been successfully downloaded
VGG-19 Architecture Detected
Successfully loaded /kaggle/working/models/vgg19-d01eb7cb.pth
Capturing style target 1
Running optimization with L-BFGS
Finished running: /kaggle/usr/lib/neural_style/neural_style.py -style_image /kaggle/input/ter-set-1/Classified_Style_Dataset/Classified_Style_Dataset/data/Soil_12.jpg -style_blend_weights None -content_image /kaggle/input/ter-set-1/Human_Actions/Human_Actions/data/riding_a_bike_253.jpg -image_size 512 -gpu 0 -content_weight 5e0 -style_weight 25 normalize_weights False -normalize_gradients False -tv_weight 1e-3 -num_iterations 1000 -init random -init_image None -optimizer lbfgs -learning_rate 1e0 -lbfgs_num_correction 100 -print_iter 0 -save_iter 0 -output_image /kaggle/working/output/Human_Actions_Stylized_Experiment_12122022/data/riding_a_bike_253.jpg -style_scale 1.0 -original_colors 1 -pooling max -model_file /kaggle/working/models/vgg19-d01eb7cb.pth -disable_check False -backend nn -cudnn_au

In [39]:
#shutil.rmtree("/kaggle/working/output/")
#os.remove("/kaggle/working/output_archive.zip")
#os.makedirs("/kaggle/working/")

In [44]:
#NOTE(review): shutil is already imported in the first cell; this re-import is redundant on a full top-to-bottom run
import shutil
#zip the contents of /kaggle/working/output into /kaggle/working/output_archive.zip (make_archive appends the .zip suffix)
shutil.make_archive("/kaggle/working/output_archive", 'zip', "/kaggle/working/output")

'/kaggle/working/output_archive.zip'

52