# Окружение

In [4]:
%reload_ext autoreload
%autoreload 2

In [5]:
import os
import re
import sys
import glob
import random
import numpy as np
import pandas as pd
from shutil import copy
from pprint import pprint

from pathlib import Path
from tqdm import tqdm_notebook
from collections import defaultdict
from itertools import chain
from typing import List, Dict, Any, NoReturn, Tuple, Optional, Union

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('max_colwidth', 100)

import warnings
warnings.simplefilter('ignore')

In [6]:
sys.path.insert(0, "..")
from config import init_config, config

## Определим пути

In [11]:
# For run mode
INITIAL_DATA_DIR = Path('D:\\Data\\EyesSimulation Sessions\\Export3')

init_config("../set_locations.ini")
TRAIN_DIR = config.get("DataPaths", "train_data")
OWNER_DIR = config.get("DataPaths", "owner_data")
RUN_DIR = config.get("DataPaths", "run_data")

In [12]:
# For train-test in training mode

GENERAL_DATA_DIR = Path('D:\\Data\\EyesSimulation Sessions\\Export_full')
INITIAL_DATA_DIR = GENERAL_DATA_DIR / "Export_full"
TEST_SEEN_DATA_DIR = GENERAL_DATA_DIR / "test_seen"
TEST_UNSEEN_DATA_DIR = GENERAL_DATA_DIR / "test_unseen"
TRAIN_DATA_DIR = GENERAL_DATA_DIR / "train"

# Целый датасет

In [13]:
sess_filenames = [fn for fn in glob.glob(str(INITIAL_DATA_DIR) + "\\*.csv") if not fn.endswith("_affmatrix.csv")]
print(f"Number of sessions files avaliable: {len(sess_filenames)}")

users_filenames = glob.glob(str(INITIAL_DATA_DIR) + "\\*.txt")
print(f"Number of users meta files avaliable: {len(users_filenames)}")

Number of sessions files avaliable: 1826
Number of users meta files avaliable: 1826


In [16]:
meta_df = []
for mfn in tqdm_notebook(users_filenames):
    mdf = pd.read_csv(mfn, delimiter = "\t", encoding="Windows-1251", 
                      header=None, error_bad_lines=False).transpose()
    mdf.columns = mdf.iloc[0]
    mdf = mdf.drop(labels=0, axis=0).dropna(how='all')
    mdf['filename'] = mfn
    meta_df.append(mdf)
meta_df = pd.concat(meta_df).reset_index(drop=True)
meta_df['full_name'] = (meta_df['last_name'].fillna("") + " " + meta_df['first_name'].fillna("")).str.strip()
meta_df['session_filename'] = meta_df.filename.apply(lambda x: ("_".join(x.split("_")[:-1]) + ".csv"))
meta_df['user_id'] = meta_df.full_name.replace(to_replace={user:i for i, user in enumerate(meta_df.full_name.unique())})
print("Unique users:", meta_df.full_name.nunique())
meta_df   

HBox(children=(FloatProgress(value=0.0, max=1826.0), HTML(value='')))

b'Skipping line 2: expected 2 fields, saw 3\n'
b'Skipping line 2: expected 2 fields, saw 3\n'
b'Skipping line 2: expected 2 fields, saw 3\n'
b'Skipping line 2: expected 2 fields, saw 3\n'
b'Skipping line 2: expected 2 fields, saw 3\n'
b'Skipping line 2: expected 2 fields, saw 3\n'
b'Skipping line 2: expected 2 fields, saw 3\n'
b'Skipping line 2: expected 2 fields, saw 3\n'
b'Skipping line 2: expected 2 fields, saw 3\n'
b'Skipping line 2: expected 2 fields, saw 3\n'
b'Skipping line 2: expected 2 fields, saw 3\n'



Unique users: 142


Unnamed: 0,last_name,first_name,middle_name,gender,age,left_handed,left_sighted,chronic_illness,vision_characteristic,occupation,additional_info,photo_path,filename,Левый: -0.7,full_name,session_filename,user_id
0,Пользователь,32,,female,49,0,0,,,,1;Краснодарский край;5;5,C:\Programs\EyeTrackerShow\selfies\IMG-9503-2020-11-30_16-00-08.png,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,,Пользователь 32,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,0
1,Пользователь,32,,female,49,0,0,,,,1;Краснодарский край;5;5,C:\Programs\EyeTrackerShow\selfies\IMG-9503-2020-11-30_16-00-08.png,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,,Пользователь 32,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,0
2,Пользователь,32,,female,49,0,0,,,,1;Краснодарский край;5;5,C:\Programs\EyeTrackerShow\selfies\IMG-9503-2020-11-30_16-00-08.png,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,,Пользователь 32,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,0
3,Пользователь,32,,female,49,0,0,,,,1;Краснодарский край;5;5,C:\Programs\EyeTrackerShow\selfies\IMG-9503-2020-11-30_16-00-08.png,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,,Пользователь 32,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,0
4,Пользователь,32,,female,49,0,0,,,,1;Краснодарский край;5;5,C:\Programs\EyeTrackerShow\selfies\IMG-9503-2020-11-30_16-00-08.png,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,,Пользователь 32,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,0
5,Пользователь,32,,female,49,0,0,,,,1;Краснодарский край;5;5,C:\Programs\EyeTrackerShow\selfies\IMG-9503-2020-11-30_16-00-08.png,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,,Пользователь 32,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,0
6,Пользователь,32,,female,49,0,0,,,,1;Краснодарский край;5;5,C:\Programs\EyeTrackerShow\selfies\IMG-9503-2020-11-30_16-00-08.png,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,,Пользователь 32,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,0
7,Пользователь,32,,female,49,0,0,,,,1;Краснодарский край;5;5,C:\Programs\EyeTrackerShow\selfies\IMG-9503-2020-11-30_16-00-08.png,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,,Пользователь 32,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,0
8,Пользователь,32,,female,49,0,0,,,,1;Краснодарский край;5;5,C:\Programs\EyeTrackerShow\selfies\IMG-9503-2020-11-30_16-00-08.png,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,,Пользователь 32,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_1...,0
9,Пользователь,32,,female,49,0,0,,,,1;Краснодарский край;5;5,C:\Programs\EyeTrackerShow\selfies\IMG-9503-2020-11-30_16-00-08.png,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_s#1_mp4_Sun_Dec_27_1...,,Пользователь 32,D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_s#1_mp4_Sun_Dec_27_1...,0


In [17]:
by_person = meta_df.groupby(by=['user_id', 'full_name']).agg({'filename': lambda x: list(x),
                                                            'session_filename': lambda x: list(x)}).reset_index()
by_person['n_sessions'] = by_person.filename.apply(lambda x: len(x))

print("Unique users:", meta_df.full_name.nunique())
by_person

Unique users: 142


Unnamed: 0,user_id,full_name,filename,session_filename,n_sessions
0,0,Пользователь 32,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_...,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\32_Пользователь_cat_f#1_mp4_Sun_Dec_27_...,18
1,1,Шалтаева Аида,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Аида_Шалтаева_kot#0_mp4_Wed_Sep__9_11_2...,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Аида_Шалтаева_kot#0_mp4_Wed_Sep__9_11_2...,2
2,2,Хаметзянов Александр,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Александр__Хаметзянов__cat_fast_mp4_Wed...,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Александр__Хаметзянов__cat_fast_mp4_Wed...,20
3,3,Зуйков Александр,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Александр_Зуйков_kot#0_mp4_Wed_Sep__9_1...,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Александр_Зуйков_kot#0_mp4_Wed_Sep__9_1...,2
4,4,Лаврентьев Александр,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Александр_Лаврентьев_kot#0_mp4_Fri_Sep_...,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Александр_Лаврентьев_kot#0_mp4_Fri_Sep_...,2
5,5,Поляков Александр,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Александр_Поляков_cat_fast_mp4_Sat_Mar_...,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Александр_Поляков_cat_fast_mp4_Sat_Mar_...,20
6,6,Чебураева Александра,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Александра_Чебураева_cat_f#1_mp4_Tue_Ma...,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Александра_Чебураева_cat_f#1_mp4_Tue_Ma...,20
7,7,Бойко Алексей,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Алексей_Бойко_cat_fast_mp4_Wed_Mar__3_1...,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Алексей_Бойко_cat_fast_mp4_Wed_Mar__3_1...,20
8,8,Ковырев Алексей,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Алексей_Ковырев_cat_fast_mp4_Tue_Mar__2...,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Алексей_Ковырев_cat_fast_mp4_Tue_Mar__2...,20
9,9,Крицков Алексей,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Алексей_Крицков_cat_f#1_mp4_Tue_Mar__2_...,[D:\Data\EyesSimulation Sessions\Export_full\Export_full\Алексей_Крицков_cat_f#1_mp4_Tue_Mar__2_...,20


In [18]:
by_person.n_sessions.value_counts()

20    55
10    40
2     25
12     5
8      5
22     2
21     2
19     2
18     1
13     1
11     1
6      1
3      1
1      1
Name: n_sessions, dtype: int64

In [None]:
# meta_df.to_csv(GENERAL_DATA_DIR / "additional_dataset_metainfo.csv", sep=";", encoding="utf-8")
# by_person.to_csv(GENERAL_DATA_DIR / "dataset_info.csv", sep=";", encoding="utf-8")

# Разделяем обучающие и тестовые данные

* **Train** - 80% видео от тех пользователей, у кого их >= 10
* **Test seen** - 20%  видео от тех пользователей, у кого их >= 10
* **Test unseen** - оставшиеся видеозаписи

In [19]:
nsess_threshold = 10

print(f"More then threshold users: {by_person.loc[by_person.n_sessions >= nsess_threshold].shape[0]}")
print(f"with {by_person.loc[by_person.n_sessions >= nsess_threshold].n_sessions.sum()} sessions.\n")

print(f"Less then threshold users: {by_person.loc[by_person.n_sessions < nsess_threshold].shape[0]}")
print(f"with {by_person.loc[by_person.n_sessions < nsess_threshold].n_sessions.sum()} sessions.")

More then threshold users: 109
with 1726 sessions.

Less then threshold users: 33
with 100 sessions.


In [21]:
train_users_ids = by_person.loc[by_person.n_sessions >= nsess_threshold].user_id.to_list()
test_unseen_users_ids = by_person.loc[by_person.n_sessions < nsess_threshold].user_id.to_list()

print(f"Train sessions: {np.ceil(by_person.loc[by_person.n_sessions >= nsess_threshold].n_sessions.sum() * 0.8)}")
print(f"Test seen sessions: {by_person.loc[by_person.n_sessions >= nsess_threshold].n_sessions.sum() - np.ceil(by_person.loc[by_person.n_sessions >= nsess_threshold].n_sessions.sum() * 0.7)}")
print(f"Test unseen sessions: {by_person.loc[by_person.n_sessions < nsess_threshold].n_sessions.sum()}")

Train sessions: 1381.0
Test seen sessions: 517.0
Test unseen sessions: 100


### Utilities

In [20]:
def copy_files_to_folder(filenames: List[str], to_folder: str):
    for fn in tqdm_notebook(filenames, total=len(filenames)):
        try:
            copy(fn, to_folder)
        except Exception as err:
            print(err)
            print(f"Can't copy file {fn} to {to_folder}!\n")
            
            
def clear_folders(folders_paths: List[str]):
    for folder_fn in tqdm_notebook(folders_paths, total=len(folders_paths)):
        files = glob.glob(folder_fn + "/*")
        for f in files:
            try:
                os.remove(f)
            except IsADirectoryError:
                print(f"Oops, found a folder inside clearing folder. Skipping.")
                
def meta_to_data(fn: str):
    return "_".join(fn.split("_")[:-1]) + ".csv"

def data_to_meta(fn: str):
    return ".".join(fn.split(".")[:-1]) + "_metadata.txt"

In [23]:
def k_way_n_shot_split(metainfo_df: pd.DataFrame, 
                       train_users_ids: List[int], train_ratio: float,
                       train_dir: str, test_seen_dir: str, test_unseen_dir: str):
    clear_folders([train_dir, test_seen_dir, test_unseen_dir])
    
    # Split seen and unseen data
    all_seen_meta_fns = list(np.concatenate(metainfo_df.loc[metainfo_df.user_id.isin(train_users_ids)].filename.values).flatten())
    print(f"All metafiles: {len(all_seen_meta_fns)}")
    train_n = int(np.ceil(len(all_seen_meta_fns) * train_ratio))
    test_n = len(all_seen_meta_fns) - int(np.ceil(len(all_seen_meta_fns) * train_ratio))
    print(f"Train number: {train_n}, test number: {test_n}")
    random.shuffle(all_seen_meta_fns)
    
    # Train data
    train_meta_fns = random.sample(all_seen_meta_fns, k=train_n+1)
    train_data_fns = [meta_to_data(fn) for fn in train_meta_fns]
    
    # Seen test data
    test_meta_fns = [fn for fn in all_seen_meta_fns if fn not in train_meta_fns]
    test_data_fns = [meta_to_data(fn) for fn in test_meta_fns]
    
    # Unseen test data
    test_unseen_meta_fns = list(np.concatenate(metainfo_df.loc[~metainfo_df.user_id.isin(train_users_ids)].filename.values).flatten())
    test_unseen_data_fns = list(np.concatenate(metainfo_df.loc[~metainfo_df.user_id.isin(train_users_ids)].session_filename.values).flatten())
    
    print(f"Train metafiles: {len(train_meta_fns)}, data: {len(train_data_fns)}") 
    print(f"Seen test metafiles: {len(test_meta_fns)}, data: {len(test_data_fns)}") 
    print(f"Unseen test metafiles: {len(test_unseen_meta_fns)}, data: {len(test_unseen_data_fns)}")
    
    # Copy owner data
    copy_files_to_folder(train_meta_fns + train_data_fns, train_dir)
    copy_files_to_folder(test_meta_fns + test_data_fns, test_seen_dir)
    copy_files_to_folder(test_unseen_meta_fns + test_unseen_data_fns, test_unseen_dir)
    
    print("Done coping!")
    return ({
        "train": train_meta_fns + train_data_fns,
        "test_seen": test_meta_fns + test_data_fns,
        "test_unseen": test_unseen_meta_fns + test_unseen_data_fns
    })

In [None]:
split = k_way_n_shot_split(by_person, train_users_ids=train_users_ids, train_ratio=0.8,
                          train_dir=str(TRAIN_DATA_DIR), test_seen_dir=str(TEST_SEEN_DATA_DIR), 
                           test_unseen_dir=str(TEST_UNSEEN_DATA_DIR))

# Тест №1. 
1. Выбирается владелец и одна его запись (случайная);
2. Выбирается случайные N записей других пользователей (из числа тех, у кого записей больше среднего) и M записей владельца (случайных, кроме выбранной в п.1);
3. Сохраняются в ранее определенные пути и выбранные N+M записей исключаются из обучающего набора.

In [8]:
def test_split_var1(df: pd.DataFrame, owner_name: str, N: int, M: int):
    clear_folders([RUN_DIR, TRAIN_DIR, OWNER_DIR])
    
    # Split owner data
    all_owner_meta_fns = df.loc[df.name == owner_name].user_fns.values[0]
    random.shuffle(all_owner_meta_fns)
    owner_meta_fns = random.sample(all_owner_meta_fns, k=M+1)
    owner_train_meta_fns = [fn for fn in all_owner_meta_fns if fn not in owner_meta_fns]
    owner_train_data_fns = ["_".join(fn.split("_")[:-1]) + ".csv" for fn in owner_train_meta_fns]
    owner_main_fns = [owner_meta_fns[0], ("_".join(owner_meta_fns[0].split("_")[:-1]) + ".csv")]
    owner_run_meta_fns = [fn for fn in owner_meta_fns[1:]]
    owner_run_data_fns = ["_".join(fn.split("_")[:-1]) + ".csv" for fn in owner_run_meta_fns]
    
    # Select other users data
    all_others_meta_fns = list(chain.from_iterable(df.loc[(df.n_sessions > df.n_sessions.mean()) &
                                                         (df.name != owner_name)].user_fns.to_list()))
    random.shuffle(all_others_meta_fns)
    others_meta_fns = random.sample(all_others_meta_fns, k=N)
    others_train_meta_fns = [fn for fn in all_others_meta_fns if fn not in others_meta_fns]
    others_train_data_fns = ["_".join(fn.split("_")[:-1]) + ".csv" for fn in others_train_meta_fns]
    others_run_meta_fns = [fn for fn in others_meta_fns]
    others_run_data_fns = ["_".join(fn.split("_")[:-1]) + ".csv" for fn in others_run_meta_fns]
    
    print(f"Owner all: {len(all_owner_meta_fns)}, train: {len(owner_train_meta_fns)},"
          f" main: {len(owner_main_fns) // 2}, run: {len(owner_run_meta_fns)}") 
    print(f"Others all: {len(all_others_meta_fns)}, train: {len(others_train_meta_fns)}, run: {len(others_run_meta_fns)}") 
    
    # Copy owner data
    copy_files_to_folder(owner_train_meta_fns + others_train_data_fns, TRAIN_DIR)
    copy_files_to_folder(owner_main_fns, OWNER_DIR)
    copy_files_to_folder(owner_run_meta_fns + owner_run_data_fns, RUN_DIR)
    
    # Copy others data
    copy_files_to_folder(others_train_meta_fns + others_train_data_fns, TRAIN_DIR)
    copy_files_to_folder(others_run_meta_fns + others_run_data_fns, RUN_DIR)
    
    print("Done coping!")
    return ({
        "owner_run": owner_run_meta_fns + owner_run_data_fns,
        "owner_main": owner_main_fns,
        "others_run": others_run_meta_fns + others_run_data_fns
    })

In [9]:
data_fns = test_split_var1(users_df, "Даниил Бонк", N=10, M=4)

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))


Owner all: 22, train: 17, main: 1, run: 4
Others all: 161, train: 151, run: 10


HBox(children=(FloatProgress(value=0.0, max=168.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=8.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Done coping!


# Тест №2. 
1. Выбирается владелец и одна **определенная** его запись (по наиментованию);
2. Выбирается случайные N записей других пользователей (из числа тех, у кого записей больше среднего) и M записей владельца (случайных, кроме выбранной в п.1);
3. Сохраняются в ранее определенные пути и выбранные N+M записей исключаются из обучающего набора.

In [10]:
def test_split_var2(df: pd.DataFrame, owner_name: str, 
                    owner_init_fn: str, N: int, M: int):
    clear_folders([RUN_DIR, TRAIN_DIR, OWNER_DIR])
    
    # Split owner data
    all_owner_meta_fns = df.loc[df.name == owner_name].user_fns.values[0]
    random.shuffle(all_owner_meta_fns)
    
    # Select main owner data and meta files
    owner_main_meta_fn = all_owner_meta_fns.pop(all_owner_meta_fns.index(owner_init_fn))
    owner_main_fns = [owner_main_meta_fn, meta_to_data(owner_main_meta_fn)]
    # Train owner data
    owner_meta_fns = random.sample(all_owner_meta_fns, k=M+1)
    owner_train_meta_fns = [fn for fn in all_owner_meta_fns if fn not in owner_meta_fns]
    owner_train_data_fns = [meta_to_data(fn) for fn in owner_train_meta_fns]
    # Run owner data
    owner_run_meta_fns = [fn for fn in owner_meta_fns[1:]]
    owner_run_data_fns = ["_".join(fn.split("_")[:-1]) + ".csv" for fn in owner_run_meta_fns]
    
    # Select other users data
    all_others_meta_fns = list(chain.from_iterable(df.loc[(df.n_sessions > df.n_sessions.mean()) &
                                                         (df.name != owner_name)].user_fns.to_list()))
    random.shuffle(all_others_meta_fns)
    others_meta_fns = random.sample(all_others_meta_fns, k=N)
    others_train_meta_fns = [fn for fn in all_others_meta_fns if fn not in others_meta_fns]
    others_train_data_fns = [meta_to_data(fn) for fn in others_train_meta_fns]
    others_run_meta_fns = [fn for fn in others_meta_fns]
    others_run_data_fns = [meta_to_data(fn) for fn in others_run_meta_fns]
    
    print(f"Owner all: {len(all_owner_meta_fns)}, train: {len(owner_train_meta_fns)},"
          f" main: {len(owner_main_fns) // 2}, run: {len(owner_run_meta_fns)}") 
    print(f"Others all: {len(all_others_meta_fns)}, train: {len(others_train_meta_fns)}, run: {len(others_run_meta_fns)}") 
    
    # Copy owner data
    copy_files_to_folder(owner_train_meta_fns + others_train_data_fns, TRAIN_DIR)
    copy_files_to_folder(owner_main_fns, OWNER_DIR)
    copy_files_to_folder(owner_run_meta_fns + owner_run_data_fns, RUN_DIR)
    
    # Copy others data
    copy_files_to_folder(others_train_meta_fns + others_train_data_fns, TRAIN_DIR)
    copy_files_to_folder(others_run_meta_fns + others_run_data_fns, RUN_DIR)
    
    print("Done coping!")
    return ({
        "owner_run": owner_run_meta_fns + owner_run_data_fns,
        "owner_main": owner_main_fns,
        "others_run": others_run_meta_fns + others_run_data_fns
    })

In [12]:
data_fns = test_split_var2(users_df, "Даниил Бонк", 
                              'D:\\Data\\EyesSimulation Sessions\\Export3\\exp-2020-12-16_18-19-00_cat_s#1.mp4_metadata.txt',
                               N=10, M=4)

data_fns

HBox(children=(FloatProgress(value=0.0, max=3.0), HTML(value='')))


Owner all: 21, train: 16, main: 1, run: 4
Others all: 161, train: 151, run: 10


HBox(children=(FloatProgress(value=0.0, max=167.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=2.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=8.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=302.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=20.0), HTML(value='')))


Done coping!


{'owner_run': ['D:\\Data\\EyesSimulation Sessions\\Export3\\exp-2020-12-16_18-19-46_cat_f#1.mp4_metadata.txt',
  'D:\\Data\\EyesSimulation Sessions\\Export3\\exp-2020-12-16_18-18-23_cat_s#1.mp4_metadata.txt',
  'D:\\Data\\EyesSimulation Sessions\\Export3\\exp-2020-12-16_18-17-09_cat_f#1.mp4_metadata.txt',
  'D:\\Data\\EyesSimulation Sessions\\Export3\\exp-2020-12-15_15-17-19_cat_f#1.mp4_metadata.txt',
  'D:\\Data\\EyesSimulation Sessions\\Export3\\exp-2020-12-16_18-19-46_cat_f#1.mp4.csv',
  'D:\\Data\\EyesSimulation Sessions\\Export3\\exp-2020-12-16_18-18-23_cat_s#1.mp4.csv',
  'D:\\Data\\EyesSimulation Sessions\\Export3\\exp-2020-12-16_18-17-09_cat_f#1.mp4.csv',
  'D:\\Data\\EyesSimulation Sessions\\Export3\\exp-2020-12-15_15-17-19_cat_f#1.mp4.csv'],
 'owner_main': ['D:\\Data\\EyesSimulation Sessions\\Export3\\exp-2020-12-16_18-19-00_cat_s#1.mp4_metadata.txt',
  'D:\\Data\\EyesSimulation Sessions\\Export3\\exp-2020-12-16_18-19-00_cat_s#1.mp4.csv'],
 'others_run': ['D:\\Data\\EyesSimul