In [1]:
import os
import pandas as pd
import numpy as np
import sys
import csv
from time import strptime
import json
from datetime import datetime as dt
from datetime import date
from statistics import mean
import math
import pickle
import time

In [2]:
# Notebook-wide pandas display configuration.
# csv.field_size_limit(sys.maxsize)
# np.set_printoptions(threshold=sys.maxsize)
# Never truncate the column list when a DataFrame is displayed.
pd.options.display.max_columns = None
# Render floats with thousands separators and 15 decimals, right-aligned in a
# field at least 20 characters wide.
pd.options.display.float_format = '{:20,.15f}'.format
# pd.options.display.max_rows = 10000
# pd.set_option('display.max_colwidth', -1)

In [None]:
def reduce_mem_usage(df):
    """Shrink a DataFrame in place by narrowing column dtypes.

    * Signed / unsigned integer columns are cast to the narrowest integer
      type of the same signedness whose range contains the column's min and
      max (bounds are inclusive, so e.g. 127 still fits in ``int8``).
    * Float columns are cast to the narrowest float type whose finite range
      contains the column's min and max.  NOTE: ``float16``/``float32`` keep
      fewer significant digits, so values may be rounded.
    * ``object`` columns are converted to ``category``.
    * Every other dtype (bool, datetime, category, extension dtypes, ...) is
      left untouched, as is any numeric column for which no candidate type
      fits (e.g. all-NaN or containing +/-inf).

    Memory usage before/after is printed in MB.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to optimise; its columns are reassigned in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, for call chaining.
    """
    def _narrowest(candidates, limits_of, lowest, highest):
        # First candidate (ordered narrow -> wide) whose inclusive range
        # covers [lowest, highest]; None when nothing fits (NaN bounds
        # compare False against everything, so they end up here too).
        for candidate in candidates:
            limits = limits_of(candidate)
            if limits.min <= lowest and highest <= limits.max:
                return candidate
        return None

    candidates_by_kind = {
        'i': ((np.int8, np.int16, np.int32, np.int64), np.iinfo),
        'u': ((np.uint8, np.uint16, np.uint32, np.uint64), np.iinfo),
        'f': ((np.float16, np.float32, np.float64), np.finfo),
    }

    start_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB'.format(start_mem))

    for col in df.columns:
        col_type = df[col].dtype

        if col_type == object:
            df[col] = df[col].astype('category')
            continue

        # Only plain numpy integer/float dtypes are downcast; anything else
        # (bool, datetime64, category, nullable extension dtypes) has no
        # meaningful min/max-based narrowing here.
        if not isinstance(col_type, np.dtype) or col_type.kind not in candidates_by_kind:
            continue

        candidates, limits_of = candidates_by_kind[col_type.kind]
        target = _narrowest(candidates, limits_of, df[col].min(), df[col].max())
        if target is not None:
            df[col] = df[col].astype(target)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage after optimization is: {:.2f} MB'.format(end_mem))
    # Guard the ratio: a frame that reports zero bytes has nothing to reduce.
    decrease = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
    print('Decreased by {:.1f}%'.format(decrease))

    return df

In [3]:
def calc_smooth_mean(df, by, on, m):
    """Return, per row, the group mean of ``on`` smoothed towards the global mean.

    For every group of ``by`` with ``n`` non-null values of ``on`` and group
    mean ``g``, the smoothed value is ``(n * g + m * overall) / (n + m)``
    where ``overall`` is the mean of ``on`` over the whole frame.  The result
    is mapped back onto the rows through ``df[by]``.

    Parameters
    ----------
    df : pandas.DataFrame
    by : column label used for grouping (and for the final ``map``)
    on : column label of the values being averaged
    m : weight given to the overall mean

    Returns
    -------
    pandas.Series aligned with ``df``.
    """
    overall_mean = df[on].mean()
    grouped_values = df.groupby(by)[on]
    group_sizes = grouped_values.count()
    group_means = grouped_values.mean()
    weighted_total = group_sizes * group_means + m * overall_mean
    smoothed_by_group = weighted_total / (group_sizes + m)
    return df[by].map(smoothed_by_group)

In [None]:
# Load the homepage export, drop the rows whose META__id equals 6, then
# persist the result and reload it from disk (the reload yields a fresh
# default index).
df_file = pd.read_csv('savedcsv/17_homepage.csv')
df_file = df_file.drop(index=df_file.index[df_file['META__id'] == 6])
df_file.to_csv('datasets/df_file.csv', index=False)
df_file = pd.read_csv('datasets/df_file.csv')

In [5]:
# df_raw = reduce_mem_usage(pd.read_csv('savedcsv/17_homepage.csv'))
df_raw = pd.read_csv('savedcsv/17_homepage.csv')

# Keep the two profitability columns under a META__ name (appended at the
# end of the frame) before their originals are removed.
df_raw['META__profitability'] = df_raw['profitability']
df_raw['META__year_avg_profitability'] = df_raw['year_avg_profitability']
df_raw = df_raw.drop(
    columns=['profitability', 'year_avg_profitability', 'PROCESS__original_language']
)

# Remove every non-META column whose name mentions an average, an experience
# value or a movies-before count.
drop_cols = [
    c for c in df_raw.columns
    if 'META' not in c and any(token in c for token in ('avg', 'experience', 'movies_before'))
]
df_raw = df_raw.drop(columns=drop_cols)

# Drop the rows whose META__id equals 6, then round-trip through CSV.
df_raw = df_raw.drop(index=df_raw.index[df_raw['META__id'] == 6])
df_raw.to_csv('datasets/df_raw.csv', index=False)
df_raw = pd.read_csv('datasets/df_raw.csv')

In [6]:
# Reload the prepared raw dataset from disk, so this cell can be run on its
# own once 'datasets/df_raw.csv' exists.
df_raw = pd.read_csv('datasets/df_raw.csv')

In [15]:
# 5th and 95th percentiles of META__revenue.
df_raw.META__revenue.quantile([0.05, 0.95])

0.050000000000000       111,229.200000000011642
0.950000000000000   348,514,752.599999785423279
Name: META__revenue, dtype: float64

In [None]:
# Count the rows with profitability strictly between its 1st and 99th
# percentiles, released in 2000-2018, with a budget of at least 250000;
# print the count and its share of df_raw.
num = len(df_raw.loc[
    (df_raw['META__profitability'] > df_raw['META__profitability'].quantile(0.01))
    & (df_raw['META__profitability'] < df_raw['META__profitability'].quantile(0.99))
    & (df_raw['META__year'] >= 2000)
    & (df_raw['META__year'] <= 2018)
    & (df_raw['budget'] >= 250000)
])

print(num)
print(num/df_raw.shape[0])

In [None]:
# Count the rows with revenue strictly between its 1st and 99th percentiles,
# released in 2000-2018, with a budget of at least 250000; print the count and
# its share of df_raw.
# NOTE(review): this cell previously compared META__revenue against a
# META__profitability quantile (different units, so the lower bound was
# meaningless) and still capped profitability. Both bounds now use revenue
# quantiles; confirm this is the intended filter.
num = df_raw[
    (df_raw.META__revenue > df_raw.META__revenue.quantile(0.01)) &
    (df_raw.META__revenue < df_raw.META__revenue.quantile(0.99)) &
    (df_raw.META__year >= 2000) &
    (df_raw.META__year <= 2018) &
    (df_raw.budget >= 250000)
].shape[0]

print(num)
print(num/df_raw.shape[0])

In [7]:
def get_real_columns(name_prefix, name_suffix, num_columns):
    """Build the numbered column names ``<prefix>1<suffix>`` .. ``<prefix>N<suffix>``.

    ``num_columns`` is N; numbering starts at 1.  Returns a list of strings.
    """
    column_names = []
    for ordinal in range(1, num_columns + 1):
        column_names.append(f'{name_prefix}{ordinal}{name_suffix}')
    return column_names

def get_date_from_movie(movie):
    """Return a ``datetime.date`` built from a movie's year/month/day fields.

    ``movie`` is any mapping-like object (dict, DataFrame row, ...) indexable
    by 'META__year', 'META__month' and 'META__day'.
    """
    year, month, day = (
        movie[field] for field in ('META__year', 'META__month', 'META__day')
    )
    return date(year, month, day)


def get_info_df_movies_per_value_range_columns(df, columns):
    """Group the rows of ``df`` by every distinct value found in ``columns``.

    Parameters
    ----------
    df : pandas.DataFrame
        Source frame; any index is supported because rows are selected by
        position, not by label.
    columns : list of column labels
        Columns that are scanned for values; NaN/None cells are ignored.

    Returns
    -------
    dict
        ``{value: {'movies': DataFrame}}`` where the frame holds, in original
        row order, every row in which ``value`` appears in one of ``columns``.
        Keys are ordered by first appearance.  A row that carries the same
        value in several of the columns is included once per occurrence.
    """
    # Collect row positions first and slice once per value: this avoids the
    # removed DataFrame.append API and the quadratic cost of growing a frame
    # one row at a time.
    positions_by_value = {}
    scanned = df[list(columns)].itertuples(index=False, name=None)
    for position, row_values in enumerate(scanned):
        for value in row_values:
            if not pd.isna(value):
                positions_by_value.setdefault(value, []).append(position)

    return {
        value: {'movies': df.iloc[positions]}
        for value, positions in positions_by_value.items()
    }

# def get_info_df_movies_per_value_range_columns(df, columns):
#     def rows_with_value(value):
#         return df[df.apply(lambda row: value in [row[column] for column in columns], axis=1)]

#     unique_values = list(set([movie[column] for i, movie in df.iterrows() for column in columns]))
#     return {value: rows_with_value(value) for value in unique_values}

def calculate_info_per_previous_movies_per_value(df, 
                                                 info_dict, 
                                                 calc_budget=False, 
                                                 calc_movies_before=False, 
                                                 calc_experience=False, 
                                                 calc_revenue=False,
                                                ):
    """Annotate each entry of ``info_dict`` with statistics of earlier movies.

    For every value entry (a dict holding a ``'movies'`` DataFrame) and every
    distinct release date among its movies, statistics are computed over the
    entry's movies released strictly before that date and stored in
    per-statistic dicts keyed by the ISO date string (``str(date)``):

    * ``'avg_profit'``     mean of ``META__revenue - budget`` (always)
    * ``'avg_budget'``     mean of ``budget``            (``calc_budget``)
    * ``'avg_revenue'``    mean of ``META__revenue``     (``calc_revenue``)
    * ``'movies_before'``  number of earlier movies      (``calc_movies_before``)
    * ``'experience'``     movie year minus the earliest year among the
                           entry's movies                (``calc_experience``)

    Dates with no earlier movie get no key in the "before" statistics;
    ``'experience'`` is recorded for every date.

    Parameters
    ----------
    df : unused; kept so existing callers keep working.
    info_dict : dict of ``{value: {'movies': DataFrame}}``; mutated in place.
        Each frame needs the columns META__year, META__month, META__day,
        META__revenue and budget.
    calc_budget, calc_movies_before, calc_experience, calc_revenue : bool
        Enable the optional statistics listed above.

    Returns
    -------
    None
    """
    for value in info_dict.values():
        # Creation order matters: downstream code derives column order from
        # the key order of these dicts.
        value['avg_profit'] = {}
        if calc_budget:
            value['avg_budget'] = {}
        if calc_experience:
            value['experience'] = {}
        if calc_revenue:
            value['avg_revenue'] = {}
        if calc_movies_before:
            value['movies_before'] = {}

        movies = value['movies']

        # Build every release date once, straight from the columns.  Reading
        # per column (instead of per row) keeps integer date parts integral
        # even when the frame also holds float columns.
        years = movies['META__year'].tolist()
        release_dates = [
            date(year, month, day)
            for year, month, day in zip(years,
                                        movies['META__month'].tolist(),
                                        movies['META__day'].tolist())
        ]
        first_year = movies['META__year'].min()

        for year, current_date in zip(years, release_dates):
            str_date = str(current_date)
            if calc_experience and str_date not in value['experience']:
                value['experience'][str_date] = year - first_year
            if str_date in value['avg_profit']:
                continue

            released_before = np.array(
                [other < current_date for other in release_dates], dtype=bool
            )
            if not released_before.any():
                continue

            movies_before = movies[released_before]
            value['avg_profit'][str_date] = (
                movies_before['META__revenue'] - movies_before['budget']
            ).mean()
            if calc_movies_before:
                value['movies_before'][str_date] = movies_before.shape[0]
            if calc_revenue:
                value['avg_revenue'][str_date] = movies_before['META__revenue'].mean()
            if calc_budget:
                value['avg_budget'][str_date] = movies_before['budget'].mean()
                    

def get_columns_from_info_dict(df, info_dict, real_columns, new_column_prefix='', nested_info=False):
    """Turn the per-value statistics of ``info_dict`` into per-row column data.

    The statistic names are taken from the first entry of ``info_dict``
    (every key except ``'movies'``).  For each column in ``real_columns`` and
    each statistic, one output list is built with one element per row of
    ``df``: the statistic stored for the row's value at the row's release
    date, or NaN when the cell is NaN or no statistic exists for that date.

    Output names are ``<prefix>_<stat>`` when a single real column is given,
    otherwise ``<prefix>_<n>_<stat>`` with ``n`` the 1-based column position.
    The statistic names, each real column and its output names are printed.

    ``nested_info`` is accepted but not used.

    Returns
    -------
    dict mapping each new column name to its list of values.
    """
    first_entry = list(info_dict.values())[0]
    info_names = [name for name in first_entry.keys() if name != 'movies']
    print(info_names)

    only_one_column = len(real_columns) == 1
    new_columns_data = {}
    for index, real_column in enumerate(real_columns):
        print(real_column)
        if only_one_column:
            new_column_names = [f'{new_column_prefix}_{key}' for key in info_names]
        else:
            new_column_names = [f'{new_column_prefix}_{index+1}_{key}' for key in info_names]
        print(new_column_names)

        for target_name in new_column_names:
            new_columns_data[target_name] = []

        for _, movie in df.iterrows():
            value = movie[real_column]
            str_date = str(get_date_from_movie(movie))
            if pd.isna(value):
                # No value in this cell: every derived statistic is missing.
                for target_name in new_column_names:
                    new_columns_data[target_name].append(np.nan)
                continue
            for info_name, target_name in zip(info_names, new_column_names):
                per_date = info_dict[value][info_name]
                new_columns_data[target_name].append(per_date.get(str_date, np.nan))
    return new_columns_data

In [8]:
def add_features(df, 
                 columns,
                 new_column_prefix,
                 calc_movies_before=False, 
                 calc_budget=False, 
                 calc_experience=False,
                 calc_revenue=False):
    """Add "previous movies" statistic columns to ``df`` for ``columns``.

    Runs three timed stages, printing a label before and the elapsed process
    time after each: group the rows by value, compute the per-date statistics
    for every value, and expand them into per-row columns.  The resulting
    columns are assigned onto ``df`` (in place), which is then returned.

    The ``calc_*`` flags are forwarded to
    ``calculate_info_per_previous_movies_per_value``.
    """
    def report_elapsed(started_at):
        print(f'time: {time.process_time() - started_at}')

    print('obtaine movies')
    stage_start = time.process_time()
    data_info = get_info_df_movies_per_value_range_columns(df, columns)
    report_elapsed(stage_start)

    # dict.copy() is shallow: the per-value dicts are shared with data_info.
    data_info_copy = data_info.copy()
    print('calculate info')
    stage_start = time.process_time()
    statistic_flags = dict(
        calc_movies_before=calc_movies_before,
        calc_budget=calc_budget,
        calc_experience=calc_experience,
        calc_revenue=calc_revenue,
    )
    calculate_info_per_previous_movies_per_value(df, data_info_copy, **statistic_flags)
    report_elapsed(stage_start)

    print('get columns')
    stage_start = time.process_time()
    feature_columns = get_columns_from_info_dict(
        df, data_info_copy, columns, new_column_prefix=new_column_prefix
    )
    report_elapsed(stage_start)

    for column_name, column_values in feature_columns.items():
        df[column_name] = column_values
    return df


def add_companies_features(df):
    """Add profit and revenue history features for META__production_company_1..3."""
    company_columns = get_real_columns('META__production_company_', '', 3)
    return add_features(
        df,
        company_columns,
        'production_company',
        calc_revenue=True,
    )


def add_cast_features(df):
    """Add profit, revenue, experience and movies-before features for META__cast_1..8_name."""
    cast_columns = get_real_columns('META__cast_', '_name', 8)
    return add_features(
        df,
        cast_columns,
        'cast',
        calc_movies_before=True,
        calc_revenue=True,
        calc_experience=True,
    )


def add_crew_features(df):
    """Add profit, revenue and movies-before features for every 'META__crew' column.

    Each crew column is processed on its own; the new columns are prefixed
    with the crew column's name minus the 'META__' marker.
    """
    # Snapshot the crew columns up front so columns added below are not revisited.
    crew_columns = [name for name in df.columns if 'META__crew' in name]
    for crew_column in crew_columns:
        feature_prefix = crew_column.replace('META__', '')
        df = add_features(
            df,
            [crew_column],
            feature_prefix,
            calc_movies_before=True,
            calc_revenue=True,
        )
    return df


def add_collection_features(df):
    """Add profit and revenue history features for META__collection_name."""
    collection_columns = ['META__collection_name']
    return add_features(
        df,
        collection_columns,
        'collection',
        calc_revenue=True,
    )


def get_avg_cast_info(df):
    """Add ``cast_avg_<metric>`` columns averaging the per-cast-member features.

    For each metric in revenue, profit, experience and movies_before, the
    columns whose name contains 'cast_' and the metric name (but not
    'cast_avg') are averaged row by row, ignoring missing values.  A row with
    no usable value gets NaN.  ``df`` is modified in place and returned.
    """
    def row_average(row, source_columns):
        present = [row[name] for name in source_columns if not pd.isna(row[name])]
        if not present:
            return np.nan
        return mean(present)

    for metric in ['revenue', 'profit', 'experience', 'movies_before']:
        source_columns = [
            name for name in df.columns
            if 'cast_' in name and metric in name and 'cast_avg' not in name
        ]
        averages = []
        for _, row in df.iterrows():
            averages.append(row_average(row, source_columns))
        df[f'cast_avg_{metric}'] = averages
    return df


def prepare_df(df_original):
    """Return a feature-augmented copy of ``df_original``.

    Works on a copy, applying in order the company, cast, crew and collection
    history features and the cast averages, then adds 'year_avg_revenue' (the
    per-year smoothed mean of META__revenue, m=5).  The total process time is
    printed.
    """
    overall_time = time.process_time()
    df = df_original.copy()
    feature_steps = (
        add_companies_features,
        add_cast_features,
        add_crew_features,
        add_collection_features,
        get_avg_cast_info,
    )
    for apply_step in feature_steps:
        df = apply_step(df)
    df['year_avg_revenue'] = calc_smooth_mean(df, by='META__year', on='META__revenue', m=5)
    print(f'overall time: {time.process_time() - overall_time}')
    return df
    

In [None]:
# Build the feature-augmented frame for the complete raw dataset.
df_all = prepare_df(df_raw)

In [16]:
# Named subsets of df_raw to export; each value is an independent copy.
# Only the two revenue-bounded subsets are active, the rest are commented out.
# The 'revenue_5percentile' bounds are the integer parts of the 5%/95%
# META__revenue quantiles shown earlier in this notebook (111,229.2 and
# 348,514,752.6).
# NOTE(review): the 'revenue_1percentile' bounds are presumably the 1%/99%
# quantiles; that computation is not visible in this notebook, so confirm.
raw_datasets = {
#     'us': df_raw[
#         df_raw.country__us == 1].copy(),
#     'gb': df_raw[
#         df_raw.country__gb == 1].copy(),
#     'years2000-2018': df_raw[
#         (df_raw.META__year >= 2000) &
#         (df_raw.META__year <= 2018)].copy(),
#     'years1970-1999': df_raw[
#         (df_raw.META__year >= 1970) &
#         (df_raw.META__year <= 1999)].copy(),
#     'profitability_positive': df_raw[
#         df_raw.META__profitability > 0].copy(),
#     'profitability_negative': df_raw[
#         df_raw.META__profitability < 0].copy(),
#     'budget_start_1percent': df_raw[
#         df_raw.budget >= 8875].copy(),
#     'budget_start_5percent': df_raw[
#         df_raw.budget >= 250000].copy(),
#     'profitability_1percentile': df_raw[
#         (df_raw.META__profitability >= -138) &
#         (df_raw.META__profitability <= 65)].copy(),
#     'profitability_5percentile': df_raw[
#         (df_raw.META__profitability >= -14.9) &
#         (df_raw.META__profitability <= 13.9)].copy(),
#     'profitability_10percentile': df_raw[
#         (df_raw.META__profitability >= -4.42) &
#         (df_raw.META__profitability <= 7.4)].copy(),
#     'profitability_1percentile_years2000-2018': df_raw[
#         (df_raw.META__profitability >= -138) &
#         (df_raw.META__profitability <= 65) &
#         (df_raw.META__year >= 2000) &
#         (df_raw.META__year <= 2018)].copy(),
#     'profitability_1percentile_years2000-2018_budget_start_1percent': df_raw[
#         (df_raw.META__profitability >= -138) &
#         (df_raw.META__profitability <= 65) &
#         (df_raw.META__year >= 2000) &
#         (df_raw.META__year <= 2018) &
#         (df_raw.budget >= 8875)].copy(),
#     'profitability_1percentile_years2000-2018_budget_start_5percent': df_raw[
#         (df_raw.META__profitability >= -138) &
#         (df_raw.META__profitability <= 65) &
#         (df_raw.META__year >= 2000) &
#         (df_raw.META__year <= 2018) &
#         (df_raw.budget >= 250000)].copy(),
    'revenue_1percentile': df_raw[
        (df_raw.META__revenue >= 10000) &
        (df_raw.META__revenue <= 854060072)].copy(),
    'revenue_5percentile': df_raw[
        (df_raw.META__revenue >= 111229) &
        (df_raw.META__revenue <= 348514752)].copy(),
}

SyntaxError: invalid syntax (<ipython-input-16-78adbb5cbd03>, line 52)

In [None]:
# Row count of every subset, followed by each subset's share of df_raw in percent.
for d in raw_datasets:
    print(len(raw_datasets[d]))
for d in raw_datasets:
    print(len(raw_datasets[d]) / len(df_raw) * 100)

In [12]:
# For every subset: announce it, save the raw rows, reload them from disk
# (which yields a fresh default index), compute the features and save the
# prepared frame next to the raw one.
for dataset in raw_datasets:
    print(
        '------------------------------------------------------',
        dataset,
        '-------------------------------------------------------',
        sep='\n',
    )
    raw_filename = f'datasets/dataset_{dataset}_raw.csv'
    filename = f'datasets/dataset_{dataset}.csv'
    raw_datasets[dataset].to_csv(raw_filename, index=False)
    raw_dataset = pd.read_csv(raw_filename)
    prepared = prepare_df(raw_dataset)
    prepared.to_csv(filename, index=False)

------------------------------------------------------
us
-------------------------------------------------------
obtaine movies
time: 26.411767158000004
calculate info
time: 42.984574178
get columns
['avg_profit', 'avg_revenue']
META__production_company_1
['production_company_1_avg_profit', 'production_company_1_avg_revenue']
META__production_company_2
['production_company_2_avg_profit', 'production_company_2_avg_revenue']
META__production_company_3
['production_company_3_avg_profit', 'production_company_3_avg_revenue']
time: 1.5053753009999866
obtaine movies
time: 86.712821249
calculate info
time: 65.733221748
get columns
['avg_profit', 'experience', 'avg_revenue', 'movies_before']
META__cast_1_name
['cast_1_avg_profit', 'cast_1_experience', 'cast_1_avg_revenue', 'cast_1_movies_before']
META__cast_2_name
['cast_2_avg_profit', 'cast_2_experience', 'cast_2_avg_revenue', 'cast_2_movies_before']
META__cast_3_name
['cast_3_avg_profit', 'cast_3_experience', 'cast_3_avg_revenue', 'cast_3_mo

time: 0.5112733509999998
obtaine movies
time: 3.144172088000005
calculate info
time: 1.9264206130000048
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__art__property_master
['crew__art__property_master_avg_profit', 'crew__art__property_master_avg_revenue', 'crew__art__property_master_movies_before']
time: 0.4970076780000454
obtaine movies
time: 6.906914289999975
calculate info
time: 4.3607796830000325
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__art__set_decoration
['crew__art__set_decoration_avg_profit', 'crew__art__set_decoration_avg_revenue', 'crew__art__set_decoration_movies_before']
time: 0.5000797810000108
obtaine movies
time: 2.6817989019999686
calculate info
time: 1.7981295160000172
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__visualeffects__visual_effects_supervisor
['crew__visualeffects__visual_effects_supervisor_avg_profit', 'crew__visualeffects__visual_effects_supervisor_avg_revenue', 'crew__visualef

time: 0.9345074699999714
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__editing__editor__1
['crew__editing__editor__1_avg_profit', 'crew__editing__editor__1_avg_revenue', 'crew__editing__editor__1_movies_before']
time: 0.07574419900004159
obtaine movies
time: 1.1143304999999373
calculate info
time: 0.7763923659999818
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__costume__costume_designer
['crew__costume__costume_designer_avg_profit', 'crew__costume__costume_designer_avg_revenue', 'crew__costume__costume_designer_movies_before']
time: 0.07404425199990783
obtaine movies
time: 0.28894404700008636
calculate info
time: 0.24717410499999914
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__costume__costume_supervisor
['crew__costume__costume_supervisor_avg_profit', 'crew__costume__costume_supervisor_avg_revenue', 'crew__costume__costume_supervisor_movies_before']
time: 0.07410398399997575
obtaine movies
time: 0.334016464999

time: 0.42887226599998485
obtaine movies
time: 2.950684682999963
calculate info
time: 1.8746830840000257
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__sound__supervising_sound_editor
['crew__sound__supervising_sound_editor_avg_profit', 'crew__sound__supervising_sound_editor_avg_revenue', 'crew__sound__supervising_sound_editor_movies_before']
time: 0.42946020300007604
obtaine movies
time: 6.766060158999949
calculate info
time: 5.823688620999974
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__directing__director__1
['crew__directing__director__1_avg_profit', 'crew__directing__director__1_avg_revenue', 'crew__directing__director__1_movies_before']
time: 0.4294773809999697
obtaine movies
time: 3.5379795150000746
calculate info
time: 2.4022029940000493
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__directing__script_supervisor
['crew__directing__script_supervisor_avg_profit', 'crew__directing__script_supervisor_avg_reve

META__cast_3_name
['cast_3_avg_profit', 'cast_3_experience', 'cast_3_avg_revenue', 'cast_3_movies_before']
META__cast_4_name
['cast_4_avg_profit', 'cast_4_experience', 'cast_4_avg_revenue', 'cast_4_movies_before']
META__cast_5_name
['cast_5_avg_profit', 'cast_5_experience', 'cast_5_avg_revenue', 'cast_5_movies_before']
META__cast_6_name
['cast_6_avg_profit', 'cast_6_experience', 'cast_6_avg_revenue', 'cast_6_movies_before']
META__cast_7_name
['cast_7_avg_profit', 'cast_7_experience', 'cast_7_avg_revenue', 'cast_7_movies_before']
META__cast_8_name
['cast_8_avg_profit', 'cast_8_experience', 'cast_8_avg_revenue', 'cast_8_movies_before']
time: 1.3731274540000413
obtaine movies
time: 0.8491391850000127
calculate info
time: 0.4651067629999943
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__sound__music_editor
['crew__sound__music_editor_avg_profit', 'crew__sound__music_editor_avg_revenue', 'crew__sound__music_editor_movies_before']
time: 0.16544225999996343
obtaine mov

time: 0.24610097199979464
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__visualeffects__visual_effects_supervisor
['crew__visualeffects__visual_effects_supervisor_avg_profit', 'crew__visualeffects__visual_effects_supervisor_avg_revenue', 'crew__visualeffects__visual_effects_supervisor_movies_before']
time: 0.17604218799988303
obtaine movies
time: 4.104977660000031
calculate info
time: 2.3675246579998657
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__camera__director_of_photography
['crew__camera__director_of_photography_avg_profit', 'crew__camera__director_of_photography_avg_revenue', 'crew__camera__director_of_photography_movies_before']
time: 0.1752874650001104
obtaine movies
time: 0.5638446959999328
calculate info
time: 0.26462168100010786
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__camera__steadicam_operator
['crew__camera__steadicam_operator_avg_profit', 'crew__camera__steadicam_operator_avg_revenue', 'crew

time: 0.4473042490001262
obtaine movies
time: 2.373101011000017
calculate info
time: 2.027755319000107
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__costume__makeup_artist
['crew__costume__makeup_artist_avg_profit', 'crew__costume__makeup_artist_avg_revenue', 'crew__costume__makeup_artist_movies_before']
time: 0.44277028999999857
obtaine movies
time: 2.4821872279999297
calculate info
time: 1.6994325359999038
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__crew__stunt_coordinator
['crew__crew__stunt_coordinator_avg_profit', 'crew__crew__stunt_coordinator_avg_revenue', 'crew__crew__stunt_coordinator_movies_before']
time: 0.4366285040000548
obtaine movies
time: 4.173042504000023
calculate info
time: 3.9172014309999668
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__writing__screenplay__1
['crew__writing__screenplay__1_avg_profit', 'crew__writing__screenplay__1_avg_revenue', 'crew__writing__screenplay__1_movies_before']

time: 2.681670954000083
calculate info
time: 1.750702240999999
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__production__casting
['crew__production__casting_avg_profit', 'crew__production__casting_avg_revenue', 'crew__production__casting_movies_before']
time: 0.19287522599984186
obtaine movies
time: 1.1602063670000007
calculate info
time: 1.3418749209999987
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__production__executive_producer__1
['crew__production__executive_producer__1_avg_profit', 'crew__production__executive_producer__1_avg_revenue', 'crew__production__executive_producer__1_movies_before']
time: 0.1944921220001561
obtaine movies
time: 2.1278171080000448
calculate info
time: 2.1630862970000635
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__production__producer__1
['crew__production__producer__1_avg_profit', 'crew__production__producer__1_avg_revenue', 'crew__production__producer__1_movies_before']
time: 

time: 0.6248436799999126
obtaine movies
time: 10.065886653999996
calculate info
time: 8.337349462999782
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__sound__original_music_composer
['crew__sound__original_music_composer_avg_profit', 'crew__sound__original_music_composer_avg_revenue', 'crew__sound__original_music_composer_movies_before']
time: 0.6290767949999463
obtaine movies
time: 2.7608812250000483
calculate info
time: 1.8421853489999194
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__sound__sound_designer
['crew__sound__sound_designer_avg_profit', 'crew__sound__sound_designer_avg_revenue', 'crew__sound__sound_designer_movies_before']
time: 0.6360103659999368
obtaine movies
time: 2.869169606000014
calculate info
time: 1.8992650419997972
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__sound__sound_effects_editor
['crew__sound__sound_effects_editor_avg_profit', 'crew__sound__sound_effects_editor_avg_revenue', 'crew_

time: 0.6467865570002687
obtaine movies
time: 4.647838453999611
calculate info
time: 3.172899217000122
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__camera__still_photographer
['crew__camera__still_photographer_avg_profit', 'crew__camera__still_photographer_avg_revenue', 'crew__camera__still_photographer_movies_before']
time: 0.6585388049998073
obtaine movies
time: 2.3242113930000414
calculate info
time: 1.4747410599998148
get columns
['avg_profit', 'avg_revenue']
META__collection_name
['collection_avg_profit', 'collection_avg_revenue']
time: 0.6902894919999198
overall time: 573.0981938359998
------------------------------------------------------
budget_start_5percent
-------------------------------------------------------
obtaine movies
time: 31.13680007999983
calculate info
time: 46.64315411999996
get columns
['avg_profit', 'avg_revenue']
META__production_company_1
['production_company_1_avg_profit', 'production_company_1_avg_revenue']
META__production_compan

time: 0.6624273900001754
obtaine movies
time: 6.013400124999862
calculate info
time: 4.818830615000024
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__art__art_direction
['crew__art__art_direction_avg_profit', 'crew__art__art_direction_avg_revenue', 'crew__art__art_direction_movies_before']
time: 0.6349431759999788
obtaine movies
time: 9.32554276000019
calculate info
time: 6.4843958379997275
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__art__production_design
['crew__art__production_design_avg_profit', 'crew__art__production_design_avg_revenue', 'crew__art__production_design_movies_before']
time: 0.6614857019999363
obtaine movies
time: 3.1961927489996924
calculate info
time: 2.102121684999929
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__art__property_master
['crew__art__property_master_avg_profit', 'crew__art__property_master_avg_revenue', 'crew__art__property_master_movies_before']
time: 0.6314625099998921
obtai

time: 0.664535192999665
obtaine movies
time: 3.0056579590000183
calculate info
time: 3.278568102999998
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__production__producer__2
['crew__production__producer__2_avg_profit', 'crew__production__producer__2_avg_revenue', 'crew__production__producer__2_movies_before']
time: 0.6900222130002476
obtaine movies
time: 12.661148052000044
calculate info
time: 8.82373522600028
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__editing__editor__1
['crew__editing__editor__1_avg_profit', 'crew__editing__editor__1_avg_revenue', 'crew__editing__editor__1_movies_before']
time: 0.6901884369999607
obtaine movies
time: 9.232870159000413
calculate info
time: 6.396325024999896
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__costume__costume_designer
['crew__costume__costume_designer_avg_profit', 'crew__costume__costume_designer_avg_revenue', 'crew__costume__costume_designer_movies_before']
time: 0

time: 0.622806679000405
obtaine movies
time: 4.278501604999747
calculate info
time: 2.8669498560002467
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__sound__sound_re_recording_mixer
['crew__sound__sound_re_recording_mixer_avg_profit', 'crew__sound__sound_re_recording_mixer_avg_revenue', 'crew__sound__sound_re_recording_mixer_movies_before']
time: 0.6307267170000159
obtaine movies
time: 3.994202224000219
calculate info
time: 2.6001973990000806
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__sound__supervising_sound_editor
['crew__sound__supervising_sound_editor_avg_profit', 'crew__sound__supervising_sound_editor_avg_revenue', 'crew__sound__supervising_sound_editor_movies_before']
time: 0.6325731660003839
obtaine movies
time: 10.792465759000152
calculate info
time: 8.394941320999806
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__directing__director__1
['crew__directing__director__1_avg_profit', 'crew__directing__direc

META__production_company_3
['production_company_3_avg_profit', 'production_company_3_avg_revenue']
time: 1.6320920529997238
obtaine movies
time: 85.04618550099985
calculate info
time: 64.49483164200001
get columns
['avg_profit', 'experience', 'avg_revenue', 'movies_before']
META__cast_1_name
['cast_1_avg_profit', 'cast_1_experience', 'cast_1_avg_revenue', 'cast_1_movies_before']
META__cast_2_name
['cast_2_avg_profit', 'cast_2_experience', 'cast_2_avg_revenue', 'cast_2_movies_before']
META__cast_3_name
['cast_3_avg_profit', 'cast_3_experience', 'cast_3_avg_revenue', 'cast_3_movies_before']
META__cast_4_name
['cast_4_avg_profit', 'cast_4_experience', 'cast_4_avg_revenue', 'cast_4_movies_before']
META__cast_5_name
['cast_5_avg_profit', 'cast_5_experience', 'cast_5_avg_revenue', 'cast_5_movies_before']
META__cast_6_name
['cast_6_avg_profit', 'cast_6_experience', 'cast_6_avg_revenue', 'cast_6_movies_before']
META__cast_7_name
['cast_7_avg_profit', 'cast_7_experience', 'cast_7_avg_revenue', 

time: 0.5323957649998192
obtaine movies
time: 5.821225697000045
calculate info
time: 4.146995794000759
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__art__set_decoration
['crew__art__set_decoration_avg_profit', 'crew__art__set_decoration_avg_revenue', 'crew__art__set_decoration_movies_before']
time: 0.545710538000094
obtaine movies
time: 2.4571069550001994
calculate info
time: 1.8557259729996076
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__visualeffects__visual_effects_supervisor
['crew__visualeffects__visual_effects_supervisor_avg_profit', 'crew__visualeffects__visual_effects_supervisor_avg_revenue', 'crew__visualeffects__visual_effects_supervisor_movies_before']
time: 0.5477994250004485
obtaine movies
time: 10.179287617999762
calculate info
time: 7.190696012999979
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__camera__director_of_photography
['crew__camera__director_of_photography_avg_profit', 'crew__camera__di

time: 4.026404520999677
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__costume__costume_designer
['crew__costume__costume_designer_avg_profit', 'crew__costume__costume_designer_avg_revenue', 'crew__costume__costume_designer_movies_before']
time: 0.4243458880000617
obtaine movies
time: 1.9075095150001289
calculate info
time: 1.4017679819999103
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__costume__costume_supervisor
['crew__costume__costume_supervisor_avg_profit', 'crew__costume__costume_supervisor_avg_revenue', 'crew__costume__costume_supervisor_movies_before']
time: 0.4322591639993334
obtaine movies
time: 1.7687531749998016
calculate info
time: 1.6558341970003312
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__costume__makeup_artist
['crew__costume__makeup_artist_avg_profit', 'crew__costume__makeup_artist_avg_revenue', 'crew__costume__makeup_artist_movies_before']
time: 0.43201472999953694
obtaine movies
time: 2.4

time: 0.4162553710002612
obtaine movies
time: 6.8194214780005495
calculate info
time: 5.82840566699997
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__directing__director__1
['crew__directing__director__1_avg_profit', 'crew__directing__director__1_avg_revenue', 'crew__directing__director__1_movies_before']
time: 0.4317551979993368
obtaine movies
time: 3.622057286999734
calculate info
time: 2.4531086520000827
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__directing__script_supervisor
['crew__directing__script_supervisor_avg_profit', 'crew__directing__script_supervisor_avg_revenue', 'crew__directing__script_supervisor_movies_before']
time: 0.4304299719997289
obtaine movies
time: 6.81785249099994
calculate info
time: 5.7739191209993805
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__production__casting
['crew__production__casting_avg_profit', 'crew__production__casting_avg_revenue', 'crew__production__casting_movies_bef

META__cast_5_name
['cast_5_avg_profit', 'cast_5_experience', 'cast_5_avg_revenue', 'cast_5_movies_before']
META__cast_6_name
['cast_6_avg_profit', 'cast_6_experience', 'cast_6_avg_revenue', 'cast_6_movies_before']
META__cast_7_name
['cast_7_avg_profit', 'cast_7_experience', 'cast_7_avg_revenue', 'cast_7_movies_before']
META__cast_8_name
['cast_8_avg_profit', 'cast_8_experience', 'cast_8_avg_revenue', 'cast_8_movies_before']
time: 3.27200026599985
obtaine movies
time: 2.7744419059999927
calculate info
time: 1.6890675800004828
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__sound__music_editor
['crew__sound__music_editor_avg_profit', 'crew__sound__music_editor_avg_revenue', 'crew__sound__music_editor_movies_before']
time: 0.3997643180000523
obtaine movies
time: 6.070334316000299
calculate info
time: 4.571140406000268
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__sound__original_music_composer
['crew__sound__original_music_composer_avg_profi

time: 0.4413604689998465
obtaine movies
time: 7.52432445499926
calculate info
time: 5.076369788000193
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__camera__director_of_photography
['crew__camera__director_of_photography_avg_profit', 'crew__camera__director_of_photography_avg_revenue', 'crew__camera__director_of_photography_movies_before']
time: 0.41754018099982204
obtaine movies
time: 1.9386161659995196
calculate info
time: 1.2309827640001458
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__camera__steadicam_operator
['crew__camera__steadicam_operator_avg_profit', 'crew__camera__steadicam_operator_avg_revenue', 'crew__camera__steadicam_operator_movies_before']
time: 0.40400366599988047
obtaine movies
time: 3.3190362830000595
calculate info
time: 2.1747218370001065
get columns
['avg_profit', 'avg_revenue', 'movies_before']
META__crew__camera__still_photographer
['crew__camera__still_photographer_avg_profit', 'crew__camera__still_photographe