# 3. Preprocess the data ready for a machine learning algorithm

In [1]:
# for the sake of development, use this magic command to solve slow suggestion
%config Completer.use_jedi = False

In [2]:
import pandas as pd
import numpy as np
import math
import seaborn as sns
import matplotlib.pyplot as plt
from datetime import datetime
%matplotlib inline
pd.set_option('max_columns', 500)

from preprocess_data import *

from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder

### Definitions of helper functions

In [3]:
def to_date(x):
    return datetime.strptime(x[:-2]+'01', '%Y-%m-%d')

def load_data():
    df = pd.read_csv('prod-movies/movies.csv'
                    , converters = {'release_date': to_date})

    genres = pd.read_csv('data/ml-latest/movies.csv')
    links = pd.read_csv('data/ml-latest/links.csv')
    genres = genres.merge(links, on='movieId')[['tmdbId','genres']]

    df = df.merge(genres, left_on='id', right_on='tmdbId')
    df['profit'] = df['revenue'] - df['budget']
    df['profit_class'] = df['profit'].apply(lambda x: 1 if x > 0 else 0)
    
    df['year'] = df['release_date'].dt.year
    df['month'] = df['release_date'].dt.strftime('%b')
    
    return df

def expand_categories(row, cat_set):
    """
    Function to expand the categories data into a ;-separated list to include
    a value with each of the categories
    """
    my_str = ""
    
    # iterate through all possible categories
    for category in cat_set:
        # check for matches in this row
        my_str+= category
        if category in row:
            my_str += '-1;'
        else:
            my_str += '-0;'
            
    return my_str[:-1]

In [4]:
df = load_data()

Specify the categorical columns that we will be dealing with

In [5]:
CATEGORICAL_COLUMNS = ['original_language', 'prod_comp_names', 'prod_comp_cntry', 'language'
                       , 'director', 'writers', 'actors', 'genres']

In [6]:
df[CATEGORICAL_COLUMNS].head(3)

Unnamed: 0,original_language,prod_comp_names,prod_comp_cntry,language,director,writers,actors,genres
0,en,Pixar,US,English,John Lasseter,Andrew Stanton;Bud Luckey;Joss Whedon;Joel Coh...,Tom Hanks;Tim Allen;Don Rickles,Adventure|Animation|Children|Comedy|Fantasy
1,en,TriStar Pictures;Interscope Communications;Tei...,US,English;French,Joe Johnston,Jonathan Hensleigh;Greg Taylor;Jim Strain,Robin Williams;Jonathan Hyde;Kirsten Dunst,Adventure|Children|Fantasy
2,en,20th Century Fox,US,English,Forest Whitaker,Ronald Bass;Terry McMillan,Whitney Houston;Angela Bassett;Loretta Devine,Comedy|Drama|Romance


### Handle where we can use the `OneHotEncoder` tool from SciKit-Learn

In [7]:
# create dataframes with these categorical features encoded as binary values
ONE_HOT_COLS = ['original_language', 'director', 'year', 'month']

cat_dfs = []
for col in ONE_HOT_COLS:
    onehot_encoder = OneHotEncoder()
    col_encoded = onehot_encoder.fit_transform(df[[col]])
    col_encoded_df = pd.DataFrame(col_encoded.toarray()
                                 , columns=[col+"_"+str(x) for x in
                                            onehot_encoder.categories_[0]])
    
    if col=='director':
        # find the most proflific directors, take the top 100
        directors = col_encoded_df.transpose().sum(axis=1)
        directors = directors.sort_values(ascending=False)[:100].index
        col_encoded_df = col_encoded_df[directors]
    
    cat_dfs.append(col_encoded_df)

In [8]:
# append the columns onto the main dataframe
for i, col in enumerate(ONE_HOT_COLS):
    df = df.merge(cat_dfs[i], left_on=df.index, right_on=cat_dfs[i].index)
    df.drop([col, 'key_0'], axis=1, inplace=True)

### Handle where we have multiple categories, e.g., genres

This first example looks at the specialised case of the genres.

In [9]:
OTHER_CATEGORICAL_COLS = ['genres', 'actors', 'writers', 'prod_comp_names', 'prod_comp_cntry', 'language']

df_cat = df[OTHER_CATEGORICAL_COLS].copy()
# clean the genres data so it is in the same format as all the other columns
df_cat['genres'] = df_cat['genres'].apply(lambda x: x.replace('|', ';'))

In [10]:
category_counts = {}

# loop through each columns
for col in df_cat.columns:
    # generate the set of unique values for the column
    counts_dict = {}
    for i, row in df_cat[[col]].iterrows():
        for key in row[0].split(';'):
            try:
                count = counts_dict[key]
                counts_dict[key] += 1
            except:
                counts_dict[key] = 1
                
    category_counts[col] = counts_dict

In [11]:
# We have the counts for each of the category values in a dictionary
# Order the dictionary to take those only in the top 100, say
category_dict = {}
for key in category_counts:
    if len(category_counts[key]) > 100:
        df_tmp = pd.DataFrame.from_dict(category_counts[key]
                                       , orient='index'
                                       , columns=['count'])
        df_tmp = df_tmp.sort_values('count', ascending=False).iloc[:100]
        category_dict[key] = list(df_tmp.index)
    else:
        category_dict[key] = [k for k in category_counts[key]]

In [12]:
cat_dfs = []

for col in df_cat.columns:
    i=0
    print(f"Expanding {col}")
    
    df_cat[col] = df_cat[col].apply(lambda x: expand_categories(x, category_dict[col]))
    categories = df_cat[col].str.split(';', expand=True)
    categories.columns = [col+"_"+str(value) for value in categories.columns]
    
    for cat_col in categories:
        categories[cat_col] = pd.to_numeric(categories[cat_col].apply(lambda x: x[-1]))
        i += 1
    
    categories.columns = [(col+"_"+item).replace(' ', '_') for item in category_dict[col]]
    
    cat_dfs.append(categories)

Expanding genres
Expanding actors
Expanding writers
Expanding prod_comp_names
Expanding prod_comp_cntry
Expanding language


In [18]:
for i, col in enumerate(OTHER_CATEGORICAL_COLS):
    df = df.merge(cat_dfs[i], left_on=df.index, right_on=cat_dfs[i].index)
    df.drop([col, 'key_0'], axis=1, inplace=True)

In [19]:
df.head()

Unnamed: 0,budget,id,original_title,popularity,release_date,revenue,runtime,num_prods,num_languages,director_pop,num_writers,avg_writer_pop,max_writer_pop,min_writer_pop,sum_actor_pop,avg_actor_pop,max_actor_pop,min_actor_pop,UNRATE,PCE,CPIAUCSL,tmdbId,profit,profit_class,original_language_ar,original_language_bm,original_language_ca,original_language_cn,original_language_cs,original_language_da,original_language_de,original_language_el,original_language_en,original_language_es,original_language_et,original_language_fi,original_language_fr,original_language_he,original_language_hi,original_language_hu,original_language_id,original_language_it,original_language_ja,original_language_ko,original_language_ku,original_language_ml,original_language_nb,original_language_nl,original_language_no,original_language_pl,original_language_pt,original_language_ro,original_language_ru,original_language_sr,original_language_sv,original_language_ta,original_language_te,original_language_th,original_language_tl,original_language_tr,original_language_uk,original_language_ur,original_language_zh,director_Steven Spielberg,director_Clint Eastwood,director_Ridley Scott,director_Woody Allen,director_Martin Scorsese,director_Steven Soderbergh,director_Ron Howard,director_Spike Lee,director_Tim Burton,director_Robert Zemeckis,director_Brian De Palma,director_Oliver Stone,director_Walter Hill,director_Richard Donner,director_Joel Schumacher,director_Francis Ford Coppola,director_Richard Linklater,director_Joel Coen,director_Renny Harlin,director_Robert Rodriguez,director_Wes Craven,director_Tony Scott,director_George Clooney,director_Michael Bay,director_Sam Raimi,director_John Carpenter,director_Barry Levinson,director_Robert Altman,director_Jean-Jacques Annaud,director_John Landis,director_Ivan Reitman,director_Luc Besson,director_Ashutosh Gowariker,director_David Cronenberg,director_Barry Sonnenfeld,director_Shawn Levy,director_Stephen Herek,director_Kevin Smith,director_Jonathan Frakes,director_Rob Cohen,director_Garry Marshall,director_Peter Jackson,director_Paul W. S. Anderson,director_Antoine Fuqua,director_Ang Lee,director_Tyler Perry,director_Jon Turteltaub,director_Phillip Noyce,director_John McTiernan,director_Bobby Farrelly,director_Rob Reiner,director_Terry Gilliam,director_M. Night Shyamalan,director_Chris Columbus,director_Roland Emmerich,director_Christopher Nolan,director_Edward Zwick,director_David Fincher,director_Lasse Hallström,director_Gore Verbinski,director_Tim Story,director_Wolfgang Petersen,director_Hayao Miyazaki,director_Bruno Dumont,director_Stephen Frears,director_Michael Mann,director_Martin Campbell,director_Wes Anderson,director_Dennis Dugan,director_F. Gary Gray,director_Raja Gosnell,director_Roger Donaldson,director_Kunihiko Yuyama,director_Peter Hyams,director_Mike Nichols,director_James Mangold,director_Peter Berg,director_Guy Ritchie,director_Harold Ramis,director_Todd Phillips,director_Guillermo del Toro,director_Gus Van Sant,director_Jon Favreau,director_Doug Liman,director_Jay Roach,director_Joe Dante,director_Quentin Tarantino,director_Marc Forster,director_David Lynch,director_Jaume Collet-Serra,director_Bob Clark,director_Adam Shankman,director_Malcolm D. Lee,director_Roman Polanski,director_Pedro Almodóvar,director_Alan Parker,director_Sydney Pollack,director_James Wan,director_Stephen Hopkins,director_Brett Ratner,year_1969,year_1970,year_1971,year_1972,year_1973,year_1974,year_1975,year_1976,year_1977,year_1978,year_1979,year_1980,year_1981,year_1982,year_1983,year_1984,year_1985,year_1986,year_1987,year_1988,year_1989,year_1990,year_1991,year_1992,year_1993,year_1994,year_1995,year_1996,year_1997,year_1998,year_1999,year_2000,year_2001,year_2002,year_2003,year_2004,year_2005,year_2006,year_2007,year_2008,year_2009,year_2010,year_2011,year_2012,year_2013,year_2014,year_2015,year_2016,year_2017,year_2018,year_2019,year_2020,month_Apr,month_Aug,month_Dec,month_Feb,month_Jan,month_Jul,month_Jun,month_Mar,month_May,month_Nov,month_Oct,month_Sep,genres_Adventure_x,genres_Animation_x,genres_Children_x,genres_Comedy_x,genres_Fantasy_x,genres_Drama_x,genres_Romance_x,genres_Action_x,genres_Crime_x,genres_Thriller_x,genres_Mystery_x,genres_Sci-Fi_x,genres_Musical_x,genres_Horror_x,genres_War_x,genres_IMAX_x,genres_Western_x,genres_Film-Noir_x,genres_Documentary_x,genres_(no_genres_listed)_x,actors_Robert_De_Niro_x,actors_Bruce_Willis_x,actors_Nicolas_Cage_x,...,prod_comp_names_DreamWorks_Pictures_y,prod_comp_names_Lionsgate_y,prod_comp_names_StudioCanal_y,prod_comp_names_Village_Roadshow_Pictures_y,prod_comp_names_TriStar_Pictures_y,prod_comp_names_United_Artists_y,prod_comp_names_Regency_Enterprises_y,prod_comp_names_Dimension_Films_y,prod_comp_names_Dune_Entertainment_y,prod_comp_names_Fox_Searchlight_Pictures_y,prod_comp_names_Summit_Entertainment_y,prod_comp_names_Amblin_Entertainment_y,prod_comp_names_Screen_Gems_y,prod_comp_names_Working_Title_Films_y,prod_comp_names_Imagine_Entertainment_y,prod_comp_names_Focus_Features_y,prod_comp_names_Fox_2000_Pictures_y,prod_comp_names_New_Regency_Pictures_y,prod_comp_names_The_Weinstein_Company_y,prod_comp_names_TF1_Films_Production_y,prod_comp_names_Silver_Pictures_y,prod_comp_names_Orion_Pictures_y,prod_comp_names_Scott_Rudin_Productions_y,prod_comp_names_France_2_Cinéma_y,prod_comp_names_Film4_Productions_y,prod_comp_names_BBC_Films_y,prod_comp_names_Legendary_Pictures_y,prod_comp_names_Millennium_Films_y,prod_comp_names_Castle_Rock_Entertainment_y,prod_comp_names_TSG_Entertainment_y,prod_comp_names_Ingenious_Media_y,prod_comp_names_Hollywood_Pictures_y,prod_comp_names_Blumhouse_Productions_y,prod_comp_names_Davis_Entertainment_y,prod_comp_names_Spyglass_Entertainment_y,prod_comp_names_Epsilon_Motion_Pictures_y,prod_comp_names_Scott_Free_Productions_y,prod_comp_names_PolyGram_Filmed_Entertainment_y,prod_comp_names_Original_Film_y,prod_comp_names_EuropaCorp_y,prod_comp_names_Revolution_Studios_y,prod_comp_names_Morgan_Creek_Productions_y,prod_comp_names_Lakeshore_Entertainment_y,prod_comp_names_Malpaso_Productions_y,prod_comp_names_DreamWorks_Animation_y,prod_comp_names_CNC_y,prod_comp_names_Participant_Media_y,prod_comp_names_Ciné+_y,prod_comp_names_ARTE_France_Cinéma_y,prod_comp_names_Studio_Babelsberg_y,prod_comp_names_Lions_Gate_Films_y,prod_comp_names_France_3_Cinéma_y,prod_comp_names_UK_Film_Council_y,prod_comp_names_DC_Comics_y,prod_comp_names_RatPac-Dune_Entertainment_y,prod_comp_names_UTV_Motion_Pictures_y,prod_comp_names_Alcon_Entertainment_y,prod_comp_names_Gaumont_y,prod_comp_names_American_Zoetrope_y,prod_comp_names_Film_i_Väst_y,prod_comp_names_DC_Entertainment_y,prod_comp_names_Cannon_Group_y,prod_comp_names_Happy_Madison_Productions_y,prod_comp_names_DENTSU_y,prod_comp_names_Jerry_Bruckheimer_Films_y,prod_comp_names_FilmNation_Entertainment_y,prod_comp_names_Constantin_Film_y,prod_comp_names_The_Kennedy/Marshall_Company_y,prod_comp_names_Walt_Disney_Animation_Studios_y,prod_comp_names_Wild_Bunch_y,prod_comp_names_Nu_Image_y,prod_comp_names_Di_Bonaventura_Pictures_y,prod_comp_names_Reliance_Entertainment_y,prod_comp_names_Nickelodeon_Movies_y,prod_comp_names_NPV_Entertainment_y,prod_comp_names_Vertigo_Entertainment_y,prod_comp_names_Lucasfilm_Ltd._y,prod_comp_names_CJ_Entertainment_y,prod_comp_names_Intermedia_Films_y,prod_comp_names_Marvel_Studios_y,prod_comp_names_IM_Global_y,prod_comp_names_Canal+_España_y,prod_comp_names_Phoenix_Pictures_y,prod_comp_names_EFO_Films_y,prod_comp_names_Dune_Entertainment_III_y,prod_comp_names_Golan-Globus_Productions_y,prod_comp_names_Carolco_Pictures_y,prod_comp_cntry_US_y,prod_comp_cntry_GB_y,prod_comp_cntry_FR_y,prod_comp_cntry_DE_y,prod_comp_cntry_ES_y,prod_comp_cntry_AU_y,prod_comp_cntry_CA_y,prod_comp_cntry_HK_y,prod_comp_cntry_JP_y,prod_comp_cntry_RU_y,prod_comp_cntry_AT_y,prod_comp_cntry_NZ_y,prod_comp_cntry_TW_y,prod_comp_cntry_IT_y,prod_comp_cntry_NL_y,prod_comp_cntry_IE_y,prod_comp_cntry_IN_y,prod_comp_cntry_CZ_y,prod_comp_cntry_BR_y,prod_comp_cntry_MA_y,prod_comp_cntry_NO_y,prod_comp_cntry_SE_y,prod_comp_cntry_LB_y,prod_comp_cntry_DK_y,prod_comp_cntry_MT_y,prod_comp_cntry_RS_y,prod_comp_cntry_FI_y,prod_comp_cntry_IS_y,prod_comp_cntry_CH_y,prod_comp_cntry_LU_y,prod_comp_cntry_MX_y,prod_comp_cntry_PH_y,prod_comp_cntry_KR_y,prod_comp_cntry_TR_y,prod_comp_cntry_PL_y,prod_comp_cntry_LV_y,prod_comp_cntry_SG_y,prod_comp_cntry_RO_y,prod_comp_cntry_BG_y,prod_comp_cntry_CN_y,prod_comp_cntry_TH_y,prod_comp_cntry_CL_y,prod_comp_cntry_BE_y,prod_comp_cntry_ZA_y,prod_comp_cntry_UY_y,prod_comp_cntry_PY_y,prod_comp_cntry_QA_y,prod_comp_cntry_HU_y,prod_comp_cntry_EE_y,prod_comp_cntry_SI_y,prod_comp_cntry_LT_y,prod_comp_cntry_SK_y,prod_comp_cntry_IL_y,prod_comp_cntry_AR_y,prod_comp_cntry_AE_y,prod_comp_cntry_GR_y,prod_comp_cntry_MM_y,prod_comp_cntry_BY_y,prod_comp_cntry_GE_y,prod_comp_cntry_PT_y,prod_comp_cntry_PS_y,prod_comp_cntry_UG_y,prod_comp_cntry_ID_y,prod_comp_cntry_CO_y,prod_comp_cntry_PK_y,prod_comp_cntry_UA_y,prod_comp_cntry_CY_y,language_English_y,language_French_y,language_Spanish_y,language_Russian_y,language_Dutch_y,language_Vietnamese_y,language_Hungarian_y,language_Korean_y,language_Latin_y,language_Gaelic_y,language_Cantonese_y,language_German_y,language_Romanian_y,language_Italian_y,language_Japanese_y,language_Portuguese_y,language_Cornish_y,language_Navajo_y,language_Mandarin_y,language_Norwegian_y,language_Swedish_y,language_Arabic_y,language_Turkish_y,language_Maori_y,language_Polish_y,language_Hebrew_y,language_Czech_y,language_Persian_y,language_Malay_y,language_Thai_y,language_Yiddish_y,language_Nepali_y,language_Khmer_y,language_Irish_y,language_Finnish_y,language_Greek_y,language_Bulgarian_y,language_Slovak_y,language_Serbian_y,language_Bosnian_y,language_Croatian_y,language_Esperanto_y,language_Danish_y,language_Tibetan_y,language_Albanian_y,language_Swahili_y,language_Afrikaans_y,language_Sinhalese_y,language_Guarani_y,language_No_Language_y,language_Hindi_y,language_Maltese_y,language_Urdu_y,language_Lithuanian_y,language_Cree_y,language_Serbo-Croatian_y,language_Somali_y,language_Tagalog_y,language_Ukrainian_y,language_Icelandic_y,language_Punjabi_y,language_Chechen_y,language_Catalan_y,language_Inuktitut_y,language_Moldavian_y,language_Welsh_y,language_Armenian_y,language_Malayalam_y,language_Tamil_y,language_Galician_y,language_Bengali_y,language_Zulu_y,language_Lingala_y,language_Pushto_y,language_Burmese_y,language_Bambara_y,language_Mongolian_y,language_Estonian_y,language_Sanskrit_y,language_Chichewa_y,language__Nyanja_y,language_Sotho_y,language_Xhosa_y,language_Basque_y,language_Corsican_y,language_Belarusian_y,language_Telugu_y,language_Gujarati_y,language_Marathi_y,language_Indonesian_y,language_Georgian_y,language_Breton_y,language_Kurdish_y,language_Ganda_y,language_Kannada_y,language_Tatar_y
0,30000000.0,862.0,Toy Story,112.136,1995-10-01,373554033.0,81.0,1,1,3.352,5,4.489333,6.65,0.932,33.759,11.253,26.885,2.472,5.5,5013.9,153.5,862.0,343554033.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
1,65000000.0,8844.0,Jumanji,22.333,1995-12-01,262797249.0,104.0,4,2,2.963,3,1.821333,4.264,0.6,22.589,7.529667,11.74,2.081,5.6,5097.5,153.9,8844.0,197797249.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2,16000000.0,31357.0,Waiting to Exhale,6.24,1995-12-01,81452156.0,127.0,1,1,5.319,2,0.667,1.401,0.6,10.026,3.342,5.683,1.582,5.6,5097.5,153.9,31357.0,65452156.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,60000000.0,949.0,Heat,28.87,1995-12-01,187436818.0,170.0,3,2,9.183,1,3.061,9.183,9.183,32.729,10.909667,16.267,8.205,5.6,5097.5,153.9,949.0,127436818.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0,0,1,0,0,...,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,58000000.0,11860.0,Sabrina,11.671,1995-12-01,53672080.0,127.0,7,2,1.075,5,2.177,2.243,0.6,21.679,7.226333,11.337,4.979,5.6,5097.5,153.9,11860.0,-4327920.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
