In [1]:
# Set the environment variable CUDA_VISIBLE_DEVICES to 1 through IPython's
# %env magic. This is done before the imports below so that the value is
# already in place when those libraries are loaded.
# run_line_magic() replaces the deprecated InteractiveShell.magic() helper.
get_ipython().run_line_magic('env', 'CUDA_VISIBLE_DEVICES=1')
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import multiprocessing as mp      # will come in handy due to the size of the data
import os.path
import random
import time
import io
from datetime import datetime
import gc # garbage collector
import sklearn
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder
import math
from sklearn.datasets import dump_svmlight_file
import logging

# Render matplotlib figures inline in the notebook rather than in a new window.
# run_line_magic() replaces the deprecated InteractiveShell.magic() helper.
get_ipython().run_line_magic('matplotlib', 'inline')

# Notebook-wide plotting defaults.
plt.rcParams['figure.figsize'] = (10.0, 8.0)  # default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Make the notebook reload external python modules automatically;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
# run_line_magic() replaces the deprecated InteractiveShell.magic() helper.
get_ipython().run_line_magic('load_ext', 'autoreload')
get_ipython().run_line_magic('autoreload', '2')

env: CUDA_VISIBLE_DEVICES=1




## Write a pandas DataFrame to disk as a gzip-compressed CSV
- df.to_csv('dfsavename.csv.gz', compression='gzip')

## Read from disk
- df = pd.read_csv('dfsavename.csv.gz', compression='gzip')

## Useful magics
- `%%timeit` times the whole cell
- `%timeit` times a single line
- `%%latex` renders the cell as a block of LaTeX
- `%prun` and `%%prun` profile a single statement and the whole cell, respectively

In [2]:
# Root directory of the dataset. The trailing slash is kept on purpose because
# other cells build file names by plain string concatenation with this value.
DATASET_PATH = '/media/rs/0E06CD1706CD0127/Kapok/WSDM/'

# HDF5 files stored directly inside DATASET_PATH.
HDF_FILENAME = os.path.join(DATASET_PATH, 'music_info.h5')
HDF_TRAIN_FEATURE_FILENAME = os.path.join(DATASET_PATH, 'music_train_feature_part.h5')
HDF_TEST_FEATURE_FILENAME = os.path.join(DATASET_PATH, 'music_test_feature_part.h5')

In [3]:
def set_logging(logger_name, logger_file_name):
    """Return a DEBUG-level logger that writes to both a file and the console.

    Calling this function again for the same ``logger_name`` (for example when
    the notebook cell is re-run) first detaches and closes the handlers that a
    previous call installed, so each message is emitted once per destination
    instead of once per call.

    Args:
        logger_name: Name passed to ``logging.getLogger``.
        logger_file_name: Path of the log file. It is opened with mode ``'w'``,
            so any existing content is overwritten.

    Returns:
        The configured ``logging.Logger`` instance.
    """
    log = logging.getLogger(logger_name)
    log.setLevel(logging.DEBUG)

    # logging.getLogger returns the same object for the same name, so handlers
    # would otherwise pile up across calls and duplicate every message.
    # Only handlers tagged by this function are removed; handlers attached by
    # other code are left alone.
    for stale_handler in list(log.handlers):
        if getattr(stale_handler, '_from_set_logging', False):
            log.removeHandler(stale_handler)
            stale_handler.close()

    # create formatters: bare message for the console, detailed for the file
    print_formatter = logging.Formatter('%(message)s')
    file_formatter = logging.Formatter('%(asctime)s - %(name)s_%(levelname)s: %(message)s')

    # create file handler which logs even debug messages
    fh = logging.FileHandler(logger_file_name, mode='w')
    fh.setLevel(logging.DEBUG)
    fh.setFormatter(file_formatter)
    fh._from_set_logging = True  # tag so a later call can replace it
    log.addHandler(fh)

    # both output to console and file
    consoleHandler = logging.StreamHandler()
    consoleHandler.setFormatter(print_formatter)
    consoleHandler._from_set_logging = True  # tag so a later call can replace it
    log.addHandler(consoleHandler)

    return log

In [4]:
# Notebook-wide logger named 'MUSIC'; its log file lives inside DATASET_PATH.
log = set_logging(
    logger_name='MUSIC',
    logger_file_name=DATASET_PATH + 'music_test_xgboost.log',
)
# Emit one message right away to confirm the logger is wired up.
log.info('here is an info message.')

here is an info message.


In [5]:
# Open the HDF5 store at HDF_FILENAME (pandas' default open mode is used).
store_data = pd.HDFStore(path=HDF_FILENAME)

In [6]:
# Log the first five rows of the 'all_train_withextra' table as a quick look
# at its columns.
log.info(
    store_data.get('all_train_withextra').head(n=5)
)

                                           msno  \
0  FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=   
1  Xumu+NIjS6QYVxDS4/t3SawvJ7viT9hPKXmf0RtLNx8=   
2  Xumu+NIjS6QYVxDS4/t3SawvJ7viT9hPKXmf0RtLNx8=   
3  Xumu+NIjS6QYVxDS4/t3SawvJ7viT9hPKXmf0RtLNx8=   
4  FGtllVqz18RPiwJj/edr2gV78zirAiY/9SmYvia+kCg=   

                                        song_id source_system_tab  \
0  BBzumQNXUHKdEBOB7mAJuzok+IJA1c2Ryg/yzTF6tik=           explore   
1  bhp/MpSNoqoxOIB+/l8WPqu6jldth4DIpCm3ayXnJqM=        my library   
2  JNWfrrC7zNN7BdMpsISKa4Mw+xVJYNnxXh3/Epw7QgY=        my library   
3  2A87tzfnJTSWqD7gIZHisolhe4DMdzkbd6LzO1KHjNs=        my library   
4  3qm6XTZ6MOCU11x8FIVbAGH5l5uMkT3/ZalWG1oo2Gc=           explore   

    source_screen_name      source_type target city  bd  gender  \
0              Explore  online-playlist      1    1   0     nan   
1  Local playlist more   local-playlist      1   13  24  female   
2  Local playlist more   local-playlist      1   13  24  female   
3  Local p

In [15]:
# Open the training-feature HDF5 store, configured for zlib compression at level 1.
h5store = pd.HDFStore(
    path=HDF_TRAIN_FEATURE_FILENAME,
    complevel=1,
    complib='zlib',
)

In [16]:
%%timeit
print(h5store.select('all_train_withextra','index>0 & index<10000'))

      composer Bill Evan  composer Hank William  composer Max Martin  \
1                    0.0                    0.0                  0.0   
2                    0.0                    0.0                  0.0   
3                    0.0                    0.0                  0.0   
4                    0.0                    0.0                  0.0   
5                    0.0                    0.0                  0.0   
6                    0.0                    0.0                  0.0   
7                    0.0                    0.0                  0.0   
8                    0.0                    0.0                  0.0   
9                    0.0                    0.0                  0.0   
10                   0.0                    0.0                  0.0   
11                   0.0                    0.0                  0.0   
12                   0.0                    0.0                  0.0   
13                   0.0                    0.0                 

      composer Bill Evan  composer Hank William  composer Max Martin  \
1                    0.0                    0.0                  0.0   
2                    0.0                    0.0                  0.0   
3                    0.0                    0.0                  0.0   
4                    0.0                    0.0                  0.0   
5                    0.0                    0.0                  0.0   
6                    0.0                    0.0                  0.0   
7                    0.0                    0.0                  0.0   
8                    0.0                    0.0                  0.0   
9                    0.0                    0.0                  0.0   
10                   0.0                    0.0                  0.0   
11                   0.0                    0.0                  0.0   
12                   0.0                    0.0                  0.0   
13                   0.0                    0.0                 

      composer Bill Evan  composer Hank William  composer Max Martin  \
1                    0.0                    0.0                  0.0   
2                    0.0                    0.0                  0.0   
3                    0.0                    0.0                  0.0   
4                    0.0                    0.0                  0.0   
5                    0.0                    0.0                  0.0   
6                    0.0                    0.0                  0.0   
7                    0.0                    0.0                  0.0   
8                    0.0                    0.0                  0.0   
9                    0.0                    0.0                  0.0   
10                   0.0                    0.0                  0.0   
11                   0.0                    0.0                  0.0   
12                   0.0                    0.0                  0.0   
13                   0.0                    0.0                 

      composer Bill Evan  composer Hank William  composer Max Martin  \
1                    0.0                    0.0                  0.0   
2                    0.0                    0.0                  0.0   
3                    0.0                    0.0                  0.0   
4                    0.0                    0.0                  0.0   
5                    0.0                    0.0                  0.0   
6                    0.0                    0.0                  0.0   
7                    0.0                    0.0                  0.0   
8                    0.0                    0.0                  0.0   
9                    0.0                    0.0                  0.0   
10                   0.0                    0.0                  0.0   
11                   0.0                    0.0                  0.0   
12                   0.0                    0.0                  0.0   
13                   0.0                    0.0                 

      composer Bill Evan  composer Hank William  composer Max Martin  \
1                    0.0                    0.0                  0.0   
2                    0.0                    0.0                  0.0   
3                    0.0                    0.0                  0.0   
4                    0.0                    0.0                  0.0   
5                    0.0                    0.0                  0.0   
6                    0.0                    0.0                  0.0   
7                    0.0                    0.0                  0.0   
8                    0.0                    0.0                  0.0   
9                    0.0                    0.0                  0.0   
10                   0.0                    0.0                  0.0   
11                   0.0                    0.0                  0.0   
12                   0.0                    0.0                  0.0   
13                   0.0                    0.0                 

      composer Bill Evan  composer Hank William  composer Max Martin  \
1                    0.0                    0.0                  0.0   
2                    0.0                    0.0                  0.0   
3                    0.0                    0.0                  0.0   
4                    0.0                    0.0                  0.0   
5                    0.0                    0.0                  0.0   
6                    0.0                    0.0                  0.0   
7                    0.0                    0.0                  0.0   
8                    0.0                    0.0                  0.0   
9                    0.0                    0.0                  0.0   
10                   0.0                    0.0                  0.0   
11                   0.0                    0.0                  0.0   
12                   0.0                    0.0                  0.0   
13                   0.0                    0.0                 

      composer Bill Evan  composer Hank William  composer Max Martin  \
1                    0.0                    0.0                  0.0   
2                    0.0                    0.0                  0.0   
3                    0.0                    0.0                  0.0   
4                    0.0                    0.0                  0.0   
5                    0.0                    0.0                  0.0   
6                    0.0                    0.0                  0.0   
7                    0.0                    0.0                  0.0   
8                    0.0                    0.0                  0.0   
9                    0.0                    0.0                  0.0   
10                   0.0                    0.0                  0.0   
11                   0.0                    0.0                  0.0   
12                   0.0                    0.0                  0.0   
13                   0.0                    0.0                 

      composer Bill Evan  composer Hank William  composer Max Martin  \
1                    0.0                    0.0                  0.0   
2                    0.0                    0.0                  0.0   
3                    0.0                    0.0                  0.0   
4                    0.0                    0.0                  0.0   
5                    0.0                    0.0                  0.0   
6                    0.0                    0.0                  0.0   
7                    0.0                    0.0                  0.0   
8                    0.0                    0.0                  0.0   
9                    0.0                    0.0                  0.0   
10                   0.0                    0.0                  0.0   
11                   0.0                    0.0                  0.0   
12                   0.0                    0.0                  0.0   
13                   0.0                    0.0                 

In [17]:
# Close the training-feature HDF5 store held in `h5store` to release the file handle.
h5store.close()