# 1. 모든 시계열

### 게임 활동과 관련된 변수

### cnt_dt, play_time,game_combat_time, get_money, cnt_use_buffitem

### 각 변수별 시계열적인 특성을 보일 수 있는 변수들을 파악

* 기초 통계량 median, mean, var, skew, kurt
* 기준시점 변화량 (1,2,4,8)
* 이동평균(4)
* 주별 변화량
* Irregularity
* autocorrelation

In [1]:
#### import modules
import pandas as pd
import numpy as np
from sklearn import linear_model
import re
import seaborn as sns
from sklearn.decomposition import PCA
sns.set(color_codes=True)
# min max
from sklearn.preprocessing import MinMaxScaler

In [2]:
#### standardized dataset
train_activity = pd.read_csv('../lite_data/train_activity_lite.csv')
test_activity = pd.read_csv('../lite_data/test_activity_lite.csv')
train_label = pd.read_csv('../lite_data/train_label_lite.csv')

#### restored dataset
raw_train_activity = pd.read_csv('../transformed/real_values_int.csv')
raw_test_activity = pd.read_csv('../transformed/test_real_datas_int.csv')

#### 컬럼 순서 new_id / wk / 나머지....
cols = raw_train_activity.columns
raw_train_activity = raw_train_activity.loc[:,[cols[-1],cols[-2]] + cols[0:-2].tolist()].copy()
raw_test_activity = raw_test_activity.loc[:,[cols[-1],cols[-2]] + cols[0:-2].tolist()].copy()
cols = raw_train_activity.columns

In [3]:
#### 변수 정의 - 처음
## X_train, X_test, y_train, label_map
X_train = train_activity.groupby(by='new_id',sort=False,as_index=False).sum().iloc[:,:1]
X_test = test_activity.groupby(by='new_id',sort=False,as_index=False).sum().iloc[:,:1]

In [4]:
## label dictionary
label_map = {'retained':0,'2month':1,'month':2,'week':3}
y_train = pd.Series([label_map[l] for l in train_label.label])

## 시간 관련 기본 변수 (퀘스트, 채팅, 제작 관련 변수 이외의 변수들...)
train_time_var = raw_train_activity.loc[:,['new_id','wk','cnt_dt', 'play_time','game_combat_time', 'get_money', 'cnt_use_buffitem']]
test_time_var = raw_test_activity.loc[:,['new_id','wk','cnt_dt', 'play_time','game_combat_time', 'get_money', 'cnt_use_buffitem']]

In [5]:
#### 핵심 변수만!!!
time_cols = train_time_var.columns.tolist()
num_var = len(time_cols)

In [6]:
time_cols

['new_id',
 'wk',
 'cnt_dt',
 'play_time',
 'game_combat_time',
 'get_money',
 'cnt_use_buffitem']

In [7]:
# 일반화
for i in range(2,num_var):
    ## pivoting
    train_time = train_time_var.iloc[:,[0,1,i]].pivot(index = 'new_id',columns = 'wk',values=time_cols[i])
    train_time = train_time.reindex(index = train_time.index.to_series().str.replace('tr','').astype(int).sort_values().index)# 인덱스 수정
    test_time = test_time_var.iloc[:,[0,1,i]].pivot(index = 'new_id',columns = 'wk',values=time_cols[i])
    test_time = test_time.reindex(index = test_time.index.to_series().str.replace('te','').astype(int).sort_values().index)# 인덱스 수정
    
    #### Feature Engineering

    ### time - series
    # fe_0 base statistics
    
    # median
    train_temp = pd.DataFrame(train_time.T.median(),columns=[time_cols[i] + '_median'])
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)
    test_temp = pd.DataFrame(test_time.T.median(),columns=[time_cols[i] + '_median'])
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    # mean
    train_temp = pd.DataFrame(train_time.T.mean(),columns=[time_cols[i] + '_mean'])
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)
    test_temp = pd.DataFrame(test_time.T.mean(),columns=[time_cols[i] + '_mean'])
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    # var(irregularity)
    train_temp = pd.DataFrame(train_time.T.var(),columns=[time_cols[i] + '_var'])
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)
    test_temp = pd.DataFrame(test_time.T.var(),columns=[time_cols[i] + '_var'])
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    # skew
    train_temp = pd.DataFrame(train_time.T.skew(),columns=[time_cols[i] + '_skew'])
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)
    test_temp = pd.DataFrame(test_time.T.skew(),columns=[time_cols[i] + '_skew'])
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    # kurt
    train_temp = pd.DataFrame(train_time.T.kurt(),columns=[time_cols[i] + '_kurt'])
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)
    test_temp = pd.DataFrame(test_time.T.kurt(),columns=[time_cols[i] + '_kurt'])
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    # fe_1 : 이동평균 MA(4)
    train_temp = train_time.rolling(window = 4,axis=1).mean().iloc[:,3:].copy()
    train_temp.columns = [time_cols[i] + x for x in ['_MA_1','_MA_2','_MA_3','_MA_4','_MA_5']]
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)

    test_temp = test_time.rolling(window = 4,axis=1).mean().iloc[:,3:].copy()
    test_temp.columns = train_temp.columns
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    # fe_2 : trend - 주별 변화량
    for z in range(7):
        train_temp = pd.DataFrame((train_time.iloc[:,z+1] - train_time.iloc[:,z]),columns=[time_cols[i] + '_diff' + str(z+1)])
        X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)

        test_temp = pd.DataFrame((test_time.iloc[:,z+1] - test_time.iloc[:,z]),columns=[time_cols[i] + '_diff' + str(z+1)])
        X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)

    # fe_3 : trend - 최근 2주 변화량(단기)
    train_temp = pd.DataFrame((train_time.iloc[:,7] - train_time.iloc[:,5]),columns=[time_cols[i] + '_diff_w2_by_last'])
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)

    test_temp = pd.DataFrame((test_time.iloc[:,7] - test_time.iloc[:,5]),columns=[time_cols[i] + '_diff_w2_by_last'])
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    # fe_4 : trend - 최근 4주 변화량(중기)
    train_temp = pd.DataFrame((train_time.iloc[:,7] - train_time.iloc[:,3]),columns=[time_cols[i] + '_diff_w4_by_last'])
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)

    test_temp = pd.DataFrame((test_time.iloc[:,7] - test_time.iloc[:,3]),columns=[time_cols[i] + '_diff_w4_by_last'])
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    
    #### 각 기간 별 선형회귀 fit

    train_fit = dict()
    for acc_id in train_time.index:
        lr = linear_model.LinearRegression(n_jobs = -1)
        lr.fit(train_time.columns.values.reshape(8,1),train_time.loc[train_time.index == acc_id,:].values.reshape(8,1))
        train_fit.update({acc_id:[lr.coef_, lr.intercept_] })

    test_fit = dict()
    for acc_id in test_time.index:
        lr = linear_model.LinearRegression(n_jobs = -1)
        lr.fit(test_time.columns.values.reshape(8,1),test_time.loc[test_time.index == acc_id,:].values.reshape(8,1))
        test_fit.update({acc_id:[lr.coef_, lr.intercept_] })
    
    # fe_5 : trend - 장기 변화율(8주 선형회귀의 계수)
    X_train[time_cols[i] + '_diff_w8_by_last'] = X_train.new_id.apply(lambda x : train_fit[x][0][0][0])
    X_test[time_cols[i] + '_diff_w8_by_last'] = X_test.new_id.apply(lambda x : test_fit[x][0][0][0])
    
    # fe_6 : autocorrelation
    train_temp = train_time.apply(lambda x: x.autocorr(), axis = 1).fillna(0)
    X_train = pd.merge(X_train,pd.DataFrame(train_temp,columns=[time_cols[i] +'_cycle']),how="left",left_on='new_id',right_on = train_temp.index)

    test_temp = test_time.apply(lambda x: x.autocorr(), axis = 1).fillna(0)
    X_test = pd.merge(X_test,pd.DataFrame(test_temp,columns=[time_cols[i] +'_cycle']),how="left",left_on='new_id',right_on = test_temp.index)
    
    print(i)

2
3
4
5
6


In [10]:
X_train.to_csv('X_train_게임활동_time.csv',index = False)
X_test.to_csv('X_test_게임활동_time.csv',index = False)

# 2. 간단한 시계열

### 사용자별 8주간 차이가 드러날만한 변수들

###  payment_amount, party_chat, whisper_chat, normal_chat, guild_chat, quest_hongmum, npc_hongmun, item_hongmun, inzone_normal, inzone_light

### 각 변수별 시계열적인 특성을 보일 수 있는 변수들을 파악

In [1]:
#### import modules
import pandas as pd
import numpy as np
from sklearn import linear_model
import re
import seaborn as sns
from sklearn.decomposition import PCA
sns.set(color_codes=True)
# min max
from sklearn.preprocessing import MinMaxScaler

In [2]:
#### standardized dataset
train_activity = pd.read_csv('../lite_data/train_activity_lite.csv')
test_activity = pd.read_csv('../lite_data/test_activity_lite.csv')
train_label = pd.read_csv('../lite_data/train_label_lite.csv')

#### restored dataset
raw_train_activity = pd.read_csv('../transformed/real_values_int.csv')
raw_test_activity = pd.read_csv('../transformed/test_real_datas_int.csv')

#### 컬럼 순서 new_id / wk / 나머지....
cols = raw_train_activity.columns
raw_train_activity = raw_train_activity.loc[:,[cols[-1],cols[-2]] + cols[0:-2].tolist()].copy()
raw_test_activity = raw_test_activity.loc[:,[cols[-1],cols[-2]] + cols[0:-2].tolist()].copy()
cols = raw_train_activity.columns

In [3]:
#### 변수 정의 - 처음
## X_train, X_test, y_train, label_map
X_train = train_activity.groupby(by='new_id',sort=False,as_index=False).sum().iloc[:,:1]
X_test = test_activity.groupby(by='new_id',sort=False,as_index=False).sum().iloc[:,:1]

In [4]:
## label dictionary
label_map = {'retained':0,'2month':1,'month':2,'week':3}
y_train = pd.Series([label_map[l] for l in train_label.label])

## 시간 관련 기본 변수 (퀘스트, 채팅, 제작 관련 변수 이외의 변수들...)
train_time_var = raw_train_activity.loc[:,['new_id','wk','payment_amount', 'party_chat', 'whisper_chat', 'normal_chat', 'guild_chat', 'quest_hongmun', 'npc_hongmun', 'item_hongmun']]
test_time_var = raw_test_activity.loc[:,['new_id','wk','payment_amount', 'party_chat', 'whisper_chat', 'normal_chat', 'guild_chat', 'quest_hongmun', 'npc_hongmun', 'item_hongmun']]

In [5]:
#### 핵심 변수 추출
## inzone_light
# pca 
pca = PCA(n_components = 1)
pca_train = pd.merge(raw_train_activity.iloc[:,:2],train_activity, on = ['new_id','wk'],how ='left').loc[:,['cnt_enter_inzone_light','cnt_clear_inzone_light']]
pca_train = pca.fit_transform(pca_train.fillna(pca_train.min()))

pca_test = pd.merge(raw_test_activity.iloc[:,:2],test_activity, on = ['new_id','wk'],how ='left').loc[:,['cnt_enter_inzone_light','cnt_clear_inzone_light']]
pca_test = pca.transform(pca_test.fillna(pca_train.min()))

# update
train_time_var = pd.merge(train_time_var,pd.concat((raw_train_activity.iloc[:,:2],pd.Series(pca_train.reshape(-1),name='inzone_light')),axis=1))
test_time_var = pd.merge(test_time_var,pd.concat((raw_test_activity.iloc[:,:2],pd.Series(pca_test.reshape(-1),name='inzone_light')),axis=1))

## inzone_normal
# pca 
pca = PCA(n_components = 1)
pca_train = pd.merge(raw_train_activity.iloc[:,:2],train_activity, on = ['new_id','wk'],how ='left').loc[:,['cnt_enter_inzone_normal','cnt_clear_inzone_normal']]
pca_train = pca.fit_transform(pca_train.fillna(pca_train.min()))

pca_test = pd.merge(raw_test_activity.iloc[:,:2],test_activity, on = ['new_id','wk'],how ='left').loc[:,['cnt_enter_inzone_normal','cnt_clear_inzone_normal']]
pca_test = pca.transform(pca_test.fillna(pca_train.min()))

# update
train_time_var = pd.merge(train_time_var,pd.concat((raw_train_activity.iloc[:,:2],pd.Series(pca_train.reshape(-1),name='inzone_normal')),axis=1))
test_time_var = pd.merge(test_time_var,pd.concat((raw_test_activity.iloc[:,:2],pd.Series(pca_test.reshape(-1),name='inzone_normal')),axis=1))

In [6]:
#### 핵심 변수만!!!
time_cols = train_time_var.columns.tolist()
num_var = len(time_cols)

In [7]:
time_cols

['new_id',
 'wk',
 'payment_amount',
 'party_chat',
 'whisper_chat',
 'normal_chat',
 'guild_chat',
 'quest_hongmun',
 'npc_hongmun',
 'item_hongmun',
 'inzone_light',
 'inzone_normal']

In [8]:
# 일반화
for i in range(2,num_var):
    ## pivoting
    train_time = train_time_var.iloc[:,[0,1,i]].pivot(index = 'new_id',columns = 'wk',values=time_cols[i])
    train_time = train_time.reindex(index = train_time.index.to_series().str.replace('tr','').astype(int).sort_values().index)# 인덱스 수정
    test_time = test_time_var.iloc[:,[0,1,i]].pivot(index = 'new_id',columns = 'wk',values=time_cols[i])
    test_time = test_time.reindex(index = test_time.index.to_series().str.replace('te','').astype(int).sort_values().index)# 인덱스 수정
    
    #### Feature Engineering

    ### time - series
    # fe_0 base statistics
    
    # median
    train_temp = pd.DataFrame(train_time.T.median(),columns=[time_cols[i] + '_median'])
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)
    test_temp = pd.DataFrame(test_time.T.median(),columns=[time_cols[i] + '_median'])
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    # mean
    train_temp = pd.DataFrame(train_time.T.mean(),columns=[time_cols[i] + '_mean'])
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)
    test_temp = pd.DataFrame(test_time.T.mean(),columns=[time_cols[i] + '_mean'])
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    # var(irregularity)
    train_temp = pd.DataFrame(train_time.T.var(),columns=[time_cols[i] + '_var'])
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)
    test_temp = pd.DataFrame(test_time.T.var(),columns=[time_cols[i] + '_var'])
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    # skew
    train_temp = pd.DataFrame(train_time.T.skew(),columns=[time_cols[i] + '_skew'])
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)
    test_temp = pd.DataFrame(test_time.T.skew(),columns=[time_cols[i] + '_skew'])
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    # kurt
    train_temp = pd.DataFrame(train_time.T.kurt(),columns=[time_cols[i] + '_kurt'])
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)
    test_temp = pd.DataFrame(test_time.T.kurt(),columns=[time_cols[i] + '_kurt'])
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    # fe_1 : 이동평균 MA(4)
    train_temp = train_time.rolling(window = 4,axis=1).mean().iloc[:,3:].copy()
    train_temp.columns = [time_cols[i] + x for x in ['_MA_1','_MA_2','_MA_3','_MA_4','_MA_5']]
    X_train = pd.merge(X_train,train_temp,how="left",left_on='new_id',right_on=train_temp.index)

    test_temp = test_time.rolling(window = 4,axis=1).mean().iloc[:,3:].copy()
    test_temp.columns = train_temp.columns
    X_test = pd.merge(X_test,test_temp,how="left",left_on='new_id',right_on=test_temp.index)
    
    # fe_6 : autocorrelation
    train_temp = train_time.apply(lambda x: x.autocorr(), axis = 1).fillna(0)
    X_train = pd.merge(X_train,pd.DataFrame(train_temp,columns=[time_cols[i] +'_cycle']),how="left",left_on='new_id',right_on = train_temp.index)

    test_temp = test_time.apply(lambda x: x.autocorr(), axis = 1).fillna(0)
    X_test = pd.merge(X_test,pd.DataFrame(test_temp,columns=[time_cols[i] +'_cycle']),how="left",left_on='new_id',right_on = test_temp.index)
    
    print(i)

2
3
4
5
6
7
8
9
10
11


In [12]:
X_train.to_csv('X_train_easy_time.csv',index = False)

In [13]:
X_test.to_csv('X_test_easy_time.csv',index = False)

---

In [4]:
## 상관계수가 높은 변수 분화
# pca 
pca = PCA(n_components = 1)
pca_train = pca.fit_transform(pd.concat((train_activity.duel_cnt,train_activity.duel_win),axis=1))
pca_test = pca.transform(pd.concat((test_activity.duel_cnt,test_activity.duel_win),axis=1))

# update
train_time_var['duel'] = pca_train
test_time_var['duel'] = pca_test

# pca 
pca = PCA(n_components = 1)
pca_train = pca.fit_transform(pd.concat((train_activity.partybattle_cnt,train_activity.partybattle_win),axis=1))
pca_test = pca.transform(pd.concat((test_activity.partybattle_cnt,test_activity.partybattle_win),axis=1))

# update
train_time_var['partybattle'] = pca_train
test_time_var['partybattle'] = pca_test

# pca 
pca = PCA(n_components = 1)
pca_train = pca.fit_transform(pd.concat((train_activity.cnt_enter_inzone_solo,train_activity.cnt_clear_inzone_solo),axis=1))
pca_test = pca.transform(pd.concat((test_activity.cnt_enter_inzone_solo,test_activity.cnt_clear_inzone_solo),axis=1))

# update
train_time_var['inzone_solo'] = pca_train
test_time_var['inzone_solo'] = pca_test

# pca 
pca = PCA(n_components = 1)
pca_train = pca.fit_transform(pd.concat((train_activity.cnt_enter_inzone_light,train_activity.cnt_clear_inzone_light),axis=1))
pca_test = pca.transform(pd.concat((test_activity.cnt_enter_inzone_light,test_activity.cnt_clear_inzone_light),axis=1))

# update
train_time_var['inzone_light'] = pca_train
test_time_var['inzone_light'] = pca_test

# pca 
pca = PCA(n_components = 1)
pca_train = pca.fit_transform(pd.concat((train_activity.cnt_enter_inzone_skilled,train_activity.cnt_clear_inzone_skilled),axis=1))
pca_test = pca.transform(pd.concat((test_activity.cnt_enter_inzone_skilled,test_activity.cnt_clear_inzone_skilled),axis=1))

# update
train_time_var['inzone_skilled'] = pca_train
test_time_var['inzone_skilled'] = pca_test

# pca 
pca = PCA(n_components = 1)
pca_train = pca.fit_transform(pd.concat((train_activity.cnt_enter_inzone_normal,train_activity.cnt_clear_inzone_normal),axis=1))
pca_test = pca.transform(pd.concat((test_activity.cnt_enter_inzone_normal,test_activity.cnt_clear_inzone_normal),axis=1))

# update
train_time_var['inzone_normal'] = pca_train
test_time_var['inzone_normal'] = pca_test

# pca 
pca = PCA(n_components = 1)
pca_train = pca.fit_transform(pd.concat((train_activity.cnt_enter_raid,train_activity.cnt_clear_raid),axis=1))
pca_test = pca.transform(pd.concat((test_activity.cnt_enter_raid,test_activity.cnt_clear_raid),axis=1))

# update
train_time_var['raid'] = pca_train
test_time_var['raid'] = pca_test

# pca 
pca = PCA(n_components = 1)
pca_train = pca.fit_transform(pd.concat((train_activity.cnt_enter_raid_light,train_activity.cnt_clear_raid_light),axis=1))
pca_test = pca.transform(pd.concat((test_activity.cnt_enter_raid_light,test_activity.cnt_clear_raid_light),axis=1))

# update
train_time_var['raid_light'] = pca_train
test_time_var['raid_light'] = pca_test

# pca 
pca = PCA(n_components = 1)
pca_train = pca.fit_transform(pd.concat((train_activity.cnt_enter_bam,train_activity.cnt_clear_bam),axis=1))
pca_test = pca.transform(pd.concat((test_activity.cnt_enter_bam,test_activity.cnt_clear_bam),axis=1))

# update
train_time_var['bam'] = pca_train
test_time_var['bam'] = pca_test

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-v

In [5]:
#### diff변수...
train_time_var['duel_diff'] = raw_train_activity.duel_cnt - raw_train_activity.duel_win
test_time_var['duel_diff'] = raw_test_activity.duel_cnt - raw_test_activity.duel_win

train_time_var['partybattle_diff'] = raw_train_activity.partybattle_cnt - raw_train_activity.partybattle_win
test_time_var['partybattle_diff'] = raw_test_activity.partybattle_cnt - raw_test_activity.partybattle_win

train_time_var['inzone_solo_diff'] = raw_train_activity.cnt_enter_inzone_solo - raw_train_activity.cnt_clear_inzone_solo
test_time_var['inzone_solo_diff'] = raw_test_activity.cnt_enter_inzone_solo - raw_test_activity.cnt_clear_inzone_solo

train_time_var['inzone_light_diff'] = raw_train_activity.cnt_enter_inzone_light - raw_train_activity.cnt_clear_inzone_light
test_time_var['inzone_light_diff'] = raw_test_activity.cnt_enter_inzone_light - raw_test_activity.cnt_clear_inzone_light

train_time_var['inzone_skilled_diff'] = raw_train_activity.cnt_enter_inzone_skilled - raw_train_activity.cnt_clear_inzone_skilled
test_time_var['inzone_skilled_diff'] = raw_test_activity.cnt_enter_inzone_skilled - raw_test_activity.cnt_clear_inzone_skilled

train_time_var['inzone_normal_diff'] = raw_train_activity.cnt_enter_inzone_normal - raw_train_activity.cnt_clear_inzone_normal
test_time_var['inzone_normal_diff'] = raw_test_activity.cnt_enter_inzone_normal - raw_test_activity.cnt_clear_inzone_normal

train_time_var['raid_diff'] = raw_train_activity.cnt_enter_raid - raw_train_activity.cnt_clear_raid
test_time_var['raid_diff'] = raw_test_activity.cnt_enter_raid - raw_test_activity.cnt_clear_raid

train_time_var['raid_light_diff'] = raw_train_activity.cnt_enter_raid_light - raw_train_activity.cnt_clear_raid_light
test_time_var['raid_light_diff'] = raw_test_activity.cnt_enter_raid_light - raw_test_activity.cnt_clear_raid_light

train_time_var['bam_diff'] = raw_train_activity.cnt_enter_bam - raw_train_activity.cnt_clear_bam
test_time_var['bam_diff'] = raw_test_activity.cnt_enter_bam - raw_test_activity.cnt_clear_bam

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pa