In [1]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
'''
FEATURES:
Overall action count/ratio
Overall day count
Monthly action count/ratio
Product Diversity
Monthly aggregation
Merchant aggregation
Double 11 features
latest one-week
trend
LDA features
'''

'\nFEATURES:\nOverall action count/ratio\nOverall day count\nMonthly action count/ratio\nProduct Diversity\nMonthly aggregation\nMerchant aggregation\nDouble 11 features\nlatest one-week\ntrend\nLDA features\n'

In [2]:
df = pd.read_csv("./use_data/expanded_training.csv")
df

Unnamed: 0,user_id,item_id,cat_id,seller_id,brand_id,time_stamp,action_type,age_range,gender,label
0,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
1,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
2,379824,198,656,145,3462.0,1111,2,5.0,1.0,0
3,379824,198,656,145,3462.0,1110,0,5.0,1.0,0
4,379824,198,656,145,3462.0,1110,0,5.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...
380,122632,175,1181,4760,247.0,1109,0,3.0,0.0,0
381,122632,175,1181,4760,247.0,1108,0,3.0,0.0,0
382,122632,175,1181,4760,247.0,1108,0,3.0,0.0,0
383,95362,253,962,3263,626.0,1111,0,0.0,0.0,0


In [3]:
df_user = df[['user_id']]
df_user

Unnamed: 0,user_id
0,379824
1,379824
2,379824
3,379824
4,379824
...,...
380,122632
381,122632
382,122632
383,95362


In [4]:
#FEATURE 1: OVERALL ACTION COUNT/RATIO
actions = {}

# go through user_seller sub-dataframe and split entries by user_id:seller_id
for index, row in df.iterrows():
    if row['user_id'] not in actions:
        # there may be multiple entries, so default with an empty list
        actions[row["user_id"]] = [] 
    actions[row["user_id"]].append(row["action_type"])
    
for k,v in actions.items():
    actions[k] = list(set(v)) # make each value list unique
actions

{379824.0: [0.0, 2.0],
 141307.0: [0.0, 2.0],
 252255.0: [0.0],
 309566.0: [2.0],
 206032.0: [0.0, 2.0, 3.0],
 182119.0: [0.0],
 68225.0: [0.0],
 109919.0: [0.0],
 342023.0: [0.0],
 287829.0: [0.0],
 255960.0: [0.0],
 380420.0: [0.0],
 176150.0: [0.0],
 407671.0: [0.0, 2.0, 3.0],
 388332.0: [0.0],
 244552.0: [0.0, 2.0],
 46507.0: [0.0, 2.0],
 191162.0: [0.0, 2.0],
 174308.0: [0.0, 2.0],
 134532.0: [0.0],
 355159.0: [0.0],
 62942.0: [0.0],
 353962.0: [0.0, 2.0],
 319670.0: [0.0, 2.0],
 210578.0: [0.0],
 101110.0: [0.0, 2.0],
 149798.0: [0.0],
 6018.0: [0.0],
 281262.0: [0.0],
 314519.0: [2.0],
 205257.0: [0.0],
 117839.0: [0.0],
 46699.0: [0.0, 2.0, 3.0],
 238412.0: [0.0],
 156961.0: [0.0],
 45576.0: [0.0],
 214766.0: [0.0],
 5524.0: [0.0],
 218669.0: [0.0],
 272518.0: [0.0],
 277770.0: [0.0],
 386295.0: [0.0],
 402251.0: [0.0],
 254609.0: [0.0],
 84043.0: [0.0],
 120820.0: [0.0],
 291453.0: [0.0],
 280293.0: [0.0],
 291386.0: [0.0],
 313634.0: [0.0, 2.0],
 60260.0: [0.0],
 112997.0: [0

In [5]:
for k,v in actions.items():
    actions[k] = list(set(v)) # make each value list unique

In [6]:
actions

{379824.0: [0.0, 2.0],
 141307.0: [0.0, 2.0],
 252255.0: [0.0],
 309566.0: [2.0],
 206032.0: [0.0, 2.0, 3.0],
 182119.0: [0.0],
 68225.0: [0.0],
 109919.0: [0.0],
 342023.0: [0.0],
 287829.0: [0.0],
 255960.0: [0.0],
 380420.0: [0.0],
 176150.0: [0.0],
 407671.0: [0.0, 2.0, 3.0],
 388332.0: [0.0],
 244552.0: [0.0, 2.0],
 46507.0: [0.0, 2.0],
 191162.0: [0.0, 2.0],
 174308.0: [0.0, 2.0],
 134532.0: [0.0],
 355159.0: [0.0],
 62942.0: [0.0],
 353962.0: [0.0, 2.0],
 319670.0: [0.0, 2.0],
 210578.0: [0.0],
 101110.0: [0.0, 2.0],
 149798.0: [0.0],
 6018.0: [0.0],
 281262.0: [0.0],
 314519.0: [2.0],
 205257.0: [0.0],
 117839.0: [0.0],
 46699.0: [0.0, 2.0, 3.0],
 238412.0: [0.0],
 156961.0: [0.0],
 45576.0: [0.0],
 214766.0: [0.0],
 5524.0: [0.0],
 218669.0: [0.0],
 272518.0: [0.0],
 277770.0: [0.0],
 386295.0: [0.0],
 402251.0: [0.0],
 254609.0: [0.0],
 84043.0: [0.0],
 120820.0: [0.0],
 291453.0: [0.0],
 280293.0: [0.0],
 291386.0: [0.0],
 313634.0: [0.0, 2.0],
 60260.0: [0.0],
 112997.0: [0

In [7]:
action_count = {}
for key in actions.keys():
    action_count[key] = len(actions[key])
action_count

{379824.0: 2,
 141307.0: 2,
 252255.0: 1,
 309566.0: 1,
 206032.0: 3,
 182119.0: 1,
 68225.0: 1,
 109919.0: 1,
 342023.0: 1,
 287829.0: 1,
 255960.0: 1,
 380420.0: 1,
 176150.0: 1,
 407671.0: 3,
 388332.0: 1,
 244552.0: 2,
 46507.0: 2,
 191162.0: 2,
 174308.0: 2,
 134532.0: 1,
 355159.0: 1,
 62942.0: 1,
 353962.0: 2,
 319670.0: 2,
 210578.0: 1,
 101110.0: 2,
 149798.0: 1,
 6018.0: 1,
 281262.0: 1,
 314519.0: 1,
 205257.0: 1,
 117839.0: 1,
 46699.0: 3,
 238412.0: 1,
 156961.0: 1,
 45576.0: 1,
 214766.0: 1,
 5524.0: 1,
 218669.0: 1,
 272518.0: 1,
 277770.0: 1,
 386295.0: 1,
 402251.0: 1,
 254609.0: 1,
 84043.0: 1,
 120820.0: 1,
 291453.0: 1,
 280293.0: 1,
 291386.0: 1,
 313634.0: 2,
 60260.0: 1,
 112997.0: 1,
 305925.0: 2,
 199429.0: 1,
 185381.0: 1,
 144281.0: 1,
 33618.0: 1,
 276857.0: 2,
 130186.0: 2,
 376338.0: 2,
 867.0: 1,
 338395.0: 2,
 335587.0: 2,
 409283.0: 1,
 88275.0: 1,
 165355.0: 1,
 55784.0: 1,
 327782.0: 2,
 264657.0: 2,
 52751.0: 1,
 33745.0: 1,
 103980.0: 1,
 316235.0: 

In [8]:
total_action_count = sum(action_count.values())
total_action_count

216

In [9]:
action_count_ratio = {}
for key in action_count:
    action_count_ratio[key] = action_count[key]/total_action_count
action_count_ratio

{379824.0: 0.009259259259259259,
 141307.0: 0.009259259259259259,
 252255.0: 0.004629629629629629,
 309566.0: 0.004629629629629629,
 206032.0: 0.013888888888888888,
 182119.0: 0.004629629629629629,
 68225.0: 0.004629629629629629,
 109919.0: 0.004629629629629629,
 342023.0: 0.004629629629629629,
 287829.0: 0.004629629629629629,
 255960.0: 0.004629629629629629,
 380420.0: 0.004629629629629629,
 176150.0: 0.004629629629629629,
 407671.0: 0.013888888888888888,
 388332.0: 0.004629629629629629,
 244552.0: 0.009259259259259259,
 46507.0: 0.009259259259259259,
 191162.0: 0.009259259259259259,
 174308.0: 0.009259259259259259,
 134532.0: 0.004629629629629629,
 355159.0: 0.004629629629629629,
 62942.0: 0.004629629629629629,
 353962.0: 0.009259259259259259,
 319670.0: 0.009259259259259259,
 210578.0: 0.004629629629629629,
 101110.0: 0.009259259259259259,
 149798.0: 0.004629629629629629,
 6018.0: 0.004629629629629629,
 281262.0: 0.004629629629629629,
 314519.0: 0.004629629629629629,
 205257.0: 0.00

In [10]:
df_user['action count'] = -1
df_user['action count ratio'] = -1
for index,row in df_user.iterrows():
        df_user.at[index, 'action count'] = action_count[row['user_id']]
        df_user.at[index, 'action count ratio'] = action_count_ratio[row['user_id']]

df_user

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user['action count'] = -1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user['action count ratio'] = -1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user.at[index, 'action count ratio'] = action_count_ratio[row['user_id']]


Unnamed: 0,user_id,action count,action count ratio
0,379824,2,0.009259
1,379824,2,0.009259
2,379824,2,0.009259
3,379824,2,0.009259
4,379824,2,0.009259
...,...,...,...
380,122632,1,0.004630
381,122632,1,0.004630
382,122632,1,0.004630
383,95362,1,0.004630


In [11]:
#FEATURE 2: OVERALL DAY COUNT
days = {}

# go through user_seller sub-dataframe and split entries by user_id:seller_id
for index, row in df.iterrows():
    if row['user_id'] not in days:
        # there may be multiple entries, so default with an empty list
        days[row["user_id"]] = [] 
    days[row["user_id"]].append(row["time_stamp"])

In [12]:
for k,v in days.items():
    days[k] = list(set(v)) # make each value list unique

In [13]:
days

{379824.0: [1110.0, 1111.0],
 141307.0: [1107.0, 1111.0],
 252255.0: [1111.0],
 309566.0: [1111.0],
 206032.0: [1103.0, 1104.0, 1105.0, 1107.0, 1108.0, 1109.0, 1111.0],
 182119.0: [1110.0, 1111.0],
 68225.0: [1108.0],
 109919.0: [1105.0],
 342023.0: [1108.0],
 287829.0: [1111.0],
 255960.0: [1111.0],
 380420.0: [1105.0],
 176150.0: [1111.0],
 407671.0: [1109.0, 1111.0],
 388332.0: [520.0],
 244552.0: [1111.0],
 46507.0: [1111.0],
 191162.0: [1111.0],
 174308.0: [1111.0],
 134532.0: [1111.0],
 355159.0: [1111.0],
 62942.0: [1103.0],
 353962.0: [1111.0],
 319670.0: [1111.0],
 210578.0: [1111.0],
 101110.0: [1111.0],
 149798.0: [1108.0],
 6018.0: [1111.0],
 281262.0: [1109.0],
 314519.0: [1111.0],
 205257.0: [1111.0],
 117839.0: [1111.0],
 46699.0: [1106.0, 1109.0, 1110.0, 1111.0],
 238412.0: [1111.0],
 156961.0: [1111.0],
 45576.0: [1111.0],
 214766.0: [1110.0],
 5524.0: [1111.0],
 218669.0: [1111.0],
 272518.0: [1111.0],
 277770.0: [1111.0],
 386295.0: [1111.0],
 402251.0: [1109.0, 1103

In [14]:
day_count = {}
for key in days.keys():
    day_count[key] = len(days[key])
day_count

{379824.0: 2,
 141307.0: 2,
 252255.0: 1,
 309566.0: 1,
 206032.0: 7,
 182119.0: 2,
 68225.0: 1,
 109919.0: 1,
 342023.0: 1,
 287829.0: 1,
 255960.0: 1,
 380420.0: 1,
 176150.0: 1,
 407671.0: 2,
 388332.0: 1,
 244552.0: 1,
 46507.0: 1,
 191162.0: 1,
 174308.0: 1,
 134532.0: 1,
 355159.0: 1,
 62942.0: 1,
 353962.0: 1,
 319670.0: 1,
 210578.0: 1,
 101110.0: 1,
 149798.0: 1,
 6018.0: 1,
 281262.0: 1,
 314519.0: 1,
 205257.0: 1,
 117839.0: 1,
 46699.0: 4,
 238412.0: 1,
 156961.0: 1,
 45576.0: 1,
 214766.0: 1,
 5524.0: 1,
 218669.0: 1,
 272518.0: 1,
 277770.0: 1,
 386295.0: 1,
 402251.0: 2,
 254609.0: 2,
 84043.0: 1,
 120820.0: 1,
 291453.0: 1,
 280293.0: 1,
 291386.0: 1,
 313634.0: 1,
 60260.0: 1,
 112997.0: 1,
 305925.0: 1,
 199429.0: 1,
 185381.0: 1,
 144281.0: 1,
 33618.0: 1,
 276857.0: 1,
 130186.0: 2,
 376338.0: 1,
 867.0: 1,
 338395.0: 2,
 335587.0: 1,
 409283.0: 1,
 88275.0: 1,
 165355.0: 3,
 55784.0: 1,
 327782.0: 4,
 264657.0: 1,
 52751.0: 1,
 33745.0: 1,
 103980.0: 1,
 316235.0: 

In [15]:
df_user['day count'] = -1
for index,row in df_user.iterrows():
        df_user.at[index, 'day count'] = day_count[row['user_id']]

df_user

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user['day count'] = -1


Unnamed: 0,user_id,action count,action count ratio,day count
0,379824,2,0.009259,2
1,379824,2,0.009259,2
2,379824,2,0.009259,2
3,379824,2,0.009259,2
4,379824,2,0.009259,2
...,...,...,...,...
380,122632,1,0.004630,2
381,122632,1,0.004630,2
382,122632,1,0.004630,2
383,95362,1,0.004630,1


In [16]:
#FEATURE 4: Product Diversity
items = {}

# go through user_seller sub-dataframe and split entries by user_id:seller_id
for index, row in df.iterrows():
    if row['item_id'] not in items:
        # there may be multiple entries, so default with an empty list
        items[row["user_id"]] = [] 
    items[row["user_id"]].append(row["item_id"])

In [17]:
for k,v in items.items():
    items[k] = list(set(v)) # make each value list unique

In [18]:
items

{379824.0: [198.0],
 141307.0: [175.0],
 252255.0: [175.0],
 309566.0: [175.0],
 206032.0: [281.0],
 182119.0: [279.0],
 68225.0: [279.0],
 109919.0: [198.0],
 342023.0: [190.0],
 287829.0: [175.0],
 255960.0: [279.0],
 380420.0: [190.0],
 176150.0: [175.0],
 407671.0: [219.0],
 388332.0: [285.0],
 244552.0: [278.0],
 46507.0: [224.0],
 191162.0: [184.0],
 174308.0: [175.0],
 134532.0: [279.0],
 355159.0: [279.0],
 62942.0: [312.0],
 353962.0: [184.0],
 319670.0: [175.0],
 210578.0: [279.0],
 101110.0: [281.0],
 149798.0: [224.0],
 6018.0: [184.0],
 281262.0: [279.0],
 314519.0: [219.0],
 205257.0: [279.0],
 117839.0: [279.0],
 46699.0: [175.0],
 238412.0: [253.0],
 156961.0: [279.0],
 45576.0: [279.0],
 214766.0: [253.0],
 5524.0: [224.0],
 218669.0: [279.0],
 272518.0: [224.0],
 277770.0: [279.0],
 386295.0: [279.0],
 402251.0: [175.0],
 254609.0: [279.0],
 84043.0: [175.0],
 120820.0: [175.0],
 291453.0: [274.0],
 280293.0: [184.0],
 291386.0: [279.0],
 313634.0: [226.0],
 60260.0: 

In [19]:
item_count = {}
for key in items.keys():
    item_count[key] = len(items[key])
item_count

{379824.0: 1,
 141307.0: 1,
 252255.0: 1,
 309566.0: 1,
 206032.0: 1,
 182119.0: 1,
 68225.0: 1,
 109919.0: 1,
 342023.0: 1,
 287829.0: 1,
 255960.0: 1,
 380420.0: 1,
 176150.0: 1,
 407671.0: 1,
 388332.0: 1,
 244552.0: 1,
 46507.0: 1,
 191162.0: 1,
 174308.0: 1,
 134532.0: 1,
 355159.0: 1,
 62942.0: 1,
 353962.0: 1,
 319670.0: 1,
 210578.0: 1,
 101110.0: 1,
 149798.0: 1,
 6018.0: 1,
 281262.0: 1,
 314519.0: 1,
 205257.0: 1,
 117839.0: 1,
 46699.0: 1,
 238412.0: 1,
 156961.0: 1,
 45576.0: 1,
 214766.0: 1,
 5524.0: 1,
 218669.0: 1,
 272518.0: 1,
 277770.0: 1,
 386295.0: 1,
 402251.0: 1,
 254609.0: 1,
 84043.0: 1,
 120820.0: 1,
 291453.0: 1,
 280293.0: 1,
 291386.0: 1,
 313634.0: 1,
 60260.0: 1,
 112997.0: 1,
 305925.0: 1,
 199429.0: 1,
 185381.0: 1,
 144281.0: 1,
 33618.0: 1,
 276857.0: 1,
 130186.0: 1,
 376338.0: 1,
 867.0: 1,
 338395.0: 1,
 335587.0: 1,
 409283.0: 1,
 88275.0: 1,
 165355.0: 1,
 55784.0: 1,
 327782.0: 1,
 264657.0: 1,
 52751.0: 1,
 33745.0: 1,
 103980.0: 1,
 316235.0: 

In [20]:
total_item_count = sum(item_count.values())
total_item_count

169

In [21]:
item_count_diversity = {}
for key in item_count:
    item_count_diversity[key]= item_count[key]/total_item_count
item_count_diversity

{379824.0: 0.005917159763313609,
 141307.0: 0.005917159763313609,
 252255.0: 0.005917159763313609,
 309566.0: 0.005917159763313609,
 206032.0: 0.005917159763313609,
 182119.0: 0.005917159763313609,
 68225.0: 0.005917159763313609,
 109919.0: 0.005917159763313609,
 342023.0: 0.005917159763313609,
 287829.0: 0.005917159763313609,
 255960.0: 0.005917159763313609,
 380420.0: 0.005917159763313609,
 176150.0: 0.005917159763313609,
 407671.0: 0.005917159763313609,
 388332.0: 0.005917159763313609,
 244552.0: 0.005917159763313609,
 46507.0: 0.005917159763313609,
 191162.0: 0.005917159763313609,
 174308.0: 0.005917159763313609,
 134532.0: 0.005917159763313609,
 355159.0: 0.005917159763313609,
 62942.0: 0.005917159763313609,
 353962.0: 0.005917159763313609,
 319670.0: 0.005917159763313609,
 210578.0: 0.005917159763313609,
 101110.0: 0.005917159763313609,
 149798.0: 0.005917159763313609,
 6018.0: 0.005917159763313609,
 281262.0: 0.005917159763313609,
 314519.0: 0.005917159763313609,
 205257.0: 0.00

In [22]:
df_user['product diversity'] = -1
for index,row in df_user.iterrows():
        df_user.at[index, 'product diversity'] = item_count_diversity[row['user_id']]

df_user

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user['product diversity'] = -1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user.at[index, 'product diversity'] = item_count_diversity[row['user_id']]


Unnamed: 0,user_id,action count,action count ratio,day count,product diversity
0,379824,2,0.009259,2,0.005917
1,379824,2,0.009259,2,0.005917
2,379824,2,0.009259,2,0.005917
3,379824,2,0.009259,2,0.005917
4,379824,2,0.009259,2,0.005917
...,...,...,...,...,...
380,122632,1,0.004630,2,0.005917
381,122632,1,0.004630,2,0.005917
382,122632,1,0.004630,2,0.005917
383,95362,1,0.004630,1,0.005917


In [23]:
#FEATURE 9: DOUBLE 11 FEATURES

In [24]:
df_11 = df[df['time_stamp']==1111]
df_11

Unnamed: 0,user_id,item_id,cat_id,seller_id,brand_id,time_stamp,action_type,age_range,gender,label
0,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
1,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
2,379824,198,656,145,3462.0,1111,2,5.0,1.0,0
9,141307,175,1181,4760,247.0,1111,0,4.0,1.0,0
10,141307,175,1181,4760,247.0,1111,0,4.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...
375,289079,279,898,3323,683.0,1111,0,4.0,1.0,1
377,403117,175,1181,4760,247.0,1111,0,2.0,1.0,0
378,36385,219,349,1943,6208.0,1111,2,0.0,0.0,0
379,36385,219,349,1943,6208.0,1111,0,0.0,0.0,0


In [25]:
actions_11 = {}

# go through user_seller sub-dataframe and split entries by user_id:seller_id
for index, row in df_11.iterrows():
    if row['user_id'] not in actions_11:
        # there may be multiple entries, so default with an empty list
        actions_11[row["user_id"]] = [] 
    actions_11[row["user_id"]].append(row["action_type"])

In [26]:
for k,v in actions_11.items():
    actions_11[k] = list(set(v)) # make each value list unique
actions_11

{379824.0: [0.0, 2.0],
 141307.0: [0.0, 2.0],
 252255.0: [0.0],
 309566.0: [2.0],
 206032.0: [0.0, 2.0],
 182119.0: [0.0],
 287829.0: [0.0],
 255960.0: [0.0],
 176150.0: [0.0],
 407671.0: [0.0, 2.0],
 244552.0: [0.0, 2.0],
 46507.0: [0.0, 2.0],
 191162.0: [0.0, 2.0],
 174308.0: [0.0, 2.0],
 134532.0: [0.0],
 355159.0: [0.0],
 353962.0: [0.0, 2.0],
 319670.0: [0.0, 2.0],
 210578.0: [0.0],
 101110.0: [0.0, 2.0],
 6018.0: [0.0],
 314519.0: [2.0],
 205257.0: [0.0],
 117839.0: [0.0],
 46699.0: [2.0],
 238412.0: [0.0],
 156961.0: [0.0],
 45576.0: [0.0],
 5524.0: [0.0],
 218669.0: [0.0],
 272518.0: [0.0],
 277770.0: [0.0],
 386295.0: [0.0],
 254609.0: [0.0],
 84043.0: [0.0],
 120820.0: [0.0],
 291453.0: [0.0],
 280293.0: [0.0],
 313634.0: [0.0, 2.0],
 60260.0: [0.0],
 305925.0: [0.0, 3.0],
 185381.0: [0.0],
 144281.0: [0.0],
 33618.0: [0.0],
 276857.0: [0.0, 2.0],
 130186.0: [0.0],
 376338.0: [0.0, 2.0],
 867.0: [0.0],
 338395.0: [2.0],
 335587.0: [0.0, 2.0],
 409283.0: [0.0],
 165355.0: [0.0

In [27]:
action_count_11 = {}
for key in actions_11.keys():
    action_count_11[key] = len(actions_11[key])
action_count_11

{379824.0: 2,
 141307.0: 2,
 252255.0: 1,
 309566.0: 1,
 206032.0: 2,
 182119.0: 1,
 287829.0: 1,
 255960.0: 1,
 176150.0: 1,
 407671.0: 2,
 244552.0: 2,
 46507.0: 2,
 191162.0: 2,
 174308.0: 2,
 134532.0: 1,
 355159.0: 1,
 353962.0: 2,
 319670.0: 2,
 210578.0: 1,
 101110.0: 2,
 6018.0: 1,
 314519.0: 1,
 205257.0: 1,
 117839.0: 1,
 46699.0: 1,
 238412.0: 1,
 156961.0: 1,
 45576.0: 1,
 5524.0: 1,
 218669.0: 1,
 272518.0: 1,
 277770.0: 1,
 386295.0: 1,
 254609.0: 1,
 84043.0: 1,
 120820.0: 1,
 291453.0: 1,
 280293.0: 1,
 313634.0: 2,
 60260.0: 1,
 305925.0: 2,
 185381.0: 1,
 144281.0: 1,
 33618.0: 1,
 276857.0: 2,
 130186.0: 1,
 376338.0: 2,
 867.0: 1,
 338395.0: 1,
 335587.0: 2,
 409283.0: 1,
 165355.0: 1,
 55784.0: 1,
 327782.0: 1,
 264657.0: 2,
 52751.0: 1,
 33745.0: 1,
 316235.0: 2,
 380439.0: 1,
 408759.0: 1,
 38606.0: 2,
 108672.0: 1,
 275260.0: 2,
 53531.0: 1,
 51728.0: 2,
 177770.0: 1,
 389062.0: 2,
 383379.0: 1,
 165262.0: 1,
 97492.0: 1,
 335815.0: 1,
 8552.0: 1,
 340708.0: 1,


In [28]:
total_action_count_11 = sum(action_count_11.values())
total_action_count_11

145

In [29]:
action_count_ratio_11 = {}
for key in action_count_11:
    action_count_ratio_11[key] = action_count_11[key]/total_action_count_11
action_count_ratio_11

{379824.0: 0.013793103448275862,
 141307.0: 0.013793103448275862,
 252255.0: 0.006896551724137931,
 309566.0: 0.006896551724137931,
 206032.0: 0.013793103448275862,
 182119.0: 0.006896551724137931,
 287829.0: 0.006896551724137931,
 255960.0: 0.006896551724137931,
 176150.0: 0.006896551724137931,
 407671.0: 0.013793103448275862,
 244552.0: 0.013793103448275862,
 46507.0: 0.013793103448275862,
 191162.0: 0.013793103448275862,
 174308.0: 0.013793103448275862,
 134532.0: 0.006896551724137931,
 355159.0: 0.006896551724137931,
 353962.0: 0.013793103448275862,
 319670.0: 0.013793103448275862,
 210578.0: 0.006896551724137931,
 101110.0: 0.013793103448275862,
 6018.0: 0.006896551724137931,
 314519.0: 0.006896551724137931,
 205257.0: 0.006896551724137931,
 117839.0: 0.006896551724137931,
 46699.0: 0.006896551724137931,
 238412.0: 0.006896551724137931,
 156961.0: 0.006896551724137931,
 45576.0: 0.006896551724137931,
 5524.0: 0.006896551724137931,
 218669.0: 0.006896551724137931,
 272518.0: 0.0068

In [30]:
df_user['1111 action count'] = 0
df_user['1111 action count ratio'] = 0
for index,row in df_user.iterrows():
        if row['user_id'] in action_count_11.keys():
            df_user.at[index, '1111 action count'] = action_count_11[row['user_id']]
        if row['user_id'] in action_count_ratio_11.keys():
            df_user.at[index, '1111 action count ratio'] = action_count_ratio_11[row['user_id']]

df_user

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user['1111 action count'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user['1111 action count ratio'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user.at[index, '1111 action count ratio'] = action_count_ratio_11[row['user_id']]


Unnamed: 0,user_id,action count,action count ratio,day count,product diversity,1111 action count,1111 action count ratio
0,379824,2,0.009259,2,0.005917,2,0.013793
1,379824,2,0.009259,2,0.005917,2,0.013793
2,379824,2,0.009259,2,0.005917,2,0.013793
3,379824,2,0.009259,2,0.005917,2,0.013793
4,379824,2,0.009259,2,0.005917,2,0.013793
...,...,...,...,...,...,...,...
380,122632,1,0.004630,2,0.005917,0,0.000000
381,122632,1,0.004630,2,0.005917,0,0.000000
382,122632,1,0.004630,2,0.005917,0,0.000000
383,95362,1,0.004630,1,0.005917,1,0.006897


In [31]:
activity_ratio_1111 = {}
df_user['1111 activity ratio'] = 0
for index,row in df_user.iterrows():
    if df_user.at[index,'1111 action count'] != 0:
        df_user.at[index, '1111 activity ratio'] = (df_user.at[index,'1111 action count']/df_user.at[index,'action count'])
df_user

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user['1111 activity ratio'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user.at[index, '1111 activity ratio'] = (df_user.at[index,'1111 action count']/df_user.at[index,'action count'])


Unnamed: 0,user_id,action count,action count ratio,day count,product diversity,1111 action count,1111 action count ratio,1111 activity ratio
0,379824,2,0.009259,2,0.005917,2,0.013793,1.0
1,379824,2,0.009259,2,0.005917,2,0.013793,1.0
2,379824,2,0.009259,2,0.005917,2,0.013793,1.0
3,379824,2,0.009259,2,0.005917,2,0.013793,1.0
4,379824,2,0.009259,2,0.005917,2,0.013793,1.0
...,...,...,...,...,...,...,...,...
380,122632,1,0.004630,2,0.005917,0,0.000000,0.0
381,122632,1,0.004630,2,0.005917,0,0.000000,0.0
382,122632,1,0.004630,2,0.005917,0,0.000000,0.0
383,95362,1,0.004630,1,0.005917,1,0.006897,1.0


In [32]:
#FEATURE 10: LATEST ONE-WEEK
latest_week = 1104
df_latest_week = df[df['time_stamp']>=latest_week]
df_latest_week

Unnamed: 0,user_id,item_id,cat_id,seller_id,brand_id,time_stamp,action_type,age_range,gender,label
0,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
1,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
2,379824,198,656,145,3462.0,1111,2,5.0,1.0,0
3,379824,198,656,145,3462.0,1110,0,5.0,1.0,0
4,379824,198,656,145,3462.0,1110,0,5.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...
380,122632,175,1181,4760,247.0,1109,0,3.0,0.0,0
381,122632,175,1181,4760,247.0,1108,0,3.0,0.0,0
382,122632,175,1181,4760,247.0,1108,0,3.0,0.0,0
383,95362,253,962,3263,626.0,1111,0,0.0,0.0,0


In [33]:
actions_last_week = {}

# go through user_seller sub-dataframe and split entries by user_id:seller_id
for index, row in df_latest_week.iterrows():
    if row['user_id'] not in actions_last_week:
        # there may be multiple entries, so default with an empty list
        actions_last_week[row["user_id"]] = [] 
    actions_last_week[row["user_id"]].append(row["action_type"])
    
for k,v in actions_last_week.items():
    actions_last_week[k] = list(set(v)) # make each value list unique
actions_last_week

{379824.0: [0.0, 2.0],
 141307.0: [0.0, 2.0],
 252255.0: [0.0],
 309566.0: [2.0],
 206032.0: [0.0, 2.0],
 182119.0: [0.0],
 68225.0: [0.0],
 109919.0: [0.0],
 342023.0: [0.0],
 287829.0: [0.0],
 255960.0: [0.0],
 380420.0: [0.0],
 176150.0: [0.0],
 407671.0: [0.0, 2.0, 3.0],
 244552.0: [0.0, 2.0],
 46507.0: [0.0, 2.0],
 191162.0: [0.0, 2.0],
 174308.0: [0.0, 2.0],
 134532.0: [0.0],
 355159.0: [0.0],
 353962.0: [0.0, 2.0],
 319670.0: [0.0, 2.0],
 210578.0: [0.0],
 101110.0: [0.0, 2.0],
 149798.0: [0.0],
 6018.0: [0.0],
 281262.0: [0.0],
 314519.0: [2.0],
 205257.0: [0.0],
 117839.0: [0.0],
 46699.0: [0.0, 2.0, 3.0],
 238412.0: [0.0],
 156961.0: [0.0],
 45576.0: [0.0],
 214766.0: [0.0],
 5524.0: [0.0],
 218669.0: [0.0],
 272518.0: [0.0],
 277770.0: [0.0],
 386295.0: [0.0],
 402251.0: [0.0],
 254609.0: [0.0],
 84043.0: [0.0],
 120820.0: [0.0],
 291453.0: [0.0],
 280293.0: [0.0],
 291386.0: [0.0],
 313634.0: [0.0, 2.0],
 60260.0: [0.0],
 112997.0: [0.0],
 305925.0: [0.0, 3.0],
 199429.0: [

In [34]:
action_count_last_week = {}
for key in actions_last_week.keys():
    action_count_last_week[key] = len(actions_last_week[key])
action_count_last_week

{379824.0: 2,
 141307.0: 2,
 252255.0: 1,
 309566.0: 1,
 206032.0: 2,
 182119.0: 1,
 68225.0: 1,
 109919.0: 1,
 342023.0: 1,
 287829.0: 1,
 255960.0: 1,
 380420.0: 1,
 176150.0: 1,
 407671.0: 3,
 244552.0: 2,
 46507.0: 2,
 191162.0: 2,
 174308.0: 2,
 134532.0: 1,
 355159.0: 1,
 353962.0: 2,
 319670.0: 2,
 210578.0: 1,
 101110.0: 2,
 149798.0: 1,
 6018.0: 1,
 281262.0: 1,
 314519.0: 1,
 205257.0: 1,
 117839.0: 1,
 46699.0: 3,
 238412.0: 1,
 156961.0: 1,
 45576.0: 1,
 214766.0: 1,
 5524.0: 1,
 218669.0: 1,
 272518.0: 1,
 277770.0: 1,
 386295.0: 1,
 402251.0: 1,
 254609.0: 1,
 84043.0: 1,
 120820.0: 1,
 291453.0: 1,
 280293.0: 1,
 291386.0: 1,
 313634.0: 2,
 60260.0: 1,
 112997.0: 1,
 305925.0: 2,
 199429.0: 1,
 185381.0: 1,
 144281.0: 1,
 33618.0: 1,
 276857.0: 2,
 130186.0: 2,
 376338.0: 2,
 867.0: 1,
 338395.0: 2,
 335587.0: 2,
 409283.0: 1,
 165355.0: 1,
 55784.0: 1,
 327782.0: 2,
 264657.0: 2,
 52751.0: 1,
 33745.0: 1,
 316235.0: 3,
 380439.0: 1,
 86171.0: 1,
 408759.0: 1,
 38606.0: 

In [35]:
total_action_count_last_week = sum(action_count_last_week.values())
total_action_count_last_week

197

In [36]:
action_count_ratio_last_week = {}
for key in action_count_last_week:
    action_count_ratio_last_week[key] = action_count_last_week[key]/total_action_count_last_week
action_count_ratio_last_week

{379824.0: 0.01015228426395939,
 141307.0: 0.01015228426395939,
 252255.0: 0.005076142131979695,
 309566.0: 0.005076142131979695,
 206032.0: 0.01015228426395939,
 182119.0: 0.005076142131979695,
 68225.0: 0.005076142131979695,
 109919.0: 0.005076142131979695,
 342023.0: 0.005076142131979695,
 287829.0: 0.005076142131979695,
 255960.0: 0.005076142131979695,
 380420.0: 0.005076142131979695,
 176150.0: 0.005076142131979695,
 407671.0: 0.015228426395939087,
 244552.0: 0.01015228426395939,
 46507.0: 0.01015228426395939,
 191162.0: 0.01015228426395939,
 174308.0: 0.01015228426395939,
 134532.0: 0.005076142131979695,
 355159.0: 0.005076142131979695,
 353962.0: 0.01015228426395939,
 319670.0: 0.01015228426395939,
 210578.0: 0.005076142131979695,
 101110.0: 0.01015228426395939,
 149798.0: 0.005076142131979695,
 6018.0: 0.005076142131979695,
 281262.0: 0.005076142131979695,
 314519.0: 0.005076142131979695,
 205257.0: 0.005076142131979695,
 117839.0: 0.005076142131979695,
 46699.0: 0.015228426395

In [37]:
df_user['last week action count'] = 0
df_user['last week action count ratio'] = 0
for index,row in df_user.iterrows():
        if row['user_id'] in action_count_last_week.keys():
            df_user.at[index, 'last week action count'] = action_count_last_week[row['user_id']]
        if row['user_id'] in action_count_ratio_last_week.keys():
            df_user.at[index, 'last week action count ratio'] = action_count_ratio_last_week[row['user_id']]

df_user

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_user['last week action count'] = 0


Unnamed: 0,user_id,action count,action count ratio,day count,product diversity,1111 action count,1111 action count ratio,1111 activity ratio,last week action count,last week action count ratio
0,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152
1,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152
2,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152
3,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152
4,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152
...,...,...,...,...,...,...,...,...,...,...
380,122632,1,0.004630,2,0.005917,0,0.000000,0.0,1,0.005076
381,122632,1,0.004630,2,0.005917,0,0.000000,0.0,1,0.005076
382,122632,1,0.004630,2,0.005917,0,0.000000,0.0,1,0.005076
383,95362,1,0.004630,1,0.005917,1,0.006897,1.0,1,0.005076


In [38]:
activity_ratio_last_week = {}
df_user['last week activity ratio'] = 0
for index,row in df_user.iterrows():
    if df_user.at[index,'last week action count'] != 0:
        df_user.at[index, 'last week activity ratio'] = (df_user.at[index,'last week action count']/df_user.at[index,'action count'])
df_user

Unnamed: 0,user_id,action count,action count ratio,day count,product diversity,1111 action count,1111 action count ratio,1111 activity ratio,last week action count,last week action count ratio,last week activity ratio
0,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0
1,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0
2,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0
3,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0
4,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0
...,...,...,...,...,...,...,...,...,...,...,...
380,122632,1,0.004630,2,0.005917,0,0.000000,0.0,1,0.005076,1.0
381,122632,1,0.004630,2,0.005917,0,0.000000,0.0,1,0.005076,1.0
382,122632,1,0.004630,2,0.005917,0,0.000000,0.0,1,0.005076,1.0
383,95362,1,0.004630,1,0.005917,1,0.006897,1.0,1,0.005076,1.0


In [39]:
#FEATURE 11: LATEST MONTH
latest_month = 1011
df_latest_month = df[df['time_stamp']>=latest_month]
df_latest_month

Unnamed: 0,user_id,item_id,cat_id,seller_id,brand_id,time_stamp,action_type,age_range,gender,label
0,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
1,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
2,379824,198,656,145,3462.0,1111,2,5.0,1.0,0
3,379824,198,656,145,3462.0,1110,0,5.0,1.0,0
4,379824,198,656,145,3462.0,1110,0,5.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...
380,122632,175,1181,4760,247.0,1109,0,3.0,0.0,0
381,122632,175,1181,4760,247.0,1108,0,3.0,0.0,0
382,122632,175,1181,4760,247.0,1108,0,3.0,0.0,0
383,95362,253,962,3263,626.0,1111,0,0.0,0.0,0


In [40]:
actions_last_month = {}

# go through user_seller sub-dataframe and split entries by user_id:seller_id
for index, row in df_latest_month.iterrows():
    if row['user_id'] not in actions_last_month:
        # there may be multiple entries, so default with an empty list
        actions_last_month[row["user_id"]] = [] 
    actions_last_month[row["user_id"]].append(row["action_type"])
    
for k,v in actions_last_month.items():
    actions_last_month[k] = list(set(v)) # make each value list unique
actions_last_month

{379824.0: [0.0, 2.0],
 141307.0: [0.0, 2.0],
 252255.0: [0.0],
 309566.0: [2.0],
 206032.0: [0.0, 2.0, 3.0],
 182119.0: [0.0],
 68225.0: [0.0],
 109919.0: [0.0],
 342023.0: [0.0],
 287829.0: [0.0],
 255960.0: [0.0],
 380420.0: [0.0],
 176150.0: [0.0],
 407671.0: [0.0, 2.0, 3.0],
 244552.0: [0.0, 2.0],
 46507.0: [0.0, 2.0],
 191162.0: [0.0, 2.0],
 174308.0: [0.0, 2.0],
 134532.0: [0.0],
 355159.0: [0.0],
 62942.0: [0.0],
 353962.0: [0.0, 2.0],
 319670.0: [0.0, 2.0],
 210578.0: [0.0],
 101110.0: [0.0, 2.0],
 149798.0: [0.0],
 6018.0: [0.0],
 281262.0: [0.0],
 314519.0: [2.0],
 205257.0: [0.0],
 117839.0: [0.0],
 46699.0: [0.0, 2.0, 3.0],
 238412.0: [0.0],
 156961.0: [0.0],
 45576.0: [0.0],
 214766.0: [0.0],
 5524.0: [0.0],
 218669.0: [0.0],
 272518.0: [0.0],
 277770.0: [0.0],
 386295.0: [0.0],
 402251.0: [0.0],
 254609.0: [0.0],
 84043.0: [0.0],
 120820.0: [0.0],
 291453.0: [0.0],
 280293.0: [0.0],
 291386.0: [0.0],
 313634.0: [0.0, 2.0],
 60260.0: [0.0],
 112997.0: [0.0],
 305925.0: [0

In [41]:
action_count_last_month = {}
for key in actions_last_month.keys():
    action_count_last_month[key] = len(actions_last_month[key])
action_count_last_month

{379824.0: 2,
 141307.0: 2,
 252255.0: 1,
 309566.0: 1,
 206032.0: 3,
 182119.0: 1,
 68225.0: 1,
 109919.0: 1,
 342023.0: 1,
 287829.0: 1,
 255960.0: 1,
 380420.0: 1,
 176150.0: 1,
 407671.0: 3,
 244552.0: 2,
 46507.0: 2,
 191162.0: 2,
 174308.0: 2,
 134532.0: 1,
 355159.0: 1,
 62942.0: 1,
 353962.0: 2,
 319670.0: 2,
 210578.0: 1,
 101110.0: 2,
 149798.0: 1,
 6018.0: 1,
 281262.0: 1,
 314519.0: 1,
 205257.0: 1,
 117839.0: 1,
 46699.0: 3,
 238412.0: 1,
 156961.0: 1,
 45576.0: 1,
 214766.0: 1,
 5524.0: 1,
 218669.0: 1,
 272518.0: 1,
 277770.0: 1,
 386295.0: 1,
 402251.0: 1,
 254609.0: 1,
 84043.0: 1,
 120820.0: 1,
 291453.0: 1,
 280293.0: 1,
 291386.0: 1,
 313634.0: 2,
 60260.0: 1,
 112997.0: 1,
 305925.0: 2,
 199429.0: 1,
 185381.0: 1,
 144281.0: 1,
 33618.0: 1,
 276857.0: 2,
 130186.0: 2,
 376338.0: 2,
 867.0: 1,
 338395.0: 2,
 335587.0: 2,
 409283.0: 1,
 165355.0: 1,
 55784.0: 1,
 327782.0: 2,
 264657.0: 2,
 52751.0: 1,
 33745.0: 1,
 316235.0: 3,
 380439.0: 1,
 86171.0: 1,
 408759.0: 

In [42]:
total_action_count_last_month = sum(action_count_last_month.values())
total_action_count_last_month

204

In [43]:
action_count_ratio_last_month = {}
for key in action_count_last_month:
    action_count_ratio_last_month[key] = action_count_last_month[key]/total_action_count_last_month
action_count_ratio_last_month

{379824.0: 0.00980392156862745,
 141307.0: 0.00980392156862745,
 252255.0: 0.004901960784313725,
 309566.0: 0.004901960784313725,
 206032.0: 0.014705882352941176,
 182119.0: 0.004901960784313725,
 68225.0: 0.004901960784313725,
 109919.0: 0.004901960784313725,
 342023.0: 0.004901960784313725,
 287829.0: 0.004901960784313725,
 255960.0: 0.004901960784313725,
 380420.0: 0.004901960784313725,
 176150.0: 0.004901960784313725,
 407671.0: 0.014705882352941176,
 244552.0: 0.00980392156862745,
 46507.0: 0.00980392156862745,
 191162.0: 0.00980392156862745,
 174308.0: 0.00980392156862745,
 134532.0: 0.004901960784313725,
 355159.0: 0.004901960784313725,
 62942.0: 0.004901960784313725,
 353962.0: 0.00980392156862745,
 319670.0: 0.00980392156862745,
 210578.0: 0.004901960784313725,
 101110.0: 0.00980392156862745,
 149798.0: 0.004901960784313725,
 6018.0: 0.004901960784313725,
 281262.0: 0.004901960784313725,
 314519.0: 0.004901960784313725,
 205257.0: 0.004901960784313725,
 117839.0: 0.00490196078

In [44]:
df_user['last month action count'] = 0
df_user['last month action count ratio'] = 0
for index,row in df_user.iterrows():
        if row['user_id'] in action_count_last_month.keys():
            df_user.at[index, 'last month action count'] = action_count_last_month[row['user_id']]
        if row['user_id'] in action_count_ratio_last_month.keys():
            df_user.at[index, 'last month action count ratio'] = action_count_ratio_last_month[row['user_id']]

df_user

Unnamed: 0,user_id,action count,action count ratio,day count,product diversity,1111 action count,1111 action count ratio,1111 activity ratio,last week action count,last week action count ratio,last week activity ratio,last month action count,last month action count ratio
0,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0,2,0.009804
1,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0,2,0.009804
2,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0,2,0.009804
3,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0,2,0.009804
4,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0,2,0.009804
...,...,...,...,...,...,...,...,...,...,...,...,...,...
380,122632,1,0.004630,2,0.005917,0,0.000000,0.0,1,0.005076,1.0,1,0.004902
381,122632,1,0.004630,2,0.005917,0,0.000000,0.0,1,0.005076,1.0,1,0.004902
382,122632,1,0.004630,2,0.005917,0,0.000000,0.0,1,0.005076,1.0,1,0.004902
383,95362,1,0.004630,1,0.005917,1,0.006897,1.0,1,0.005076,1.0,1,0.004902


In [45]:
activity_ratio_last_month = {}
df_user['last month activity ratio'] = 0
for index,row in df_user.iterrows():
    if df_user.at[index,'last month action count'] != 0:
        df_user.at[index, 'last month activity ratio'] = (df_user.at[index,'last month action count']/df_user.at[index,'action count'])
df_user

Unnamed: 0,user_id,action count,action count ratio,day count,product diversity,1111 action count,1111 action count ratio,1111 activity ratio,last week action count,last week action count ratio,last week activity ratio,last month action count,last month action count ratio,last month activity ratio
0,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0,2,0.009804,1
1,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0,2,0.009804,1
2,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0,2,0.009804,1
3,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0,2,0.009804,1
4,379824,2,0.009259,2,0.005917,2,0.013793,1.0,2,0.010152,1.0,2,0.009804,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
380,122632,1,0.004630,2,0.005917,0,0.000000,0.0,1,0.005076,1.0,1,0.004902,1
381,122632,1,0.004630,2,0.005917,0,0.000000,0.0,1,0.005076,1.0,1,0.004902,1
382,122632,1,0.004630,2,0.005917,0,0.000000,0.0,1,0.005076,1.0,1,0.004902,1
383,95362,1,0.004630,1,0.005917,1,0.006897,1.0,1,0.005076,1.0,1,0.004902,1


In [46]:
print("TOTAL ACTIONS: ", total_action_count)
print("TOTAL ACTIONS LATEST MONTH: ",total_action_count_last_month)
print("TOTAL ACTIONS LATEST WEEK: ",total_action_count_last_week)
print("TOTAL ACTIONS 1111: ",total_action_count_11)

TOTAL ACTIONS:  216
TOTAL ACTIONS LATEST MONTH:  204
TOTAL ACTIONS LATEST WEEK:  197
TOTAL ACTIONS 1111:  145
