In [2]:
import pandas as pd
import numpy as np
import sklearn
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
'''
FEATURES:
Overall action count/ratio          DONE
Overall day count                   DONE
Monthly action count/ratio      TODO
Penetration                     TODO
Monthly Aggregation             TODO
Double 11 Features                  DONE
Latest one-week                     DONE
Repeat Buyer Features           TODO
Age Related Features            TODO
Gender Related Features         TODO
'''

'\nFEATURES:\nOverall action count/ratio\nOverall day count\nMonthly action count/ratio\nPenetration\nMonthly Aggregation\nDouble 11 Features\nLatest one-week\nRepeat Buyer Features\nAge Related Features\nGender Related Features\n'

In [3]:
df = pd.read_csv("./use_data/expanded_training.csv")
df

Unnamed: 0,user_id,item_id,cat_id,seller_id,brand_id,time_stamp,action_type,age_range,gender,label
0,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
1,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
2,379824,198,656,145,3462.0,1111,2,5.0,1.0,0
3,379824,198,656,145,3462.0,1110,0,5.0,1.0,0
4,379824,198,656,145,3462.0,1110,0,5.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...
380,122632,175,1181,4760,247.0,1109,0,3.0,0.0,0
381,122632,175,1181,4760,247.0,1108,0,3.0,0.0,0
382,122632,175,1181,4760,247.0,1108,0,3.0,0.0,0
383,95362,253,962,3263,626.0,1111,0,0.0,0.0,0


In [4]:
df_item = df[['item_id']]
df_item

Unnamed: 0,item_id
0,198
1,198
2,198
3,198
4,198
...,...
380,175
381,175
382,175
383,253


In [5]:
#FEATURE 1: OVERALL ACTION COUNT/RATIO
actions = {}
for index, row in df.iterrows():
    if row['item_id'] not in actions:
        # there may be multiple entries, so default with an empty list
        actions[row["item_id"]] = [] 
    actions[row["item_id"]].append(row["action_type"])

In [6]:
actions

{198.0: [0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 175.0: [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  2.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  3.0,
  3.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  3.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 281.0: [0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  3.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0],
 279.0: [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  

In [7]:
action_count = {}
for key in actions.keys():
    action_count[key] = len(actions[key])
action_count


{198.0: 11,
 175.0: 105,
 281.0: 19,
 279.0: 85,
 190.0: 6,
 219.0: 21,
 285.0: 2,
 278.0: 2,
 224.0: 12,
 184.0: 25,
 312.0: 4,
 253.0: 18,
 274.0: 1,
 226.0: 4,
 209.0: 3,
 319.0: 1,
 170.0: 2,
 211.0: 30,
 277.0: 6,
 291.0: 6,
 221.0: 9,
 169.0: 2,
 186.0: 7,
 269.0: 1,
 215.0: 1,
 315.0: 1,
 191.0: 1}

In [8]:
total_action_count = sum(action_count.values())
total_action_count

385

In [9]:
action_count_ratio = {}
for key in action_count:
    action_count_ratio[key] = action_count[key]/total_action_count
action_count_ratio

{198.0: 0.02857142857142857,
 175.0: 0.2727272727272727,
 281.0: 0.04935064935064935,
 279.0: 0.22077922077922077,
 190.0: 0.015584415584415584,
 219.0: 0.05454545454545454,
 285.0: 0.005194805194805195,
 278.0: 0.005194805194805195,
 224.0: 0.03116883116883117,
 184.0: 0.06493506493506493,
 312.0: 0.01038961038961039,
 253.0: 0.046753246753246755,
 274.0: 0.0025974025974025974,
 226.0: 0.01038961038961039,
 209.0: 0.007792207792207792,
 319.0: 0.0025974025974025974,
 170.0: 0.005194805194805195,
 211.0: 0.07792207792207792,
 277.0: 0.015584415584415584,
 291.0: 0.015584415584415584,
 221.0: 0.023376623376623377,
 169.0: 0.005194805194805195,
 186.0: 0.01818181818181818,
 269.0: 0.0025974025974025974,
 215.0: 0.0025974025974025974,
 315.0: 0.0025974025974025974,
 191.0: 0.0025974025974025974}

In [10]:
df_item['item action count'] = -1
df_item['item action count ratio'] = -1
for index,row in df_item.iterrows():
        df_item.at[index, 'item action count'] = action_count[row['item_id']]
        df_item.at[index, 'item action count ratio'] = action_count_ratio[row['item_id']]

df_item

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_item['item action count'] = -1
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_item['item action count ratio'] = -1


Unnamed: 0,item_id,item action count,item action count ratio
0,198,11,0.028571
1,198,11,0.028571
2,198,11,0.028571
3,198,11,0.028571
4,198,11,0.028571
...,...,...,...
380,175,105,0.272727
381,175,105,0.272727
382,175,105,0.272727
383,253,18,0.046753


In [14]:
#FEATURE 2: OVERALL DAY COUNT
days = {}

# go through user_seller sub-dataframe and split entries by item_id:seller_id
for index, row in df.iterrows():
    if row['item_id'] not in days:
        # there may be multiple entries, so default with an empty list
        days[row["item_id"]] = [] 
    days[row["item_id"]].append(row["time_stamp"])

In [15]:
for k,v in days.items():
    days[k] = list(set(v)) # make each value list unique

days

{198.0: [1105.0, 1110.0, 1111.0],
 175.0: [1026.0,
  1101.0,
  1103.0,
  1106.0,
  1107.0,
  1108.0,
  1109.0,
  1110.0,
  1111.0],
 281.0: [1103.0, 1104.0, 1105.0, 1107.0, 1108.0, 1109.0, 1111.0],
 279.0: [1030.0,
  617.0,
  522.0,
  524.0,
  529.0,
  1105.0,
  1107.0,
  1108.0,
  1109.0,
  1110.0,
  1111.0,
  603.0],
 190.0: [1105.0, 1108.0, 1109.0, 1111.0],
 219.0: [1105.0, 1109.0, 1111.0],
 285.0: [520.0, 1110.0],
 278.0: [1111.0],
 224.0: [1108.0, 1109.0, 1111.0],
 184.0: [1109.0, 1111.0],
 312.0: [1108.0, 1022.0, 1103.0],
 253.0: [1110.0, 1111.0],
 274.0: [1111.0],
 226.0: [1111.0],
 209.0: [1110.0, 1111.0],
 319.0: [1111.0],
 170.0: [528.0, 601.0],
 211.0: [1103.0, 1105.0, 1106.0, 1108.0, 1109.0, 1110.0, 1111.0],
 277.0: [801.0, 627.0, 804.0, 806.0],
 291.0: [1111.0],
 221.0: [1026.0, 1101.0, 1110.0, 1111.0],
 169.0: [1110.0],
 186.0: [1110.0, 1111.0],
 269.0: [1110.0],
 215.0: [1001.0],
 315.0: [1111.0],
 191.0: [1109.0]}

In [16]:
day_count = {}
for key in days.keys():
    day_count[key] = len(days[key])
day_count

{198.0: 3,
 175.0: 9,
 281.0: 7,
 279.0: 12,
 190.0: 4,
 219.0: 3,
 285.0: 2,
 278.0: 1,
 224.0: 3,
 184.0: 2,
 312.0: 3,
 253.0: 2,
 274.0: 1,
 226.0: 1,
 209.0: 2,
 319.0: 1,
 170.0: 2,
 211.0: 7,
 277.0: 4,
 291.0: 1,
 221.0: 4,
 169.0: 1,
 186.0: 2,
 269.0: 1,
 215.0: 1,
 315.0: 1,
 191.0: 1}

In [17]:
df_item['item day count'] = -1
for index,row in df_item.iterrows():
        df_item.at[index, 'item day count'] = day_count[row['item_id']]

df_item

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_item['item day count'] = -1


Unnamed: 0,item_id,item action count,item action count ratio,item day count
0,198,11,0.028571,3
1,198,11,0.028571,3
2,198,11,0.028571,3
3,198,11,0.028571,3
4,198,11,0.028571,3
...,...,...,...,...
380,175,105,0.272727,9
381,175,105,0.272727,9
382,175,105,0.272727,9
383,253,18,0.046753,2


In [18]:
#DOUBLE 11 FEATURES
df_11 = df[df['time_stamp']==1111]
df_11

Unnamed: 0,user_id,item_id,cat_id,seller_id,brand_id,time_stamp,action_type,age_range,gender,label
0,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
1,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
2,379824,198,656,145,3462.0,1111,2,5.0,1.0,0
9,141307,175,1181,4760,247.0,1111,0,4.0,1.0,0
10,141307,175,1181,4760,247.0,1111,0,4.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...
375,289079,279,898,3323,683.0,1111,0,4.0,1.0,1
377,403117,175,1181,4760,247.0,1111,0,2.0,1.0,0
378,36385,219,349,1943,6208.0,1111,2,0.0,0.0,0
379,36385,219,349,1943,6208.0,1111,0,0.0,0.0,0


In [19]:
actions_11 = {}

# go through user_seller sub-dataframe and split entries by item_id:seller_id
for index, row in df_11.iterrows():
    if row['item_id'] not in actions_11:
        # there may be multiple entries, so default with an empty list
        actions_11[row["item_id"]] = [] 
    actions_11[row["item_id"]].append(row["action_type"])

actions_11

{198.0: [0.0, 0.0, 2.0, 0.0, 0.0, 0.0],
 175.0: [0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  2.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  2.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0],
 281.0: [2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0],
 279.0: [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 219.0: [0.0,
  2.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  2.0,
  0.

In [20]:
action_count_11 = {}
for key in actions_11.keys():
    action_count_11[key] = len(actions_11[key])
action_count_11

{198.0: 6,
 175.0: 67,
 281.0: 11,
 279.0: 46,
 219.0: 17,
 278.0: 2,
 224.0: 10,
 184.0: 24,
 253.0: 13,
 274.0: 1,
 226.0: 4,
 319.0: 1,
 211.0: 12,
 291.0: 6,
 221.0: 3,
 209.0: 1,
 190.0: 1,
 186.0: 2,
 315.0: 1}

In [21]:
total_action_count_11 = sum(action_count_11.values())
total_action_count_11

228

In [22]:
action_count_ratio_11 = {}
for key in action_count_11:
    action_count_ratio_11[key] = action_count_11[key]/total_action_count_11
action_count_ratio_11

{198.0: 0.02631578947368421,
 175.0: 0.29385964912280704,
 281.0: 0.04824561403508772,
 279.0: 0.20175438596491227,
 219.0: 0.07456140350877193,
 278.0: 0.008771929824561403,
 224.0: 0.043859649122807015,
 184.0: 0.10526315789473684,
 253.0: 0.05701754385964912,
 274.0: 0.0043859649122807015,
 226.0: 0.017543859649122806,
 319.0: 0.0043859649122807015,
 211.0: 0.05263157894736842,
 291.0: 0.02631578947368421,
 221.0: 0.013157894736842105,
 209.0: 0.0043859649122807015,
 190.0: 0.0043859649122807015,
 186.0: 0.008771929824561403,
 315.0: 0.0043859649122807015}

In [23]:
df_item['item 1111 action count'] = 0
df_item['item 1111 action count ratio'] = 0
for index,row in df_item.iterrows():
        if row['item_id'] in action_count_11.keys():
            df_item.at[index, 'item 1111 action count'] = action_count_11[row['item_id']]
        if row['item_id'] in action_count_ratio_11.keys():
            df_item.at[index, 'item 1111 action count ratio'] = action_count_ratio_11[row['item_id']]

df_item

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_item['item 1111 action count'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_item['item 1111 action count ratio'] = 0


Unnamed: 0,item_id,item action count,item action count ratio,item day count,item 1111 action count,item 1111 action count ratio
0,198,11,0.028571,3,6,0.026316
1,198,11,0.028571,3,6,0.026316
2,198,11,0.028571,3,6,0.026316
3,198,11,0.028571,3,6,0.026316
4,198,11,0.028571,3,6,0.026316
...,...,...,...,...,...,...
380,175,105,0.272727,9,67,0.293860
381,175,105,0.272727,9,67,0.293860
382,175,105,0.272727,9,67,0.293860
383,253,18,0.046753,2,13,0.057018


In [25]:
activity_ratio_1111 = {}
df_item['item 1111 activity ratio'] = 0
for index,row in df_item.iterrows():
    if df_item.at[index,'item 1111 action count'] != 0:
        df_item.at[index, 'item 1111 activity ratio'] = (df_item.at[index,'item 1111 action count']/df_item.at[index,'item action count'])
df_item

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_item['item 1111 activity ratio'] = 0


Unnamed: 0,item_id,item action count,item action count ratio,item day count,item 1111 action count,item 1111 action count ratio,item 1111 activity ratio
0,198,11,0.028571,3,6,0.026316,0.545455
1,198,11,0.028571,3,6,0.026316,0.545455
2,198,11,0.028571,3,6,0.026316,0.545455
3,198,11,0.028571,3,6,0.026316,0.545455
4,198,11,0.028571,3,6,0.026316,0.545455
...,...,...,...,...,...,...,...
380,175,105,0.272727,9,67,0.293860,0.638095
381,175,105,0.272727,9,67,0.293860,0.638095
382,175,105,0.272727,9,67,0.293860,0.638095
383,253,18,0.046753,2,13,0.057018,0.722222


In [26]:
#FEATURE 10: LATEST ONE-WEEK
latest_week = 1104
df_latest_week = df[df['time_stamp']>=latest_week]
df_latest_week

Unnamed: 0,user_id,item_id,cat_id,seller_id,brand_id,time_stamp,action_type,age_range,gender,label
0,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
1,379824,198,656,145,3462.0,1111,0,5.0,1.0,0
2,379824,198,656,145,3462.0,1111,2,5.0,1.0,0
3,379824,198,656,145,3462.0,1110,0,5.0,1.0,0
4,379824,198,656,145,3462.0,1110,0,5.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...
380,122632,175,1181,4760,247.0,1109,0,3.0,0.0,0
381,122632,175,1181,4760,247.0,1108,0,3.0,0.0,0
382,122632,175,1181,4760,247.0,1108,0,3.0,0.0,0
383,95362,253,962,3263,626.0,1111,0,0.0,0.0,0


In [27]:
actions_last_week = {}

# go through user_seller sub-dataframe and split entries by item_id:seller_id
for index, row in df_latest_week.iterrows():
    if row['item_id'] not in actions_last_week:
        # there may be multiple entries, so default with an empty list
        actions_last_week[row["item_id"]] = [] 
    actions_last_week[row["item_id"]].append(row["action_type"])
    
#for k,v in actions_last_week.items():
#    actions_last_week[k] = list(set(v)) # make each value list unique
actions_last_week

{198.0: [0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
 175.0: [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  2.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  3.0,
  3.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  3.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0],
 281.0: [0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0,
  2.0,
  0.0,
  0.0,
  0.0,
  0.0],
 279.0: [0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  0.0,
  

In [28]:
action_count_last_week = {}
for key in actions_last_week.keys():
    action_count_last_week[key] = len(actions_last_week[key])
action_count_last_week

{198.0: 11,
 175.0: 101,
 281.0: 18,
 279.0: 75,
 190.0: 6,
 219.0: 21,
 278.0: 2,
 224.0: 12,
 184.0: 25,
 253.0: 18,
 274.0: 1,
 226.0: 4,
 209.0: 3,
 319.0: 1,
 211.0: 29,
 285.0: 1,
 291.0: 6,
 221.0: 7,
 169.0: 2,
 186.0: 7,
 269.0: 1,
 312.0: 2,
 315.0: 1,
 191.0: 1}

In [29]:
total_action_count_last_week = sum(action_count_last_week.values())
total_action_count_last_week


355

In [30]:
action_count_ratio_last_week = {}
for key in action_count_last_week:
    action_count_ratio_last_week[key] = action_count_last_week[key]/total_action_count_last_week
action_count_ratio_last_week

{198.0: 0.030985915492957747,
 175.0: 0.28450704225352114,
 281.0: 0.05070422535211268,
 279.0: 0.2112676056338028,
 190.0: 0.016901408450704224,
 219.0: 0.059154929577464786,
 278.0: 0.005633802816901409,
 224.0: 0.03380281690140845,
 184.0: 0.07042253521126761,
 253.0: 0.05070422535211268,
 274.0: 0.0028169014084507044,
 226.0: 0.011267605633802818,
 209.0: 0.008450704225352112,
 319.0: 0.0028169014084507044,
 211.0: 0.08169014084507042,
 285.0: 0.0028169014084507044,
 291.0: 0.016901408450704224,
 221.0: 0.01971830985915493,
 169.0: 0.005633802816901409,
 186.0: 0.01971830985915493,
 269.0: 0.0028169014084507044,
 312.0: 0.005633802816901409,
 315.0: 0.0028169014084507044,
 191.0: 0.0028169014084507044}

In [31]:
df_item['item last week action count'] = 0
df_item['item last week action count ratio'] = 0
for index,row in df_item.iterrows():
        if row['item_id'] in action_count_last_week.keys():
            df_item.at[index, 'item last week action count'] = action_count_last_week[row['item_id']]
        if row['item_id'] in action_count_ratio_last_week.keys():
            df_item.at[index, 'item last week action count ratio'] = action_count_ratio_last_week[row['item_id']]

df_item

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_item['item last week action count'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_item['item last week action count ratio'] = 0


Unnamed: 0,item_id,item action count,item action count ratio,item day count,item 1111 action count,item 1111 action count ratio,item 1111 activity ratio,item last week action count,item last week action count ratio
0,198,11,0.028571,3,6,0.026316,0.545455,11,0.030986
1,198,11,0.028571,3,6,0.026316,0.545455,11,0.030986
2,198,11,0.028571,3,6,0.026316,0.545455,11,0.030986
3,198,11,0.028571,3,6,0.026316,0.545455,11,0.030986
4,198,11,0.028571,3,6,0.026316,0.545455,11,0.030986
...,...,...,...,...,...,...,...,...,...
380,175,105,0.272727,9,67,0.293860,0.638095,101,0.284507
381,175,105,0.272727,9,67,0.293860,0.638095,101,0.284507
382,175,105,0.272727,9,67,0.293860,0.638095,101,0.284507
383,253,18,0.046753,2,13,0.057018,0.722222,18,0.050704


In [32]:
activity_ratio_last_week = {}
df_item['item last week activity ratio'] = 0
for index,row in df_item.iterrows():
    if df_item.at[index,'item last week action count'] != 0:
        df_item.at[index, 'item last week activity ratio'] = (df_item.at[index,'item last week action count']/df_item.at[index,'item action count'])
df_item

Unnamed: 0,item_id,item action count,item action count ratio,item day count,item 1111 action count,item 1111 action count ratio,item 1111 activity ratio,item last week action count,item last week action count ratio,item last week activity ratio
0,198,11,0.028571,3,6,0.026316,0.545455,11,0.030986,1.000000
1,198,11,0.028571,3,6,0.026316,0.545455,11,0.030986,1.000000
2,198,11,0.028571,3,6,0.026316,0.545455,11,0.030986,1.000000
3,198,11,0.028571,3,6,0.026316,0.545455,11,0.030986,1.000000
4,198,11,0.028571,3,6,0.026316,0.545455,11,0.030986,1.000000
...,...,...,...,...,...,...,...,...,...,...
380,175,105,0.272727,9,67,0.293860,0.638095,101,0.284507,0.961905
381,175,105,0.272727,9,67,0.293860,0.638095,101,0.284507,0.961905
382,175,105,0.272727,9,67,0.293860,0.638095,101,0.284507,0.961905
383,253,18,0.046753,2,13,0.057018,0.722222,18,0.050704,1.000000
