In [151]:
import pandas as pd
import numpy as np
import math
import json
%matplotlib inline

# read in the json files
portfolio = pd.read_json('data/portfolio.json', orient='records', lines=True)
profile = pd.read_json('data/profile.json', orient='records', lines=True)
transcript = pd.read_json('data/transcript.json', orient='records', lines=True)

In [86]:
transactions = transcript[transcript.event == 'transaction']
transactions

Unnamed: 0,person,event,value,time
12654,02c083884c7d45b39cc68e1314fec56c,transaction,{'amount': 0.8300000000000001},0
12657,9fa9ae8f57894cc9a3b8a9bbe0fc1b2f,transaction,{'amount': 34.56},0
12659,54890f68699049c2a04d415abc25e717,transaction,{'amount': 13.23},0
12670,b2f1cd155b864803ad8334cdf13c4bd2,transaction,{'amount': 19.51},0
12671,fe97aa22dd3e48c8b143116a8403dd52,transaction,{'amount': 18.97},0
...,...,...,...,...
306529,b3a1272bc9904337b331bf348c3e8c17,transaction,{'amount': 1.5899999999999999},714
306530,68213b08d99a4ae1b0dcb72aebd9aa35,transaction,{'amount': 9.53},714
306531,a00058cf10334a308c68e7631c529907,transaction,{'amount': 3.61},714
306532,76ddbd6576844afe811f1a3c0fbb5bec,transaction,{'amount': 3.5300000000000002},714


In [87]:
transactions['amount'] = transactions.value.apply(lambda x: x['amount'])
transactions.amount

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


12654      0.83
12657     34.56
12659     13.23
12670     19.51
12671     18.97
          ...  
306529     1.59
306530     9.53
306531     3.61
306532     3.53
306533     4.05
Name: amount, Length: 138953, dtype: float64

In [8]:
transactions[["person", "amount"]].to_csv("data/transactions.csv")

In [88]:
amounts = transactions.groupby('person').sum()[['amount']]
amounts

Unnamed: 0_level_0,amount
person,Unnamed: 1_level_1
0009655768c64bdeb2e877511632db8f,127.60
00116118485d4dfda04fdbaba9a87b5c,4.09
0011e0d4e6b944f998e987f904e8c1e5,79.46
0020c2b971eb4e9188eac86d93036a77,196.86
0020ccbbb6d84e358d3414a3ff76cffd,154.05
...,...
fff3ba4757bd42088c044ca26d73817a,580.98
fff7576017104bcc8677a8d63322b5e1,29.94
fff8957ea8b240a6b5e634b6ee8eafcf,12.15
fffad4f4828548d1b5583907f2e9906b,88.83


In [89]:
transcript[transcript.event.isin(offers)].value.apply(lambda x: x.keys())

Series([], Name: value, dtype: object)

In [92]:
offer_columns = ['offer received', 'offer viewed', 'time']
offers = transcript[transcript.event.isin(offer_columns)][['person', 'event', 'value', 'time']]
offers.value = offers.value.apply(lambda x: x['offer id'])
offers.columns = ['person', 'event', 'offer_id', 'time']
offers

Unnamed: 0,person,event,offer_id,time
0,78afa995795e4d85b5d9ceeca43f5fef,offer received,9b98b8c7a33c4b65b9aebfe6a799e6d9,0
1,a03223e636434f42ac4c3df47e8bac43,offer received,0b1e1539f2cc45b7b9fa7c272da2e1d7,0
2,e2127556f4f64592b11af22de27a7932,offer received,2906b810c7d4411798c6938adc9daaa5,0
3,8ec6ce2a7e7949b1bf142def7d0e0586,offer received,fafdcd668e3743c1bb461111dcafc2a4,0
4,68617ca6246f4fbc85e91a2a49552598,offer received,4d5c57ea9a6940dd891ad53e9dbe8da0,0
...,...,...,...,...
306441,d56386cf344c4829bbf420d1895dca37,offer viewed,5a8bc65990b245e5a138643cd4eb9837,714
306450,9b51e8797290403b90d09d864dec4b94,offer viewed,3f207df678b143eea3cee63160fa8bed,714
306483,84fb57a7fe8045a8bf6236738ee73a0f,offer viewed,5a8bc65990b245e5a138643cd4eb9837,714
306490,abc4359eb34e4e2ca2349da2ddf771b6,offer viewed,3f207df678b143eea3cee63160fa8bed,714


In [94]:
# offers.person.unique().shape[0]
# offers.duplicated().sum()
offers.pivot_table(index=['person', 'event', 'offer_id'], aggfunc='size')

person                            event           offer_id                        
0009655768c64bdeb2e877511632db8f  offer received  2906b810c7d4411798c6938adc9daaa5    1
                                                  3f207df678b143eea3cee63160fa8bed    1
                                                  5a8bc65990b245e5a138643cd4eb9837    1
                                                  f19421c1d4aa40978ebb69ca19b0e20d    1
                                                  fafdcd668e3743c1bb461111dcafc2a4    1
                                                                                     ..
ffff82501cea40309d5fdd7edcca4a07  offer received  fafdcd668e3743c1bb461111dcafc2a4    1
                                  offer viewed    0b1e1539f2cc45b7b9fa7c272da2e1d7    1
                                                  2906b810c7d4411798c6938adc9daaa5    3
                                                  9b98b8c7a33c4b65b9aebfe6a799e6d9    1
                                     

In [66]:
total_offer_counts = total_offers.pivot_table(index=['person'], columns=['event'], aggfunc='size')
total_offer_counts

event,offer completed,offer received,offer viewed
person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0009655768c64bdeb2e877511632db8f,3.0,5.0,4.0
00116118485d4dfda04fdbaba9a87b5c,,2.0,2.0
0011e0d4e6b944f998e987f904e8c1e5,3.0,5.0,5.0
0020c2b971eb4e9188eac86d93036a77,3.0,5.0,3.0
0020ccbbb6d84e358d3414a3ff76cffd,3.0,4.0,4.0
...,...,...,...
fff3ba4757bd42088c044ca26d73817a,3.0,6.0,3.0
fff7576017104bcc8677a8d63322b5e1,3.0,5.0,4.0
fff8957ea8b240a6b5e634b6ee8eafcf,,3.0,2.0
fffad4f4828548d1b5583907f2e9906b,3.0,4.0,4.0


In [82]:
total_offer_counts = total_offers.pivot_table(index=['offer_id'], columns=['event'], aggfunc='size')
total_offer_counts

event,offer completed,offer received,offer viewed
offer_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0b1e1539f2cc45b7b9fa7c272da2e1d7,3420.0,7668.0,2663.0
2298d6c36e964ae4a3e7e9706d1fb8c2,5156.0,7646.0,7337.0
2906b810c7d4411798c6938adc9daaa5,4017.0,7632.0,4118.0
3f207df678b143eea3cee63160fa8bed,,7617.0,4144.0
4d5c57ea9a6940dd891ad53e9dbe8da0,3331.0,7593.0,7298.0
5a8bc65990b245e5a138643cd4eb9837,,7618.0,6687.0
9b98b8c7a33c4b65b9aebfe6a799e6d9,4354.0,7677.0,4171.0
ae264e3637204a6fb9bb56bc8210ddfd,3688.0,7658.0,6716.0
f19421c1d4aa40978ebb69ca19b0e20d,4296.0,7571.0,7264.0
fafdcd668e3743c1bb461111dcafc2a4,5317.0,7597.0,7327.0


In [95]:
offer_completed = transcript[transcript.event == 'offer completed'][['person', 'event', 'value', 'time']]
offer_completed['offer_id'] = offer_completed.value.apply(lambda x: x['offer_id']).to_list()
offer_completed['reward'] = offer_completed.value.apply(lambda x: x['reward']).to_list()
offer_completed = offer_completed[['person', 'event', 'offer_id', 'reward', 'time']]
offer_completed

Unnamed: 0,person,event,offer_id,reward,time
12658,9fa9ae8f57894cc9a3b8a9bbe0fc1b2f,offer completed,2906b810c7d4411798c6938adc9daaa5,2,0
12672,fe97aa22dd3e48c8b143116a8403dd52,offer completed,fafdcd668e3743c1bb461111dcafc2a4,2,0
12679,629fc02d56414d91bca360decdfa9288,offer completed,9b98b8c7a33c4b65b9aebfe6a799e6d9,5,0
12692,676506bad68e4161b9bbaffeb039626b,offer completed,ae264e3637204a6fb9bb56bc8210ddfd,10,0
12697,8f7dd3b2afe14c078eb4f6e6fe4ba97d,offer completed,4d5c57ea9a6940dd891ad53e9dbe8da0,10,0
...,...,...,...,...,...
306475,0c027f5f34dd4b9eba0a25785c611273,offer completed,2298d6c36e964ae4a3e7e9706d1fb8c2,3,714
306497,a6f84f4e976f44508c358cc9aba6d2b3,offer completed,2298d6c36e964ae4a3e7e9706d1fb8c2,3,714
306506,b895c57e8cd047a8872ce02aa54759d6,offer completed,fafdcd668e3743c1bb461111dcafc2a4,2,714
306509,8431c16f8e1d440880db371a68f82dd0,offer completed,fafdcd668e3743c1bb461111dcafc2a4,2,714


In [96]:
offer_completed.duplicated().sum()

397

In [97]:
# offer_completed.to_csv("data/offer_completed.csv")
# offers.to_csv("data/offers.csv")
total_offers = pd.concat([offers, offer_completed], axis=0)
total_offers.to_csv("data/offers.csv")
total_offers

Unnamed: 0,person,event,offer_id,time,reward
0,78afa995795e4d85b5d9ceeca43f5fef,offer received,9b98b8c7a33c4b65b9aebfe6a799e6d9,0,
1,a03223e636434f42ac4c3df47e8bac43,offer received,0b1e1539f2cc45b7b9fa7c272da2e1d7,0,
2,e2127556f4f64592b11af22de27a7932,offer received,2906b810c7d4411798c6938adc9daaa5,0,
3,8ec6ce2a7e7949b1bf142def7d0e0586,offer received,fafdcd668e3743c1bb461111dcafc2a4,0,
4,68617ca6246f4fbc85e91a2a49552598,offer received,4d5c57ea9a6940dd891ad53e9dbe8da0,0,
...,...,...,...,...,...
306475,0c027f5f34dd4b9eba0a25785c611273,offer completed,2298d6c36e964ae4a3e7e9706d1fb8c2,714,3.0
306497,a6f84f4e976f44508c358cc9aba6d2b3,offer completed,2298d6c36e964ae4a3e7e9706d1fb8c2,714,3.0
306506,b895c57e8cd047a8872ce02aa54759d6,offer completed,fafdcd668e3743c1bb461111dcafc2a4,714,2.0
306509,8431c16f8e1d440880db371a68f82dd0,offer completed,fafdcd668e3743c1bb461111dcafc2a4,714,2.0


In [98]:
total_offers[total_offers.event == 'offer received'].pivot_table(index='person', columns='offer_id', aggfunc='size')

offer_id,0b1e1539f2cc45b7b9fa7c272da2e1d7,2298d6c36e964ae4a3e7e9706d1fb8c2,2906b810c7d4411798c6938adc9daaa5,3f207df678b143eea3cee63160fa8bed,4d5c57ea9a6940dd891ad53e9dbe8da0,5a8bc65990b245e5a138643cd4eb9837,9b98b8c7a33c4b65b9aebfe6a799e6d9,ae264e3637204a6fb9bb56bc8210ddfd,f19421c1d4aa40978ebb69ca19b0e20d,fafdcd668e3743c1bb461111dcafc2a4
person,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0009655768c64bdeb2e877511632db8f,,,1.0,1.0,,1.0,,,1.0,1.0
00116118485d4dfda04fdbaba9a87b5c,,,,,,,,,2.0,
0011e0d4e6b944f998e987f904e8c1e5,1.0,1.0,,1.0,,1.0,1.0,,,
0020c2b971eb4e9188eac86d93036a77,,,,,1.0,1.0,,1.0,,2.0
0020ccbbb6d84e358d3414a3ff76cffd,,1.0,,,,1.0,1.0,,1.0,
...,...,...,...,...,...,...,...,...,...,...
fff3ba4757bd42088c044ca26d73817a,,,2.0,,,2.0,1.0,,,1.0
fff7576017104bcc8677a8d63322b5e1,,,,,1.0,,1.0,1.0,,2.0
fff8957ea8b240a6b5e634b6ee8eafcf,,,,1.0,1.0,,,,,1.0
fffad4f4828548d1b5583907f2e9906b,,,,,,1.0,1.0,,2.0,


In [99]:
completed = total_offers[total_offers.event == 'offer completed']
completed.merge(portfolio, left_on='offer_id', right_on='id').offer_type.unique()

array(['discount', 'bogo'], dtype=object)

In [81]:
completed = total_offers[total_offers.event == 'offer viewed']
completed.merge(portfolio, left_on='offer_id', right_on='id').offer_type.unique()

array(['bogo', 'informational', 'discount'], dtype=object)

In [100]:
merged = profile.merge(offers, left_on=['id'], right_on)
merged

SyntaxError: positional argument follows keyword argument (<ipython-input-100-9eebc70bbc9e>, line 1)

In [110]:
profile

Unnamed: 0_level_0,gender,age,became_member_on,income
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
68be06ca386d4c31939f3a4f0e3dd783,,118,20170212,
0610b486422d4921ae7d2bf64640c50b,F,55,20170715,112000.0
38fe809add3b4fcf9315a9694bb96ff5,,118,20180712,
78afa995795e4d85b5d9ceeca43f5fef,F,75,20170509,100000.0
a03223e636434f42ac4c3df47e8bac43,,118,20170804,
...,...,...,...,...
6d5f3a774f3d4714ab0c092238f3a1d7,F,45,20180604,54000.0
2cb4f97358b841b9a9773a7aa05a9d77,M,61,20180713,72000.0
01d26f638c274aa0b965d24cefe3183f,M,49,20170126,73000.0
9dc1421481194dcd9400aec7c9ae6366,F,83,20160307,50000.0


In [132]:
profile.index = profile.id
profile = profile.drop(columns=['id'])
profile

Unnamed: 0_level_0,gender,age,became_member_on,income
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
68be06ca386d4c31939f3a4f0e3dd783,,118,20170212,
0610b486422d4921ae7d2bf64640c50b,F,55,20170715,112000.0
38fe809add3b4fcf9315a9694bb96ff5,,118,20180712,
78afa995795e4d85b5d9ceeca43f5fef,F,75,20170509,100000.0
a03223e636434f42ac4c3df47e8bac43,,118,20170804,
...,...,...,...,...
6d5f3a774f3d4714ab0c092238f3a1d7,F,45,20180604,54000.0
2cb4f97358b841b9a9773a7aa05a9d77,M,61,20180713,72000.0
01d26f638c274aa0b965d24cefe3183f,M,49,20170126,73000.0
9dc1421481194dcd9400aec7c9ae6366,F,83,20160307,50000.0


In [133]:
profile['income_na'] = profile.income.isna().astype(int)
profile.income = profile.income.fillna(profile.income.mean())
profile

Unnamed: 0_level_0,gender,age,became_member_on,income,income_na
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
68be06ca386d4c31939f3a4f0e3dd783,,118,20170212,65404.991568,1
0610b486422d4921ae7d2bf64640c50b,F,55,20170715,112000.000000,0
38fe809add3b4fcf9315a9694bb96ff5,,118,20180712,65404.991568,1
78afa995795e4d85b5d9ceeca43f5fef,F,75,20170509,100000.000000,0
a03223e636434f42ac4c3df47e8bac43,,118,20170804,65404.991568,1
...,...,...,...,...,...
6d5f3a774f3d4714ab0c092238f3a1d7,F,45,20180604,54000.000000,0
2cb4f97358b841b9a9773a7aa05a9d77,M,61,20180713,72000.000000,0
01d26f638c274aa0b965d24cefe3183f,M,49,20170126,73000.000000,0
9dc1421481194dcd9400aec7c9ae6366,F,83,20160307,50000.000000,0


In [134]:
profile = pd.get_dummies(profile)
profile

Unnamed: 0_level_0,age,became_member_on,income,income_na,gender_F,gender_M,gender_O
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
68be06ca386d4c31939f3a4f0e3dd783,118,20170212,65404.991568,1,0,0,0
0610b486422d4921ae7d2bf64640c50b,55,20170715,112000.000000,0,1,0,0
38fe809add3b4fcf9315a9694bb96ff5,118,20180712,65404.991568,1,0,0,0
78afa995795e4d85b5d9ceeca43f5fef,75,20170509,100000.000000,0,1,0,0
a03223e636434f42ac4c3df47e8bac43,118,20170804,65404.991568,1,0,0,0
...,...,...,...,...,...,...,...
6d5f3a774f3d4714ab0c092238f3a1d7,45,20180604,54000.000000,0,1,0,0
2cb4f97358b841b9a9773a7aa05a9d77,61,20180713,72000.000000,0,0,1,0
01d26f638c274aa0b965d24cefe3183f,49,20170126,73000.000000,0,0,1,0
9dc1421481194dcd9400aec7c9ae6366,83,20160307,50000.000000,0,1,0,0


In [135]:
profile.age = profile.age.replace({118: np.NaN})
profile['age_na'] = profile.age.isna().astype(int)
profile

Unnamed: 0_level_0,age,became_member_on,income,income_na,gender_F,gender_M,gender_O,age_na
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
68be06ca386d4c31939f3a4f0e3dd783,,20170212,65404.991568,1,0,0,0,1
0610b486422d4921ae7d2bf64640c50b,55.0,20170715,112000.000000,0,1,0,0,0
38fe809add3b4fcf9315a9694bb96ff5,,20180712,65404.991568,1,0,0,0,1
78afa995795e4d85b5d9ceeca43f5fef,75.0,20170509,100000.000000,0,1,0,0,0
a03223e636434f42ac4c3df47e8bac43,,20170804,65404.991568,1,0,0,0,1
...,...,...,...,...,...,...,...,...
6d5f3a774f3d4714ab0c092238f3a1d7,45.0,20180604,54000.000000,0,1,0,0,0
2cb4f97358b841b9a9773a7aa05a9d77,61.0,20180713,72000.000000,0,0,1,0,0
01d26f638c274aa0b965d24cefe3183f,49.0,20170126,73000.000000,0,0,1,0,0
9dc1421481194dcd9400aec7c9ae6366,83.0,20160307,50000.000000,0,1,0,0,0


In [136]:
profile.age = profile.age.fillna(profile.age.mean())
profile

Unnamed: 0_level_0,age,became_member_on,income,income_na,gender_F,gender_M,gender_O,age_na
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
68be06ca386d4c31939f3a4f0e3dd783,54.393524,20170212,65404.991568,1,0,0,0,1
0610b486422d4921ae7d2bf64640c50b,55.000000,20170715,112000.000000,0,1,0,0,0
38fe809add3b4fcf9315a9694bb96ff5,54.393524,20180712,65404.991568,1,0,0,0,1
78afa995795e4d85b5d9ceeca43f5fef,75.000000,20170509,100000.000000,0,1,0,0,0
a03223e636434f42ac4c3df47e8bac43,54.393524,20170804,65404.991568,1,0,0,0,1
...,...,...,...,...,...,...,...,...
6d5f3a774f3d4714ab0c092238f3a1d7,45.000000,20180604,54000.000000,0,1,0,0,0
2cb4f97358b841b9a9773a7aa05a9d77,61.000000,20180713,72000.000000,0,0,1,0,0
01d26f638c274aa0b965d24cefe3183f,49.000000,20170126,73000.000000,0,0,1,0,0
9dc1421481194dcd9400aec7c9ae6366,83.000000,20160307,50000.000000,0,1,0,0,0


In [139]:
profile['member_since'] = profile.became_member_on.apply(lambda x: str(x)[:4]).astype(int)
profile = profile.drop(columns='became_member_on')
profile

Unnamed: 0_level_0,age,income,income_na,gender_F,gender_M,gender_O,age_na,member_since
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
68be06ca386d4c31939f3a4f0e3dd783,54.393524,65404.991568,1,0,0,0,1,2017
0610b486422d4921ae7d2bf64640c50b,55.000000,112000.000000,0,1,0,0,0,2017
38fe809add3b4fcf9315a9694bb96ff5,54.393524,65404.991568,1,0,0,0,1,2018
78afa995795e4d85b5d9ceeca43f5fef,75.000000,100000.000000,0,1,0,0,0,2017
a03223e636434f42ac4c3df47e8bac43,54.393524,65404.991568,1,0,0,0,1,2017
...,...,...,...,...,...,...,...,...
6d5f3a774f3d4714ab0c092238f3a1d7,45.000000,54000.000000,0,1,0,0,0,2018
2cb4f97358b841b9a9773a7aa05a9d77,61.000000,72000.000000,0,0,1,0,0,2018
01d26f638c274aa0b965d24cefe3183f,49.000000,73000.000000,0,0,1,0,0,2017
9dc1421481194dcd9400aec7c9ae6366,83.000000,50000.000000,0,1,0,0,0,2016


In [141]:
profile.to_csv("data/profile.csv")

In [146]:
# portfolio.index = portfolio.id
# portfolio = portfolio.drop(columns="id")
portfolio

Unnamed: 0_level_0,reward,channels,difficulty,duration,offer_type
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ae264e3637204a6fb9bb56bc8210ddfd,10,"[email, mobile, social]",10,7,bogo
4d5c57ea9a6940dd891ad53e9dbe8da0,10,"[web, email, mobile, social]",10,5,bogo
3f207df678b143eea3cee63160fa8bed,0,"[web, email, mobile]",0,4,informational
9b98b8c7a33c4b65b9aebfe6a799e6d9,5,"[web, email, mobile]",5,7,bogo
0b1e1539f2cc45b7b9fa7c272da2e1d7,5,"[web, email]",20,10,discount
2298d6c36e964ae4a3e7e9706d1fb8c2,3,"[web, email, mobile, social]",7,7,discount
fafdcd668e3743c1bb461111dcafc2a4,2,"[web, email, mobile, social]",10,10,discount
5a8bc65990b245e5a138643cd4eb9837,0,"[email, mobile, social]",0,3,informational
f19421c1d4aa40978ebb69ca19b0e20d,5,"[web, email, mobile, social]",5,5,bogo
2906b810c7d4411798c6938adc9daaa5,2,"[web, email, mobile]",10,7,discount


In [164]:
portfolio_pivot = portfolio.explode('channels') \
    .pivot_table(index=["id", "reward", "difficulty", "duration", "offer_type"], columns=["channels"], aggfunc="size") \
    .fillna(0)
portfolio_pivot

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,channels,email,mobile,social,web
id,reward,difficulty,duration,offer_type,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0b1e1539f2cc45b7b9fa7c272da2e1d7,5,20,10,discount,1.0,0.0,0.0,1.0
2298d6c36e964ae4a3e7e9706d1fb8c2,3,7,7,discount,1.0,1.0,1.0,1.0
2906b810c7d4411798c6938adc9daaa5,2,10,7,discount,1.0,1.0,0.0,1.0
3f207df678b143eea3cee63160fa8bed,0,0,4,informational,1.0,1.0,0.0,1.0
4d5c57ea9a6940dd891ad53e9dbe8da0,10,10,5,bogo,1.0,1.0,1.0,1.0
5a8bc65990b245e5a138643cd4eb9837,0,0,3,informational,1.0,1.0,1.0,0.0
9b98b8c7a33c4b65b9aebfe6a799e6d9,5,5,7,bogo,1.0,1.0,0.0,1.0
ae264e3637204a6fb9bb56bc8210ddfd,10,10,7,bogo,1.0,1.0,1.0,0.0
f19421c1d4aa40978ebb69ca19b0e20d,5,5,5,bogo,1.0,1.0,1.0,1.0
fafdcd668e3743c1bb461111dcafc2a4,2,10,10,discount,1.0,1.0,1.0,1.0


In [169]:
portfolio_pivot = portfolio_pivot.reset_index()
portfolio_pivot

channels,index,id,reward,difficulty,duration,offer_type,email,mobile,social,web
0,0,0b1e1539f2cc45b7b9fa7c272da2e1d7,5,20,10,discount,1.0,0.0,0.0,1.0
1,1,2298d6c36e964ae4a3e7e9706d1fb8c2,3,7,7,discount,1.0,1.0,1.0,1.0
2,2,2906b810c7d4411798c6938adc9daaa5,2,10,7,discount,1.0,1.0,0.0,1.0
3,3,3f207df678b143eea3cee63160fa8bed,0,0,4,informational,1.0,1.0,0.0,1.0
4,4,4d5c57ea9a6940dd891ad53e9dbe8da0,10,10,5,bogo,1.0,1.0,1.0,1.0
5,5,5a8bc65990b245e5a138643cd4eb9837,0,0,3,informational,1.0,1.0,1.0,0.0
6,6,9b98b8c7a33c4b65b9aebfe6a799e6d9,5,5,7,bogo,1.0,1.0,0.0,1.0
7,7,ae264e3637204a6fb9bb56bc8210ddfd,10,10,7,bogo,1.0,1.0,1.0,0.0
8,8,f19421c1d4aa40978ebb69ca19b0e20d,5,5,5,bogo,1.0,1.0,1.0,1.0
9,9,fafdcd668e3743c1bb461111dcafc2a4,2,10,10,discount,1.0,1.0,1.0,1.0


In [174]:
portfolio_pivot.index = portfolio_pivot.id
portfolio_pivot

channels,index,id,reward,difficulty,duration,offer_type,email,mobile,social,web
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0b1e1539f2cc45b7b9fa7c272da2e1d7,0,0b1e1539f2cc45b7b9fa7c272da2e1d7,5,20,10,discount,1.0,0.0,0.0,1.0
2298d6c36e964ae4a3e7e9706d1fb8c2,1,2298d6c36e964ae4a3e7e9706d1fb8c2,3,7,7,discount,1.0,1.0,1.0,1.0
2906b810c7d4411798c6938adc9daaa5,2,2906b810c7d4411798c6938adc9daaa5,2,10,7,discount,1.0,1.0,0.0,1.0
3f207df678b143eea3cee63160fa8bed,3,3f207df678b143eea3cee63160fa8bed,0,0,4,informational,1.0,1.0,0.0,1.0
4d5c57ea9a6940dd891ad53e9dbe8da0,4,4d5c57ea9a6940dd891ad53e9dbe8da0,10,10,5,bogo,1.0,1.0,1.0,1.0
5a8bc65990b245e5a138643cd4eb9837,5,5a8bc65990b245e5a138643cd4eb9837,0,0,3,informational,1.0,1.0,1.0,0.0
9b98b8c7a33c4b65b9aebfe6a799e6d9,6,9b98b8c7a33c4b65b9aebfe6a799e6d9,5,5,7,bogo,1.0,1.0,0.0,1.0
ae264e3637204a6fb9bb56bc8210ddfd,7,ae264e3637204a6fb9bb56bc8210ddfd,10,10,7,bogo,1.0,1.0,1.0,0.0
f19421c1d4aa40978ebb69ca19b0e20d,8,f19421c1d4aa40978ebb69ca19b0e20d,5,5,5,bogo,1.0,1.0,1.0,1.0
fafdcd668e3743c1bb461111dcafc2a4,9,fafdcd668e3743c1bb461111dcafc2a4,2,10,10,discount,1.0,1.0,1.0,1.0


In [175]:
portfolio_pivot = portfolio_pivot.drop(columns='id')
portfolio_pivot.columns.name = ""

In [182]:
portfolio_dummies = pd.get_dummies(portfolio_pivot)
portfolio_dummies

Unnamed: 0_level_0,index,reward,difficulty,duration,email,mobile,social,web,offer_type_bogo,offer_type_discount,offer_type_informational
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
0b1e1539f2cc45b7b9fa7c272da2e1d7,0,5,20,10,1.0,0.0,0.0,1.0,0,1,0
2298d6c36e964ae4a3e7e9706d1fb8c2,1,3,7,7,1.0,1.0,1.0,1.0,0,1,0
2906b810c7d4411798c6938adc9daaa5,2,2,10,7,1.0,1.0,0.0,1.0,0,1,0
3f207df678b143eea3cee63160fa8bed,3,0,0,4,1.0,1.0,0.0,1.0,0,0,1
4d5c57ea9a6940dd891ad53e9dbe8da0,4,10,10,5,1.0,1.0,1.0,1.0,1,0,0
5a8bc65990b245e5a138643cd4eb9837,5,0,0,3,1.0,1.0,1.0,0.0,0,0,1
9b98b8c7a33c4b65b9aebfe6a799e6d9,6,5,5,7,1.0,1.0,0.0,1.0,1,0,0
ae264e3637204a6fb9bb56bc8210ddfd,7,10,10,7,1.0,1.0,1.0,0.0,1,0,0
f19421c1d4aa40978ebb69ca19b0e20d,8,5,5,5,1.0,1.0,1.0,1.0,1,0,0
fafdcd668e3743c1bb461111dcafc2a4,9,2,10,10,1.0,1.0,1.0,1.0,0,1,0


In [183]:
portfolio_dummies.to_csv('data/portfolio-dummy.csv')