# Retweet and Like Counts for Random Sample

### Step 1: Load Retweets H5 File and Remove Missing Values

In [15]:
import pandas as pd
import numpy as np
# Notebook-wide pandas display options.
# max_columns: 0 is kept from the original; the pandas options docs describe it as
# terminal-width auto-detection (None is the explicit "unlimited" value).
pd.set_option('display.max_columns', 0)
# Render every float with exactly two decimal places.
pd.set_option('display.float_format', lambda x: '%.2f' % x)
# Do not truncate long cell contents. None is the supported "no limit" value; the
# original -1 has been deprecated since pandas 1.0 and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)

In [None]:
# Read the "tweets" table from the retweets store.
# The store is opened read-only and inside a `with` block so the HDF5 file handle
# is released as soon as the frame is loaded (the original never closed it).
with pd.HDFStore("retweets.h5", mode="r") as hdf:
    rt_df = hdf["tweets"]
rt_df

In [None]:
# Inspect the rows whose retweet count is the literal string 'NA'.
rt_df.loc[rt_df['rt_retweet_count'] == 'NA']

In [None]:
# Keep only the rows whose retweet count is not the 'NA' placeholder.
has_rt_count = rt_df['rt_retweet_count'] != 'NA'
rt_df = rt_df.loc[has_rt_count]

In [None]:
# Re-display the frame after the 'NA' rows were filtered out in the previous cell.
rt_df

### Step 2: Group By RT ID and Calculate Maximum RT and Like Counts for Each ID

In [None]:
# Per retweeted status (rt_id_str), keep the highest retweet and like count seen.
# NOTE(review): the 'NA' string sentinel filtered out earlier suggests these counts
# may be stored as strings — TODO confirm. On strings, max() compares
# lexicographically ('9' > '10'), so the counts are coerced to numbers first.
# pd.to_numeric raises on any remaining non-numeric value instead of hiding it.
count_cols = ['rt_retweet_count', 'rt_favorite_count']
rt_counts = rt_df[count_cols].apply(pd.to_numeric)
grouped_rt_df = rt_counts.groupby(rt_df['rt_id_str']).max()

In [None]:
# Check what kind of object the groupby/agg call returned.
type(grouped_rt_df)

In [None]:
# Look at the index of the aggregated frame (the groupby key, rt_id_str).
grouped_rt_df.index

In [None]:
# Print pandas' summary (columns, dtypes, non-null counts) for the aggregated frame.
grouped_rt_df.info()

In [None]:
# Spot-check the aggregated counts for one retweeted status (index label is a string here).
example_rt_id = '739879672092184577'
grouped_rt_df.loc[example_rt_id]

### Step 3: Export Grouped Dataframe to a CSV; Import and Rename Columns

In [None]:
# Persist the per-status maxima; with to_csv defaults the index is written as the first column.
grouped_rt_df.to_csv(path_or_buf='retweets_and_likes_from_dataframe.csv')

In [2]:
# Reload the exported counts; the former index comes back as an ordinary column.
grouped_rt_df = pd.read_csv(filepath_or_buffer='retweets_and_likes_from_dataframe.csv')

In [3]:
# Preview the first rows of the frame as re-read from the CSV.
grouped_rt_df.head()

Unnamed: 0,rt_id_str,rt_retweet_count,rt_favorite_count
0,113602306414620672,2,1
1,160107814801522688,2,0
2,193284718534475777,2,0
3,199877960612843520,5,2
4,208333963587104768,3,0


In [4]:
# Positional rename: rt_id_str -> tweet_id, rt_retweet_count -> rt_count_data,
# rt_favorite_count -> like_count_data.
renamed_columns = ['tweet_id', 'rt_count_data', 'like_count_data']
grouped_rt_df.columns = renamed_columns
grouped_rt_df.head()

Unnamed: 0,tweet_id,rt_count_data,like_count_data
0,113602306414620672,2,1
1,160107814801522688,2,0
2,193284718534475777,2,0
3,199877960612843520,5,2
4,208333963587104768,3,0


### Step 4: Load Random Sample as a Dataframe and Merge on Tweet ID Column

In [None]:
# Load the random sample workbook into a DataFrame and display it.
sample_df = pd.read_excel(io='4000_sample_v3.xlsx')
sample_df

In [None]:
# Left-merge so every sampled tweet is kept; tweets without a retweet record get NaN counts.
# NOTE(review): assumes tweet_id has the same dtype in both frames — confirm for the Excel source.
merged_df = sample_df.merge(grouped_rt_df, how='left', on='tweet_id')
merged_df

### Step 5: Replace Missing Values in RT Count Column with 0s

In [None]:
# Tweets with no retweet record get a retweet count of 0.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on an attribute-accessed Series: that chained in-place form
# warns in pandas 2.x and leaves merged_df unchanged under Copy-on-Write.
merged_df['rt_count_data'] = merged_df['rt_count_data'].fillna(0)
merged_df

In [None]:
# Spot-check a single tweet in the merged sample by its numeric ID.
merged_df.loc[merged_df['tweet_id'] == 736977008723386368]

### Step 6: Export Merged Dataframe to CSV

In [None]:
# Write the sample, now carrying retweet/like counts, to a new CSV version.
merged_df.to_csv(path_or_buf='4000_sample_v4.csv')

# Retweet and Like Counts for All Original Tweets

### Step 1: Load Original Tweets H5 File

In [5]:
# Read the processed original tweets from their HDF5 store.
# The store is opened read-only and inside a `with` block so the file handle is
# released once the frame is loaded (the original never closed it).
with pd.HDFStore("tweets_processed.h5", mode="r") as hdf:
    df = hdf["tweets"]
df

Unnamed: 0,created_at,date,day_numeric,day,time,hour,hour_binned,days_before_ref,account_age,source,in_reply_to_status_id,in_reply_to_user_id,coordinates,tweet_url,text,user_name,user_screen_name,user_description,user_verified,user_followers_count,user_friends_count,user_created_at,user_location,user_time_zone,user_statuses_count,user_favourites_count,hashtags,hashtags_count,mentions,mentions_count,urls,urls_count,urls_count_binary,media_urls,media_type_count,media_type_count_binary,media_type,photo,video,gif
720824650373029889,2016-04-15 05:02:43,2016-04-15,4,Friday,05:02:43,05,Small hours,69,0.22,Twitter Web Client,,,,https://www.twitter.com/SJDelahunty72/status/7...,Referendum Party: Election Video https://t.co...,Steven J Delahunty,SJDelahunty72,@TrentUni Economics & Ex @RoyalAirForce (joine...,False,88,15,2016-01-23,"Nottingham, England",,9093,2312,"[bbcbreakfast, gmb, euref, bbcqt]",4,[YouTube],1,[https://t.co/0yCnmwO7w7],1,1,[pic.twitter.com/CDAM1jMzeZ],1,1,[photo],1,0,0
720824890891218946,2016-04-15 05:03:40,2016-04-15,4,Friday,05:03:40,05,Small hours,69,1.24,Twitter for Android,,,,https://www.twitter.com/BritinQaf/status/72082...,The best couple 4Ever 💟 💟 #Britin #QAF https:/...,Brian And Justin ❤,BritinQaf,"Argentina fans who love Brian and Justin , thi...",False,1057,206,2015-01-19,,,4543,2580,"[britin, qaf]",2,[],0,[],0,0,[pic.twitter.com/s4SOtpA8ya],1,1,[animated_gif],0,0,1
720825083023925248,2016-04-15 05:04:26,2016-04-15,4,Friday,05:04:26,05,Small hours,69,1.21,Twitter for iPhone,,,,https://www.twitter.com/gabididit/status/72082...,#DemDebate great and all but still waiting for...,Gabrielle Belli,gabididit,M.A. student researching and writing about acr...,False,61,179,2015-01-30,"New York, NY",Eastern Time (US & Canada),172,159,"[demdebate, drones, abortion, globalsouth, ref...",6,[],0,[],0,0,[],0,0,[],0,0,0
720825114116231168,2016-04-15 05:04:34,2016-04-15,4,Friday,05:04:34,05,Small hours,69,6.26,Twitter Web Client,707714714042822657,3241779670,,https://www.twitter.com/DrAlfOldman/status/720...,@BrexitWatch This is scary but not surprising....,Alf Oldman,DrAlfOldman,"Blogs about politics, people & travel. Expert ...",False,4493,136,2010-01-11,"Latchi, Cyprus",Athens,30298,319,[],0,[BrexitWatch],1,[],0,0,[],0,0,[],0,0,0
720825272447021057,2016-04-15 05:05:11,2016-04-15,4,Friday,05:05:11,05,Small hours,69,1.24,Twitter for Android,,,,https://www.twitter.com/BritinQaf/status/72082...,AND WATCHING YOU WALK AWAY 💟 💟 💟 ❤ 💕 #britin #...,Brian And Justin ❤,BritinQaf,"Argentina fans who love Brian and Justin , thi...",False,1057,206,2015-01-19,,,4545,2580,"[britin, qaf]",2,[],0,[],0,0,[pic.twitter.com/8Tf8FDVUnh],1,1,[animated_gif],0,0,1
720825677277061120,2016-04-15 05:06:48,2016-04-15,4,Friday,05:06:48,05,Small hours,69,8.47,Tweet Jukebox,,,,https://www.twitter.com/DAILYSQUIB/status/7208...,#VoteLeave and Austerity Will End https://t.co...,Daily Squib News,DAILYSQUIB,CAUTION! The Daily Squib can be hazardous to y...,False,104369,2821,2007-10-26,All major cities worldwide,London,3442,5,"[voteleave, eureferendum, euref, remain]",4,[],0,[https://t.co/OiGzHH50f6],1,1,[pic.twitter.com/xRPciOzi6Z],1,1,[photo],1,0,0
720825685481119744,2016-04-15 05:06:50,2016-04-15,4,Friday,05:06:50,05,Small hours,69,8.47,Tweet Jukebox,,,,https://www.twitter.com/DAILYSQUIB/status/7208...,#Brexit #Remain #eureferendum BREXIT: Volcanoe...,Daily Squib News,DAILYSQUIB,CAUTION! The Daily Squib can be hazardous to y...,False,104369,2821,2007-10-26,All major cities worldwide,London,3443,5,"[brexit, remain, eureferendum]",3,[],0,[https://t.co/0pSeyDUbNm],1,1,[],0,0,[],0,0,0
720825751310716930,2016-04-15 05:07:06,2016-04-15,4,Friday,05:07:06,05,Small hours,69,0.06,Tweet Jukebox,,,,https://www.twitter.com/RemainShame/status/720...,Do you want live in EU dictatorship? https://t...,BrexitNeverSurrender,RemainShame,The Spirit of a Great Man Lives On,False,74,60,2016-03-22,"Kent, England",,715,20,"[eureferendum, brexit, remain, strongerin, inc...",5,[],0,[https://t.co/A2E0AHAZpj],1,1,[pic.twitter.com/N4fL7ko0ye],1,1,[photo],1,0,0
720825778351378433,2016-04-15 05:07:12,2016-04-15,4,Friday,05:07:12,05,Small hours,69,0.06,Tweet Jukebox,,,,https://www.twitter.com/RemainShame/status/720...,"All the great things are simple, and many can ...",BrexitNeverSurrender,RemainShame,The Spirit of a Great Man Lives On,False,74,60,2016-03-22,"Kent, England",,716,20,"[brexit, euref]",2,[],0,[],0,0,[],0,0,[],0,0,0
720825780058521600,2016-04-15 05:07:12,2016-04-15,4,Friday,05:07:12,05,Small hours,69,0.03,Tweet Jukebox,,,,https://www.twitter.com/EU_Failed/status/72082...,Selling Britain Off On the Cheap is Good Says ...,EUfailed,EU_Failed,,False,26,20,2016-04-02,,,78,1,"[eu, eureferendum, euref, brexit, voteleave]",5,[],0,[https://t.co/iXoWvXz3ok],1,1,[pic.twitter.com/zgVlzxXdSa],1,1,[photo],1,0,0


### Step 2: Set the Index of the RT Dataframe to the Tweet ID

In [6]:
# Preview the counts frame while tweet_id is still an ordinary column.
grouped_rt_df.head()

Unnamed: 0,tweet_id,rt_count_data,like_count_data
0,113602306414620672,2,1
1,160107814801522688,2,0
2,193284718534475777,2,0
3,199877960612843520,5,2
4,208333963587104768,3,0


In [7]:
# Index the counts by tweet_id so they can be joined onto the tweets frame by index.
# The guard makes the cell safe to re-run: once tweet_id has become the index it is
# no longer a column, and an unguarded set_index would raise KeyError.
if 'tweet_id' in grouped_rt_df.columns:
    grouped_rt_df = grouped_rt_df.set_index('tweet_id')
grouped_rt_df.head()

Unnamed: 0_level_0,rt_count_data,like_count_data
tweet_id,Unnamed: 1_level_1,Unnamed: 2_level_1
113602306414620672,2,1
160107814801522688,2,0
193284718534475777,2,0
199877960612843520,5,2
208333963587104768,3,0


### Step 3: Merge the Tweet and RT Dataframes on the Index

In [8]:
# Index-on-index left join: every original tweet is kept, and tweets without a
# retweet record get NaN in rt_count_data / like_count_data.
merged_df = df.join(other=grouped_rt_df, how='left')
merged_df

Unnamed: 0,created_at,date,day_numeric,day,time,hour,hour_binned,days_before_ref,account_age,source,in_reply_to_status_id,in_reply_to_user_id,coordinates,tweet_url,text,user_name,user_screen_name,user_description,user_verified,user_followers_count,user_friends_count,user_created_at,user_location,user_time_zone,user_statuses_count,user_favourites_count,hashtags,hashtags_count,mentions,mentions_count,urls,urls_count,urls_count_binary,media_urls,media_type_count,media_type_count_binary,media_type,photo,video,gif,rt_count_data,like_count_data
720824650373029889,2016-04-15 05:02:43,2016-04-15,4,Friday,05:02:43,05,Small hours,69,0.22,Twitter Web Client,,,,https://www.twitter.com/SJDelahunty72/status/7...,Referendum Party: Election Video https://t.co...,Steven J Delahunty,SJDelahunty72,@TrentUni Economics & Ex @RoyalAirForce (joine...,False,88,15,2016-01-23,"Nottingham, England",,9093,2312,"[bbcbreakfast, gmb, euref, bbcqt]",4,[YouTube],1,[https://t.co/0yCnmwO7w7],1,1,[pic.twitter.com/CDAM1jMzeZ],1,1,[photo],1,0,0,,
720824890891218946,2016-04-15 05:03:40,2016-04-15,4,Friday,05:03:40,05,Small hours,69,1.24,Twitter for Android,,,,https://www.twitter.com/BritinQaf/status/72082...,The best couple 4Ever 💟 💟 #Britin #QAF https:/...,Brian And Justin ❤,BritinQaf,"Argentina fans who love Brian and Justin , thi...",False,1057,206,2015-01-19,,,4543,2580,"[britin, qaf]",2,[],0,[],0,0,[pic.twitter.com/s4SOtpA8ya],1,1,[animated_gif],0,0,1,13.0,32.0
720825083023925248,2016-04-15 05:04:26,2016-04-15,4,Friday,05:04:26,05,Small hours,69,1.21,Twitter for iPhone,,,,https://www.twitter.com/gabididit/status/72082...,#DemDebate great and all but still waiting for...,Gabrielle Belli,gabididit,M.A. student researching and writing about acr...,False,61,179,2015-01-30,"New York, NY",Eastern Time (US & Canada),172,159,"[demdebate, drones, abortion, globalsouth, ref...",6,[],0,[],0,0,[],0,0,[],0,0,0,,
720825114116231168,2016-04-15 05:04:34,2016-04-15,4,Friday,05:04:34,05,Small hours,69,6.26,Twitter Web Client,707714714042822657,3241779670,,https://www.twitter.com/DrAlfOldman/status/720...,@BrexitWatch This is scary but not surprising....,Alf Oldman,DrAlfOldman,"Blogs about politics, people & travel. Expert ...",False,4493,136,2010-01-11,"Latchi, Cyprus",Athens,30298,319,[],0,[BrexitWatch],1,[],0,0,[],0,0,[],0,0,0,,
720825272447021057,2016-04-15 05:05:11,2016-04-15,4,Friday,05:05:11,05,Small hours,69,1.24,Twitter for Android,,,,https://www.twitter.com/BritinQaf/status/72082...,AND WATCHING YOU WALK AWAY 💟 💟 💟 ❤ 💕 #britin #...,Brian And Justin ❤,BritinQaf,"Argentina fans who love Brian and Justin , thi...",False,1057,206,2015-01-19,,,4545,2580,"[britin, qaf]",2,[],0,[],0,0,[pic.twitter.com/8Tf8FDVUnh],1,1,[animated_gif],0,0,1,8.0,29.0
720825677277061120,2016-04-15 05:06:48,2016-04-15,4,Friday,05:06:48,05,Small hours,69,8.47,Tweet Jukebox,,,,https://www.twitter.com/DAILYSQUIB/status/7208...,#VoteLeave and Austerity Will End https://t.co...,Daily Squib News,DAILYSQUIB,CAUTION! The Daily Squib can be hazardous to y...,False,104369,2821,2007-10-26,All major cities worldwide,London,3442,5,"[voteleave, eureferendum, euref, remain]",4,[],0,[https://t.co/OiGzHH50f6],1,1,[pic.twitter.com/xRPciOzi6Z],1,1,[photo],1,0,0,,
720825685481119744,2016-04-15 05:06:50,2016-04-15,4,Friday,05:06:50,05,Small hours,69,8.47,Tweet Jukebox,,,,https://www.twitter.com/DAILYSQUIB/status/7208...,#Brexit #Remain #eureferendum BREXIT: Volcanoe...,Daily Squib News,DAILYSQUIB,CAUTION! The Daily Squib can be hazardous to y...,False,104369,2821,2007-10-26,All major cities worldwide,London,3443,5,"[brexit, remain, eureferendum]",3,[],0,[https://t.co/0pSeyDUbNm],1,1,[],0,0,[],0,0,0,,
720825751310716930,2016-04-15 05:07:06,2016-04-15,4,Friday,05:07:06,05,Small hours,69,0.06,Tweet Jukebox,,,,https://www.twitter.com/RemainShame/status/720...,Do you want live in EU dictatorship? https://t...,BrexitNeverSurrender,RemainShame,The Spirit of a Great Man Lives On,False,74,60,2016-03-22,"Kent, England",,715,20,"[eureferendum, brexit, remain, strongerin, inc...",5,[],0,[https://t.co/A2E0AHAZpj],1,1,[pic.twitter.com/N4fL7ko0ye],1,1,[photo],1,0,0,,
720825778351378433,2016-04-15 05:07:12,2016-04-15,4,Friday,05:07:12,05,Small hours,69,0.06,Tweet Jukebox,,,,https://www.twitter.com/RemainShame/status/720...,"All the great things are simple, and many can ...",BrexitNeverSurrender,RemainShame,The Spirit of a Great Man Lives On,False,74,60,2016-03-22,"Kent, England",,716,20,"[brexit, euref]",2,[],0,[],0,0,[],0,0,[],0,0,0,,
720825780058521600,2016-04-15 05:07:12,2016-04-15,4,Friday,05:07:12,05,Small hours,69,0.03,Tweet Jukebox,,,,https://www.twitter.com/EU_Failed/status/72082...,Selling Britain Off On the Cheap is Good Says ...,EUfailed,EU_Failed,,False,26,20,2016-04-02,,,78,1,"[eu, eureferendum, euref, brexit, voteleave]",5,[],0,[https://t.co/iXoWvXz3ok],1,1,[pic.twitter.com/zgVlzxXdSa],1,1,[photo],1,0,0,,


### Step 4: Replace Missing Values in RT Count Column with 0s and Rename the Count Columns

In [9]:
# Tweets that never appear in the retweet data get a retweet count of 0.
# The like count is left as NaN for those tweets.
# The result is assigned back to the column rather than calling
# fillna(inplace=True) on an attribute-accessed Series: that chained in-place form
# warns in pandas 2.x and leaves merged_df unchanged under Copy-on-Write.
merged_df['rt_count_data'] = merged_df['rt_count_data'].fillna(0)
merged_df

Unnamed: 0,created_at,date,day_numeric,day,time,hour,hour_binned,days_before_ref,account_age,source,in_reply_to_status_id,in_reply_to_user_id,coordinates,tweet_url,text,user_name,user_screen_name,user_description,user_verified,user_followers_count,user_friends_count,user_created_at,user_location,user_time_zone,user_statuses_count,user_favourites_count,hashtags,hashtags_count,mentions,mentions_count,urls,urls_count,urls_count_binary,media_urls,media_type_count,media_type_count_binary,media_type,photo,video,gif,rt_count_data,like_count_data
720824650373029889,2016-04-15 05:02:43,2016-04-15,4,Friday,05:02:43,05,Small hours,69,0.22,Twitter Web Client,,,,https://www.twitter.com/SJDelahunty72/status/7...,Referendum Party: Election Video https://t.co...,Steven J Delahunty,SJDelahunty72,@TrentUni Economics & Ex @RoyalAirForce (joine...,False,88,15,2016-01-23,"Nottingham, England",,9093,2312,"[bbcbreakfast, gmb, euref, bbcqt]",4,[YouTube],1,[https://t.co/0yCnmwO7w7],1,1,[pic.twitter.com/CDAM1jMzeZ],1,1,[photo],1,0,0,0.0,
720824890891218946,2016-04-15 05:03:40,2016-04-15,4,Friday,05:03:40,05,Small hours,69,1.24,Twitter for Android,,,,https://www.twitter.com/BritinQaf/status/72082...,The best couple 4Ever 💟 💟 #Britin #QAF https:/...,Brian And Justin ❤,BritinQaf,"Argentina fans who love Brian and Justin , thi...",False,1057,206,2015-01-19,,,4543,2580,"[britin, qaf]",2,[],0,[],0,0,[pic.twitter.com/s4SOtpA8ya],1,1,[animated_gif],0,0,1,13.0,32.0
720825083023925248,2016-04-15 05:04:26,2016-04-15,4,Friday,05:04:26,05,Small hours,69,1.21,Twitter for iPhone,,,,https://www.twitter.com/gabididit/status/72082...,#DemDebate great and all but still waiting for...,Gabrielle Belli,gabididit,M.A. student researching and writing about acr...,False,61,179,2015-01-30,"New York, NY",Eastern Time (US & Canada),172,159,"[demdebate, drones, abortion, globalsouth, ref...",6,[],0,[],0,0,[],0,0,[],0,0,0,0.0,
720825114116231168,2016-04-15 05:04:34,2016-04-15,4,Friday,05:04:34,05,Small hours,69,6.26,Twitter Web Client,707714714042822657,3241779670,,https://www.twitter.com/DrAlfOldman/status/720...,@BrexitWatch This is scary but not surprising....,Alf Oldman,DrAlfOldman,"Blogs about politics, people & travel. Expert ...",False,4493,136,2010-01-11,"Latchi, Cyprus",Athens,30298,319,[],0,[BrexitWatch],1,[],0,0,[],0,0,[],0,0,0,0.0,
720825272447021057,2016-04-15 05:05:11,2016-04-15,4,Friday,05:05:11,05,Small hours,69,1.24,Twitter for Android,,,,https://www.twitter.com/BritinQaf/status/72082...,AND WATCHING YOU WALK AWAY 💟 💟 💟 ❤ 💕 #britin #...,Brian And Justin ❤,BritinQaf,"Argentina fans who love Brian and Justin , thi...",False,1057,206,2015-01-19,,,4545,2580,"[britin, qaf]",2,[],0,[],0,0,[pic.twitter.com/8Tf8FDVUnh],1,1,[animated_gif],0,0,1,8.0,29.0
720825677277061120,2016-04-15 05:06:48,2016-04-15,4,Friday,05:06:48,05,Small hours,69,8.47,Tweet Jukebox,,,,https://www.twitter.com/DAILYSQUIB/status/7208...,#VoteLeave and Austerity Will End https://t.co...,Daily Squib News,DAILYSQUIB,CAUTION! The Daily Squib can be hazardous to y...,False,104369,2821,2007-10-26,All major cities worldwide,London,3442,5,"[voteleave, eureferendum, euref, remain]",4,[],0,[https://t.co/OiGzHH50f6],1,1,[pic.twitter.com/xRPciOzi6Z],1,1,[photo],1,0,0,0.0,
720825685481119744,2016-04-15 05:06:50,2016-04-15,4,Friday,05:06:50,05,Small hours,69,8.47,Tweet Jukebox,,,,https://www.twitter.com/DAILYSQUIB/status/7208...,#Brexit #Remain #eureferendum BREXIT: Volcanoe...,Daily Squib News,DAILYSQUIB,CAUTION! The Daily Squib can be hazardous to y...,False,104369,2821,2007-10-26,All major cities worldwide,London,3443,5,"[brexit, remain, eureferendum]",3,[],0,[https://t.co/0pSeyDUbNm],1,1,[],0,0,[],0,0,0,0.0,
720825751310716930,2016-04-15 05:07:06,2016-04-15,4,Friday,05:07:06,05,Small hours,69,0.06,Tweet Jukebox,,,,https://www.twitter.com/RemainShame/status/720...,Do you want live in EU dictatorship? https://t...,BrexitNeverSurrender,RemainShame,The Spirit of a Great Man Lives On,False,74,60,2016-03-22,"Kent, England",,715,20,"[eureferendum, brexit, remain, strongerin, inc...",5,[],0,[https://t.co/A2E0AHAZpj],1,1,[pic.twitter.com/N4fL7ko0ye],1,1,[photo],1,0,0,0.0,
720825778351378433,2016-04-15 05:07:12,2016-04-15,4,Friday,05:07:12,05,Small hours,69,0.06,Tweet Jukebox,,,,https://www.twitter.com/RemainShame/status/720...,"All the great things are simple, and many can ...",BrexitNeverSurrender,RemainShame,The Spirit of a Great Man Lives On,False,74,60,2016-03-22,"Kent, England",,716,20,"[brexit, euref]",2,[],0,[],0,0,[],0,0,[],0,0,0,0.0,
720825780058521600,2016-04-15 05:07:12,2016-04-15,4,Friday,05:07:12,05,Small hours,69,0.03,Tweet Jukebox,,,,https://www.twitter.com/EU_Failed/status/72082...,Selling Britain Off On the Cheap is Good Says ...,EUfailed,EU_Failed,,False,26,20,2016-04-02,,,78,1,"[eu, eureferendum, euref, brexit, voteleave]",5,[],0,[https://t.co/iXoWvXz3ok],1,1,[pic.twitter.com/zgVlzxXdSa],1,1,[photo],1,0,0,0.0,


In [10]:
# Drop the "_data" suffix from the two count columns (renamed in place).
final_names = {'rt_count_data': 'rt_count', 'like_count_data': 'like_count'}
merged_df.rename(columns=final_names, inplace=True)
merged_df

Unnamed: 0,created_at,date,day_numeric,day,time,hour,hour_binned,days_before_ref,account_age,source,in_reply_to_status_id,in_reply_to_user_id,coordinates,tweet_url,text,user_name,user_screen_name,user_description,user_verified,user_followers_count,user_friends_count,user_created_at,user_location,user_time_zone,user_statuses_count,user_favourites_count,hashtags,hashtags_count,mentions,mentions_count,urls,urls_count,urls_count_binary,media_urls,media_type_count,media_type_count_binary,media_type,photo,video,gif,rt_count,like_count
720824650373029889,2016-04-15 05:02:43,2016-04-15,4,Friday,05:02:43,05,Small hours,69,0.22,Twitter Web Client,,,,https://www.twitter.com/SJDelahunty72/status/7...,Referendum Party: Election Video https://t.co...,Steven J Delahunty,SJDelahunty72,@TrentUni Economics & Ex @RoyalAirForce (joine...,False,88,15,2016-01-23,"Nottingham, England",,9093,2312,"[bbcbreakfast, gmb, euref, bbcqt]",4,[YouTube],1,[https://t.co/0yCnmwO7w7],1,1,[pic.twitter.com/CDAM1jMzeZ],1,1,[photo],1,0,0,0.0,
720824890891218946,2016-04-15 05:03:40,2016-04-15,4,Friday,05:03:40,05,Small hours,69,1.24,Twitter for Android,,,,https://www.twitter.com/BritinQaf/status/72082...,The best couple 4Ever 💟 💟 #Britin #QAF https:/...,Brian And Justin ❤,BritinQaf,"Argentina fans who love Brian and Justin , thi...",False,1057,206,2015-01-19,,,4543,2580,"[britin, qaf]",2,[],0,[],0,0,[pic.twitter.com/s4SOtpA8ya],1,1,[animated_gif],0,0,1,13.0,32.0
720825083023925248,2016-04-15 05:04:26,2016-04-15,4,Friday,05:04:26,05,Small hours,69,1.21,Twitter for iPhone,,,,https://www.twitter.com/gabididit/status/72082...,#DemDebate great and all but still waiting for...,Gabrielle Belli,gabididit,M.A. student researching and writing about acr...,False,61,179,2015-01-30,"New York, NY",Eastern Time (US & Canada),172,159,"[demdebate, drones, abortion, globalsouth, ref...",6,[],0,[],0,0,[],0,0,[],0,0,0,0.0,
720825114116231168,2016-04-15 05:04:34,2016-04-15,4,Friday,05:04:34,05,Small hours,69,6.26,Twitter Web Client,707714714042822657,3241779670,,https://www.twitter.com/DrAlfOldman/status/720...,@BrexitWatch This is scary but not surprising....,Alf Oldman,DrAlfOldman,"Blogs about politics, people & travel. Expert ...",False,4493,136,2010-01-11,"Latchi, Cyprus",Athens,30298,319,[],0,[BrexitWatch],1,[],0,0,[],0,0,[],0,0,0,0.0,
720825272447021057,2016-04-15 05:05:11,2016-04-15,4,Friday,05:05:11,05,Small hours,69,1.24,Twitter for Android,,,,https://www.twitter.com/BritinQaf/status/72082...,AND WATCHING YOU WALK AWAY 💟 💟 💟 ❤ 💕 #britin #...,Brian And Justin ❤,BritinQaf,"Argentina fans who love Brian and Justin , thi...",False,1057,206,2015-01-19,,,4545,2580,"[britin, qaf]",2,[],0,[],0,0,[pic.twitter.com/8Tf8FDVUnh],1,1,[animated_gif],0,0,1,8.0,29.0
720825677277061120,2016-04-15 05:06:48,2016-04-15,4,Friday,05:06:48,05,Small hours,69,8.47,Tweet Jukebox,,,,https://www.twitter.com/DAILYSQUIB/status/7208...,#VoteLeave and Austerity Will End https://t.co...,Daily Squib News,DAILYSQUIB,CAUTION! The Daily Squib can be hazardous to y...,False,104369,2821,2007-10-26,All major cities worldwide,London,3442,5,"[voteleave, eureferendum, euref, remain]",4,[],0,[https://t.co/OiGzHH50f6],1,1,[pic.twitter.com/xRPciOzi6Z],1,1,[photo],1,0,0,0.0,
720825685481119744,2016-04-15 05:06:50,2016-04-15,4,Friday,05:06:50,05,Small hours,69,8.47,Tweet Jukebox,,,,https://www.twitter.com/DAILYSQUIB/status/7208...,#Brexit #Remain #eureferendum BREXIT: Volcanoe...,Daily Squib News,DAILYSQUIB,CAUTION! The Daily Squib can be hazardous to y...,False,104369,2821,2007-10-26,All major cities worldwide,London,3443,5,"[brexit, remain, eureferendum]",3,[],0,[https://t.co/0pSeyDUbNm],1,1,[],0,0,[],0,0,0,0.0,
720825751310716930,2016-04-15 05:07:06,2016-04-15,4,Friday,05:07:06,05,Small hours,69,0.06,Tweet Jukebox,,,,https://www.twitter.com/RemainShame/status/720...,Do you want live in EU dictatorship? https://t...,BrexitNeverSurrender,RemainShame,The Spirit of a Great Man Lives On,False,74,60,2016-03-22,"Kent, England",,715,20,"[eureferendum, brexit, remain, strongerin, inc...",5,[],0,[https://t.co/A2E0AHAZpj],1,1,[pic.twitter.com/N4fL7ko0ye],1,1,[photo],1,0,0,0.0,
720825778351378433,2016-04-15 05:07:12,2016-04-15,4,Friday,05:07:12,05,Small hours,69,0.06,Tweet Jukebox,,,,https://www.twitter.com/RemainShame/status/720...,"All the great things are simple, and many can ...",BrexitNeverSurrender,RemainShame,The Spirit of a Great Man Lives On,False,74,60,2016-03-22,"Kent, England",,716,20,"[brexit, euref]",2,[],0,[],0,0,[],0,0,[],0,0,0,0.0,
720825780058521600,2016-04-15 05:07:12,2016-04-15,4,Friday,05:07:12,05,Small hours,69,0.03,Tweet Jukebox,,,,https://www.twitter.com/EU_Failed/status/72082...,Selling Britain Off On the Cheap is Good Says ...,EUfailed,EU_Failed,,False,26,20,2016-04-02,,,78,1,"[eu, eureferendum, euref, brexit, voteleave]",5,[],0,[https://t.co/iXoWvXz3ok],1,1,[pic.twitter.com/zgVlzxXdSa],1,1,[photo],1,0,0,0.0,


### Step 5: Add Binary Columns for RT and Like Counts

In [11]:
# 1 if the tweet was retweeted at least once, otherwise 0.
merged_df['rt_count_binary'] = (merged_df['rt_count'] > 0).astype(int)
merged_df

Unnamed: 0,created_at,date,day_numeric,day,time,hour,hour_binned,days_before_ref,account_age,source,in_reply_to_status_id,in_reply_to_user_id,coordinates,tweet_url,text,user_name,user_screen_name,user_description,user_verified,user_followers_count,user_friends_count,user_created_at,user_location,user_time_zone,user_statuses_count,user_favourites_count,hashtags,hashtags_count,mentions,mentions_count,urls,urls_count,urls_count_binary,media_urls,media_type_count,media_type_count_binary,media_type,photo,video,gif,rt_count,like_count,rt_count_binary
720824650373029889,2016-04-15 05:02:43,2016-04-15,4,Friday,05:02:43,05,Small hours,69,0.22,Twitter Web Client,,,,https://www.twitter.com/SJDelahunty72/status/7...,Referendum Party: Election Video https://t.co...,Steven J Delahunty,SJDelahunty72,@TrentUni Economics & Ex @RoyalAirForce (joine...,False,88,15,2016-01-23,"Nottingham, England",,9093,2312,"[bbcbreakfast, gmb, euref, bbcqt]",4,[YouTube],1,[https://t.co/0yCnmwO7w7],1,1,[pic.twitter.com/CDAM1jMzeZ],1,1,[photo],1,0,0,0.0,,0
720824890891218946,2016-04-15 05:03:40,2016-04-15,4,Friday,05:03:40,05,Small hours,69,1.24,Twitter for Android,,,,https://www.twitter.com/BritinQaf/status/72082...,The best couple 4Ever 💟 💟 #Britin #QAF https:/...,Brian And Justin ❤,BritinQaf,"Argentina fans who love Brian and Justin , thi...",False,1057,206,2015-01-19,,,4543,2580,"[britin, qaf]",2,[],0,[],0,0,[pic.twitter.com/s4SOtpA8ya],1,1,[animated_gif],0,0,1,13.0,32.0,1
720825083023925248,2016-04-15 05:04:26,2016-04-15,4,Friday,05:04:26,05,Small hours,69,1.21,Twitter for iPhone,,,,https://www.twitter.com/gabididit/status/72082...,#DemDebate great and all but still waiting for...,Gabrielle Belli,gabididit,M.A. student researching and writing about acr...,False,61,179,2015-01-30,"New York, NY",Eastern Time (US & Canada),172,159,"[demdebate, drones, abortion, globalsouth, ref...",6,[],0,[],0,0,[],0,0,[],0,0,0,0.0,,0
720825114116231168,2016-04-15 05:04:34,2016-04-15,4,Friday,05:04:34,05,Small hours,69,6.26,Twitter Web Client,707714714042822657,3241779670,,https://www.twitter.com/DrAlfOldman/status/720...,@BrexitWatch This is scary but not surprising....,Alf Oldman,DrAlfOldman,"Blogs about politics, people & travel. Expert ...",False,4493,136,2010-01-11,"Latchi, Cyprus",Athens,30298,319,[],0,[BrexitWatch],1,[],0,0,[],0,0,[],0,0,0,0.0,,0
720825272447021057,2016-04-15 05:05:11,2016-04-15,4,Friday,05:05:11,05,Small hours,69,1.24,Twitter for Android,,,,https://www.twitter.com/BritinQaf/status/72082...,AND WATCHING YOU WALK AWAY 💟 💟 💟 ❤ 💕 #britin #...,Brian And Justin ❤,BritinQaf,"Argentina fans who love Brian and Justin , thi...",False,1057,206,2015-01-19,,,4545,2580,"[britin, qaf]",2,[],0,[],0,0,[pic.twitter.com/8Tf8FDVUnh],1,1,[animated_gif],0,0,1,8.0,29.0,1
720825677277061120,2016-04-15 05:06:48,2016-04-15,4,Friday,05:06:48,05,Small hours,69,8.47,Tweet Jukebox,,,,https://www.twitter.com/DAILYSQUIB/status/7208...,#VoteLeave and Austerity Will End https://t.co...,Daily Squib News,DAILYSQUIB,CAUTION! The Daily Squib can be hazardous to y...,False,104369,2821,2007-10-26,All major cities worldwide,London,3442,5,"[voteleave, eureferendum, euref, remain]",4,[],0,[https://t.co/OiGzHH50f6],1,1,[pic.twitter.com/xRPciOzi6Z],1,1,[photo],1,0,0,0.0,,0
720825685481119744,2016-04-15 05:06:50,2016-04-15,4,Friday,05:06:50,05,Small hours,69,8.47,Tweet Jukebox,,,,https://www.twitter.com/DAILYSQUIB/status/7208...,#Brexit #Remain #eureferendum BREXIT: Volcanoe...,Daily Squib News,DAILYSQUIB,CAUTION! The Daily Squib can be hazardous to y...,False,104369,2821,2007-10-26,All major cities worldwide,London,3443,5,"[brexit, remain, eureferendum]",3,[],0,[https://t.co/0pSeyDUbNm],1,1,[],0,0,[],0,0,0,0.0,,0
720825751310716930,2016-04-15 05:07:06,2016-04-15,4,Friday,05:07:06,05,Small hours,69,0.06,Tweet Jukebox,,,,https://www.twitter.com/RemainShame/status/720...,Do you want live in EU dictatorship? https://t...,BrexitNeverSurrender,RemainShame,The Spirit of a Great Man Lives On,False,74,60,2016-03-22,"Kent, England",,715,20,"[eureferendum, brexit, remain, strongerin, inc...",5,[],0,[https://t.co/A2E0AHAZpj],1,1,[pic.twitter.com/N4fL7ko0ye],1,1,[photo],1,0,0,0.0,,0
720825778351378433,2016-04-15 05:07:12,2016-04-15,4,Friday,05:07:12,05,Small hours,69,0.06,Tweet Jukebox,,,,https://www.twitter.com/RemainShame/status/720...,"All the great things are simple, and many can ...",BrexitNeverSurrender,RemainShame,The Spirit of a Great Man Lives On,False,74,60,2016-03-22,"Kent, England",,716,20,"[brexit, euref]",2,[],0,[],0,0,[],0,0,[],0,0,0,0.0,,0
720825780058521600,2016-04-15 05:07:12,2016-04-15,4,Friday,05:07:12,05,Small hours,69,0.03,Tweet Jukebox,,,,https://www.twitter.com/EU_Failed/status/72082...,Selling Britain Off On the Cheap is Good Says ...,EUfailed,EU_Failed,,False,26,20,2016-04-02,,,78,1,"[eu, eureferendum, euref, brexit, voteleave]",5,[],0,[https://t.co/iXoWvXz3ok],1,1,[pic.twitter.com/zgVlzxXdSa],1,1,[photo],1,0,0,0.0,,0


In [12]:
# 1 if the tweet has at least one like, otherwise 0.
# A missing like count (NaN) compares False against 0 and therefore maps to 0.
merged_df['like_count_binary'] = (merged_df['like_count'] > 0).astype(int)
merged_df

Unnamed: 0,created_at,date,day_numeric,day,time,hour,hour_binned,days_before_ref,account_age,source,in_reply_to_status_id,in_reply_to_user_id,coordinates,tweet_url,text,user_name,user_screen_name,user_description,user_verified,user_followers_count,user_friends_count,user_created_at,user_location,user_time_zone,user_statuses_count,user_favourites_count,hashtags,hashtags_count,mentions,mentions_count,urls,urls_count,urls_count_binary,media_urls,media_type_count,media_type_count_binary,media_type,photo,video,gif,rt_count,like_count,rt_count_binary,like_count_binary
720824650373029889,2016-04-15 05:02:43,2016-04-15,4,Friday,05:02:43,05,Small hours,69,0.22,Twitter Web Client,,,,https://www.twitter.com/SJDelahunty72/status/7...,Referendum Party: Election Video https://t.co...,Steven J Delahunty,SJDelahunty72,@TrentUni Economics & Ex @RoyalAirForce (joine...,False,88,15,2016-01-23,"Nottingham, England",,9093,2312,"[bbcbreakfast, gmb, euref, bbcqt]",4,[YouTube],1,[https://t.co/0yCnmwO7w7],1,1,[pic.twitter.com/CDAM1jMzeZ],1,1,[photo],1,0,0,0.0,,0,0
720824890891218946,2016-04-15 05:03:40,2016-04-15,4,Friday,05:03:40,05,Small hours,69,1.24,Twitter for Android,,,,https://www.twitter.com/BritinQaf/status/72082...,The best couple 4Ever 💟 💟 #Britin #QAF https:/...,Brian And Justin ❤,BritinQaf,"Argentina fans who love Brian and Justin , thi...",False,1057,206,2015-01-19,,,4543,2580,"[britin, qaf]",2,[],0,[],0,0,[pic.twitter.com/s4SOtpA8ya],1,1,[animated_gif],0,0,1,13.0,32.0,1,1
720825083023925248,2016-04-15 05:04:26,2016-04-15,4,Friday,05:04:26,05,Small hours,69,1.21,Twitter for iPhone,,,,https://www.twitter.com/gabididit/status/72082...,#DemDebate great and all but still waiting for...,Gabrielle Belli,gabididit,M.A. student researching and writing about acr...,False,61,179,2015-01-30,"New York, NY",Eastern Time (US & Canada),172,159,"[demdebate, drones, abortion, globalsouth, ref...",6,[],0,[],0,0,[],0,0,[],0,0,0,0.0,,0,0
720825114116231168,2016-04-15 05:04:34,2016-04-15,4,Friday,05:04:34,05,Small hours,69,6.26,Twitter Web Client,707714714042822657,3241779670,,https://www.twitter.com/DrAlfOldman/status/720...,@BrexitWatch This is scary but not surprising....,Alf Oldman,DrAlfOldman,"Blogs about politics, people & travel. Expert ...",False,4493,136,2010-01-11,"Latchi, Cyprus",Athens,30298,319,[],0,[BrexitWatch],1,[],0,0,[],0,0,[],0,0,0,0.0,,0,0
720825272447021057,2016-04-15 05:05:11,2016-04-15,4,Friday,05:05:11,05,Small hours,69,1.24,Twitter for Android,,,,https://www.twitter.com/BritinQaf/status/72082...,AND WATCHING YOU WALK AWAY 💟 💟 💟 ❤ 💕 #britin #...,Brian And Justin ❤,BritinQaf,"Argentina fans who love Brian and Justin , thi...",False,1057,206,2015-01-19,,,4545,2580,"[britin, qaf]",2,[],0,[],0,0,[pic.twitter.com/8Tf8FDVUnh],1,1,[animated_gif],0,0,1,8.0,29.0,1,1
720825677277061120,2016-04-15 05:06:48,2016-04-15,4,Friday,05:06:48,05,Small hours,69,8.47,Tweet Jukebox,,,,https://www.twitter.com/DAILYSQUIB/status/7208...,#VoteLeave and Austerity Will End https://t.co...,Daily Squib News,DAILYSQUIB,CAUTION! The Daily Squib can be hazardous to y...,False,104369,2821,2007-10-26,All major cities worldwide,London,3442,5,"[voteleave, eureferendum, euref, remain]",4,[],0,[https://t.co/OiGzHH50f6],1,1,[pic.twitter.com/xRPciOzi6Z],1,1,[photo],1,0,0,0.0,,0,0
720825685481119744,2016-04-15 05:06:50,2016-04-15,4,Friday,05:06:50,05,Small hours,69,8.47,Tweet Jukebox,,,,https://www.twitter.com/DAILYSQUIB/status/7208...,#Brexit #Remain #eureferendum BREXIT: Volcanoe...,Daily Squib News,DAILYSQUIB,CAUTION! The Daily Squib can be hazardous to y...,False,104369,2821,2007-10-26,All major cities worldwide,London,3443,5,"[brexit, remain, eureferendum]",3,[],0,[https://t.co/0pSeyDUbNm],1,1,[],0,0,[],0,0,0,0.0,,0,0
720825751310716930,2016-04-15 05:07:06,2016-04-15,4,Friday,05:07:06,05,Small hours,69,0.06,Tweet Jukebox,,,,https://www.twitter.com/RemainShame/status/720...,Do you want live in EU dictatorship? https://t...,BrexitNeverSurrender,RemainShame,The Spirit of a Great Man Lives On,False,74,60,2016-03-22,"Kent, England",,715,20,"[eureferendum, brexit, remain, strongerin, inc...",5,[],0,[https://t.co/A2E0AHAZpj],1,1,[pic.twitter.com/N4fL7ko0ye],1,1,[photo],1,0,0,0.0,,0,0
720825778351378433,2016-04-15 05:07:12,2016-04-15,4,Friday,05:07:12,05,Small hours,69,0.06,Tweet Jukebox,,,,https://www.twitter.com/RemainShame/status/720...,"All the great things are simple, and many can ...",BrexitNeverSurrender,RemainShame,The Spirit of a Great Man Lives On,False,74,60,2016-03-22,"Kent, England",,716,20,"[brexit, euref]",2,[],0,[],0,0,[],0,0,[],0,0,0,0.0,,0,0
720825780058521600,2016-04-15 05:07:12,2016-04-15,4,Friday,05:07:12,05,Small hours,69,0.03,Tweet Jukebox,,,,https://www.twitter.com/EU_Failed/status/72082...,Selling Britain Off On the Cheap is Good Says ...,EUfailed,EU_Failed,,False,26,20,2016-04-02,,,78,1,"[eu, eureferendum, euref, brexit, voteleave]",5,[],0,[https://t.co/iXoWvXz3ok],1,1,[pic.twitter.com/zgVlzxXdSa],1,1,[photo],1,0,0,0.0,,0,0


In [17]:
# Store the two 0/1 indicator columns with object dtype.
for binary_col in ('rt_count_binary', 'like_count_binary'):
    merged_df[binary_col] = merged_df[binary_col].astype('object')

In [18]:
# Summary statistics for the numeric columns; the *_binary columns were cast to
# object in the previous cell and so do not appear in this table.
merged_df.describe()

Unnamed: 0,days_before_ref,account_age,user_followers_count,user_friends_count,user_statuses_count,user_favourites_count,hashtags_count,mentions_count,urls_count,media_type_count,rt_count,like_count
count,2576175.0,2576175.0,2576175.0,2576175.0,2576175.0,2576175.0,2576175.0,2576175.0,2576175.0,2576175.0,2576175.0,687538.0
mean,20.21,3.97,8577.35,1428.66,30185.08,4285.49,1.7,0.91,0.3,0.18,2.3,6.89
std,18.21,2.51,195168.89,5634.09,136703.75,13198.77,1.54,1.2,0.47,0.44,35.2,92.33
min,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,5.0,1.64,113.0,161.0,1249.0,80.0,1.0,0.0,0.0,0.0,0.0,0.0
50%,15.0,4.26,456.0,516.0,5147.0,588.0,1.0,0.0,0.0,0.0,0.0,1.0
75%,31.0,6.19,1612.0,1427.0,18127.5,2945.0,2.0,1.0,1.0,0.0,1.0,3.0
max,69.0,10.07,62517720.0,1570110.0,6210243.0,1229284.0,21.0,12.0,5.0,5.0,18254.0,45997.0


In [19]:
# Spot-check one tweet by its index label (the numeric tweet ID).
example_tweet_id = 737601902125277184
merged_df.loc[example_tweet_id]

created_at                                               2016-05-31 12:09:32
date                                                              2016-05-31
day_numeric                                                                1
day                                                                  Tuesday
time                                                                12:09:32
hour                                                                      12
hour_binned                                                        Afternoon
days_before_ref                                                           23
account_age                                                             3.39
source                                                      Twitter for iPad
in_reply_to_status_id                                                   None
in_reply_to_user_id                                                     None
coordinates                                                             None

### Step 6: Save Results in Another H5 File

In [20]:
def save_h5(obj, filename):
    """Write ``obj`` to the HDF5 file ``filename`` under the key "tweets".

    Parameters
    ----------
    obj : object
        Value stored under the "tweets" key (a DataFrame in this notebook).
    filename : str
        Path handed to ``pd.HDFStore``.

    Returns
    -------
    None

    The store is used as a context manager so the file handle is closed even
    when the write raises; the original left the store open in that case.
    """
    with pd.HDFStore(filename) as store:
        store["tweets"] = obj

# Persist the tweets frame, now carrying the count and indicator columns, to a new store.
save_h5(obj=merged_df, filename="tweets_processed_with_dvs.h5")

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed-integer,key->block4_values] [items->['date', 'day_numeric', 'day', 'time', 'hour', 'hour_binned', 'source', 'in_reply_to_status_id', 'in_reply_to_user_id', 'coordinates', 'tweet_url', 'text', 'user_name', 'user_screen_name', 'user_description', 'user_created_at', 'user_location', 'user_time_zone', 'hashtags', 'mentions', 'urls', 'urls_count_binary', 'media_urls', 'media_type_count_binary', 'media_type', 'photo', 'video', 'gif', 'rt_count_binary', 'like_count_binary']]

