### Gathering

In [1]:
import pandas as pd
import numpy as np
import requests
import tweepy
import tweepy_credentials as creds
import json
import time
import re

In [2]:
# load the WeRateDogs Twitter archive CSV into a pandas DataFrame
archive_csv = 'data/twitter-archive-enhanced.csv'
df_archive = pd.read_csv(archive_csv)

# preview the first three rows
df_archive.head(3)

Unnamed: 0,tweet_id,in_reply_to_status_id,in_reply_to_user_id,timestamp,source,text,retweeted_status_id,retweeted_status_user_id,retweeted_status_timestamp,expanded_urls,rating_numerator,rating_denominator,name,doggo,floofer,pupper,puppo
0,892420643555336193,,,2017-08-01 16:23:56 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Phineas. He's a mystical boy. Only eve...,,,,https://twitter.com/dog_rates/status/892420643...,13,10,Phineas,,,,
1,892177421306343426,,,2017-08-01 00:17:27 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Tilly. She's just checking pup on you....,,,,https://twitter.com/dog_rates/status/892177421...,13,10,Tilly,,,,
2,891815181378084864,,,2017-07-31 00:18:03 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Archie. He is a rare Norwegian Pouncin...,,,,https://twitter.com/dog_rates/status/891815181...,12,10,Archie,,,,


In [3]:
## download, save, and create pandas DataFrame for the tweet image predictions
# fetch the hosted image-predictions file; the timeout keeps this cell from
# hanging indefinitely if the server never answers
url = 'https://d17h27t6h515a5.cloudfront.net/topher/2017/August/599fd2ad_image-predictions/image-predictions.tsv'
response = requests.get(url, timeout=30)

# fail loudly on a 4xx/5xx reply so an error page is never mistaken for the data file
response.raise_for_status()

# display the response; <Response [200]> means the request succeeded
response

<Response [200]>

In [4]:
# persist the downloaded bytes to data/image_predictions.tsv
with open('data/image_predictions.tsv', mode='wb') as tsv_out:
    tsv_out.write(response.content)

In [5]:
# read the tab-separated predictions file into a pandas DataFrame
predictions_tsv = 'data/image_predictions.tsv'
df_img = pd.read_csv(predictions_tsv, sep='\t')

# preview the first three rows
df_img.head(3)

Unnamed: 0,tweet_id,jpg_url,img_num,p1,p1_conf,p1_dog,p2,p2_conf,p2_dog,p3,p3_conf,p3_dog
0,666020888022790149,https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg,1,Welsh_springer_spaniel,0.465074,True,collie,0.156665,True,Shetland_sheepdog,0.061428,True
1,666029285002620928,https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg,1,redbone,0.506826,True,miniature_pinscher,0.074192,True,Rhodesian_ridgeback,0.07201,True
2,666033412701032449,https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg,1,German_shepherd,0.596461,True,malinois,0.138584,True,bloodhound,0.116197,True


In [6]:
# copy the Twitter API developer credentials out of the tweepy_credentials module (imported as creds)
consumer_key, consumer_secret = creds.consumer_key, creds.consumer_secret
access_token, access_secret = creds.access_token, creds.access_secret

In [7]:
# build the tweepy auth handler from the consumer key pair, then attach the access token pair
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

# create the API client with both rate-limit options switched on
api = tweepy.API(
    auth,
    wait_on_rate_limit=True,
    wait_on_rate_limit_notify=True,
)

---
Test: try the Twitter API workflow on a small random sample of tweets first

In [28]:
# draw 30 archive rows with a fixed seed so this trial run picks the same tweets on every re-run
df_sample = df_archive.sample(30, random_state=42)

In [54]:
# fetch the extended-mode status JSON for every sampled tweet id
data = []  # raw JSON payload of each tweet that was retrieved
err = []   # ids of the tweets the API call failed for
for tweet_id in df_sample['tweet_id']:
    try:
        status = api.get_status(tweet_id, tweet_mode='extended')
    except tweepy.TweepError:
        # catch only tweepy's own API errors; a bare except would also
        # swallow KeyboardInterrupt and genuine programming mistakes
        err.append(tweet_id)
        print('{} Not Found !'.format(tweet_id))
    else:
        data.append(status._json)

In [56]:
# serialise the collected tweet payloads to data/tweet_sample.txt as JSON
with open('data/tweet_sample.txt', 'w') as sample_file:
    json.dump(data, sample_file)

In [57]:
# show the raw JSON of the first tweet that was fetched
data[0]

{'created_at': 'Thu Feb 25 16:53:11 +0000 2016',
 'id': 702899151802126337,
 'id_str': '702899151802126337',
 'full_text': 'Say hello to Luna. Her tongue is malfunctioning (tragic). 12/10 please enjoy (vid by @LilyArtz) https://t.co/F9aLnADVIw',
 'truncated': False,
 'display_text_range': [0, 119],
 'entities': {'hashtags': [],
  'symbols': [],
  'user_mentions': [{'screen_name': 'LilyArtz',
    'name': 'lily artz',
    'id': 2595631677,
    'id_str': '2595631677',
    'indices': [85, 94]}],
  'urls': [{'url': 'https://t.co/F9aLnADVIw',
    'expanded_url': 'https://vine.co/v/i6iIrBwnTFI',
    'display_url': 'vine.co/v/i6iIrBwnTFI',
    'indices': [96, 119]}]},
 'source': '<a href="http://vine.co" rel="nofollow">Vine - Make a Scene</a>',
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'in_reply_to_screen_name': None,
 'user': {'id': 4196983835,
  'id_str': '4196983835',
  'name': 'WeRateDogs™',
  'scree

In [58]:
# number of tweets that were fetched successfully
len(data)

30

In [59]:
# read the saved sample file back in as a pandas DataFrame
sample_json_path = 'data/tweet_sample.txt'
data2 = pd.read_json(sample_json_path)

In [63]:
# display the DataFrame loaded from the saved sample file
data2

Unnamed: 0,contributors,coordinates,created_at,display_text_range,entities,extended_entities,favorite_count,favorited,full_text,geo,...,lang,place,possibly_sensitive,possibly_sensitive_appealable,retweet_count,retweeted,retweeted_status,source,truncated,user
0,,,2016-02-25 16:53:11,"[0, 119]","{'hashtags': [], 'symbols': [], 'user_mentions...",,1672,False,Say hello to Luna. Her tongue is malfunctionin...,,...,en,,0.0,0.0,477,False,,"<a href=""http://vine.co"" rel=""nofollow"">Vine -...",False,"{'id': 4196983835, 'id_str': '4196983835', 'na..."
1,,,2017-04-24 15:13:52,"[0, 112]","{'hashtags': [], 'symbols': [], 'user_mentions...","{'media': [{'id': 856526604033556482, 'id_str'...",11901,False,"THIS IS CHARLIE, MARK. HE DID JUST WANT TO SAY...",,...,en,,0.0,0.0,1915,False,,"<a href=""http://twitter.com/download/iphone"" r...",False,"{'id': 4196983835, 'id_str': '4196983835', 'na..."
2,,,2015-11-28 21:34:09,"[0, 95]","{'hashtags': [], 'symbols': [], 'user_mentions...","{'media': [{'id': 670717326967205888, 'id_str'...",1214,False,*screams for a little bit and then crumples to...,,...,en,,0.0,0.0,504,False,,"<a href=""http://twitter.com/download/iphone"" r...",False,"{'id': 4196983835, 'id_str': '4196983835', 'na..."
3,,,2015-12-06 04:18:46,"[0, 134]","{'hashtags': [], 'symbols': [], 'user_mentions...","{'media': [{'id': 673355872538599424, 'id_str'...",1500,False,This is Koda. She's a boss. Helps shift gears....,,...,en,,0.0,0.0,591,False,,"<a href=""http://twitter.com/download/iphone"" r...",False,"{'id': 4196983835, 'id_str': '4196983835', 'na..."
4,,,2017-07-15 23:25:31,"[0, 131]","{'hashtags': [], 'symbols': [], 'user_mentions...","{'media': [{'id': 886366138128449536, 'id_str'...",20629,False,This is Roscoe. Another pupper fallen victim t...,,...,en,,0.0,0.0,3097,False,,"<a href=""http://twitter.com/download/iphone"" r...",False,"{'id': 4196983835, 'id_str': '4196983835', 'na..."
5,,,2016-11-24 18:28:13,"[0, 63]","{'hashtags': [], 'symbols': [], 'user_mentions...",,1870,False,.@NBCSports OMG THE TINY HAT I'M GOING TO HAVE...,,...,en,,,,254,False,,"<a href=""http://twitter.com/download/iphone"" r...",False,"{'id': 4196983835, 'id_str': '4196983835', 'na..."
6,,,2016-02-17 18:49:22,"[0, 138]","{'hashtags': [], 'symbols': [], 'user_mentions...","{'media': [{'id': 700029279581708288, 'id_str'...",2150,False,This is Coops. His ship is taking on water. So...,,...,en,,0.0,0.0,613,False,,"<a href=""http://twitter.com/download/iphone"" r...",False,"{'id': 4196983835, 'id_str': '4196983835', 'na..."
7,,,2017-01-18 01:01:34,"[0, 117]","{'hashtags': [], 'symbols': [], 'user_mentions...","{'media': [{'id': 821522878252384256, 'id_str'...",8450,False,This is Harlso. He has a really good idea but ...,,...,en,,0.0,0.0,1913,False,,"<a href=""http://twitter.com/download/iphone"" r...",False,"{'id': 4196983835, 'id_str': '4196983835', 'na..."
8,,,2017-04-24 20:17:23,"[0, 139]","{'hashtags': [], 'symbols': [], 'user_mentions...",,0,False,RT @dog_rates: This is Luna. It's her first ti...,,...,en,,,,10862,False,{'created_at': 'Thu Mar 23 00:18:10 +0000 2017...,"<a href=""http://twitter.com/download/iphone"" r...",False,"{'id': 4196983835, 'id_str': '4196983835', 'na..."
9,,,2016-09-01 02:21:21,"[0, 112]","{'hashtags': [], 'symbols': [], 'user_mentions...","{'media': [{'id': 673320125483892736, 'id_str'...",0,False,RT @dog_rates: This is Frankie. He's wearing b...,,...,en,,0.0,0.0,8115,False,{'created_at': 'Sun Dec 06 01:56:44 +0000 2015...,"<a href=""http://twitter.com/download/iphone"" r...",False,"{'id': 4196983835, 'id_str': '4196983835', 'na..."
