In [4]:
# load dependencies
import functools
import json
import logging
import os
import sqlite3
import time
import warnings

import dataset
import numpy as np
import pandas as pd
import textblob
import tweepy
from sqlalchemy.exc import ProgrammingError

# open the sqlite database backing the scraper: created if none exists,
# otherwise new rows are appended to what is already stored
DB_URL = "sqlite:///tweets_lse.db"
db = dataset.connect(DB_URL)

# set api constants
# Credentials are read from the environment so that secrets never live in the
# notebook. Any keys that were previously committed here must be treated as
# compromised and rotated.
TWITTER_APP_KEY = os.environ.get('TWITTER_APP_KEY')
TWITTER_APP_SECRET = os.environ.get('TWITTER_APP_SECRET')
TWITTER_KEY = os.environ.get('TWITTER_KEY')
TWITTER_SECRET = os.environ.get('TWITTER_SECRET')

# warn right away when something is unset, instead of only failing later at request time
_missing_twitter_vars = [
    var_name
    for var_name in ('TWITTER_APP_KEY', 'TWITTER_APP_SECRET', 'TWITTER_KEY', 'TWITTER_SECRET')
    if not os.environ.get(var_name)
]
if _missing_twitter_vars:
    warnings.warn(
        'Missing Twitter credentials in environment: ' + ', '.join(_missing_twitter_vars)
    )

# authenticate: application credentials first, then the access token pair
auth = tweepy.OAuthHandler(TWITTER_APP_KEY, TWITTER_APP_SECRET)
auth.set_access_token(TWITTER_KEY, TWITTER_SECRET)

# options controlling how the API client reacts to errors and rate limits
api_options = dict(
    retry_count=5,                   # number of retries to attempt when an error occurs
    retry_delay=5,                   # number of seconds to wait between retries
    wait_on_rate_limit=True,         # automatically wait for rate limits to replenish
    wait_on_rate_limit_notify=True,  # print a notification while waiting on rate limits
)

# API object used to pull twitter data, built on the authentication above
api = tweepy.API(auth, **api_options)

# create class that inherits from streamlistener object
class StreamListener(tweepy.StreamListener):
    """Stream listener that stores geotagged, non-retweet statuses in ``db["tweets"]``.

    Each stored row carries tweet and user metadata together with the TextBlob
    polarity and subjectivity scores of the tweet text.
    """

    # override on_status method to define our own functionality
    def on_status(self, status):
        """Handle one incoming status.

        Retweets and statuses without coordinates are ignored. Every other
        status is scored with TextBlob and inserted into the ``tweets`` table.

        Args:
            status: the status object delivered by the stream.
        """
        # skip retweets
        if hasattr(status, 'retweeted_status'):
            return

        # only keep geotagged tweets; checked first so no work is spent on
        # statuses that would be discarded anyway
        coords = status.coordinates
        if coords is None:
            return

        # index 0 is stored as longitude and index 1 as latitude
        point = coords['coordinates']
        lon, lat = point[0], point[1]

        # a status may carry coordinates without a place; store None in that case
        # instead of failing on `None.full_name`
        place_info = getattr(status, 'place', None)
        place = place_info.full_name if place_info is not None else None

        text = status.text
        sentiment = textblob.TextBlob(text).sentiment

        # create reference point for db
        table = db["tweets"]

        # try to insert to sqlite db
        try:
            table.insert(dict(
                created_at=status.created_at,
                retweets=status.retweet_count,
                favourites=status.favorite_count,
                user_description=status.user.description,
                followers_count=status.user.followers_count,
                user_location=status.user.location,
                place=place,
                lat=lat,
                lon=lon,
                text=text,
                user_name=status.user.screen_name,
                polarity=sentiment.polarity,
                subjectivity=sentiment.subjectivity
            ))
        except ProgrammingError as err:
            # logging.INFO is an integer level constant, not a function; calling it
            # raised TypeError from inside the error handler
            logging.error("Failed to insert tweet into database: %s", err)

    def on_error(self, status_code):
        """React to an error status code reported by the stream.

        Args:
            status_code: HTTP status code reported for the stream connection.

        Returns:
            False when being rate limited (420), True for any other code.
        """
        # if being rate limited, return false
        if status_code == 420:
            return False
        # NOTE(review): tweepy 3.x only treats an explicit False specially, so True
        # is assumed to behave like the previous implicit None — confirm for your version
        return True
        
def timeout_test(func):
    """Decorator that prints how long each call to ``func`` takes.

    After every call the wrapped function's name and the elapsed seconds are
    printed, including when the call raises or is interrupted. The return value
    of ``func`` is passed through unchanged.

    Args:
        func: the callable to time.

    Returns:
        A wrapper that accepts the same positional and keyword arguments as ``func``.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # recommended replacement for measuring elapsed time
        start = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            # report even when the call ends through an exception or Ctrl-C
            print(func.__name__, time.perf_counter() - start)

    return wrapper

@timeout_test  # same as: call_scraper = timeout_test(call_scraper)
def call_scraper(geo):
    """Start a Twitter stream filtered by location and language.

    Args:
        geo: bounding-box coordinates forwarded to the stream's ``locations`` filter.
    """
    # stream object authenticated through the API client, feeding a fresh listener
    tweet_stream = tweepy.Stream(auth=api.auth, listener=StreamListener())

    # restrict the stream to English tweets inside the given box
    tweet_stream.filter(locations=geo, languages=['en'])

In [55]:
import logging

# set logger: send root-logger records to twitter_log.log
LOG_FILE = 'twitter_log.log'
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)

# Re-running this cell must not attach a second handler for the same file,
# otherwise every record is written to the log twice.
_log_path = os.path.abspath(LOG_FILE)
_already_attached = any(
    isinstance(handler, logging.FileHandler) and handler.baseFilename == _log_path
    for handler in logger.handlers
)
if not _already_attached:
    fhandler = logging.FileHandler(filename=LOG_FILE, mode='a')
    formatter = logging.Formatter('%(asctime)s %(message)s')
    fhandler.setFormatter(formatter)
    logger.addHandler(fhandler)

# bounding box handed to the stream's location filter; the four numbers are read
# here as west longitude, south latitude, east longitude, north latitude — TODO confirm ordering
west_lon, south_lat = -10.8544921875, 49.82380908513249
east_lon, north_lat = 2.021484375, 59.478568831926395
bbox_geo = [west_lon, south_lat, east_lon, north_lat]

# run the decorated (timed) scraper over that box
call_scraper(bbox_geo)

KeyboardInterrupt: 

In [56]:
# reopen the sqlite database holding the scraped tweets
db = dataset.connect("sqlite:///tweets_lse.db")

# fetch every row of the tweets table
tweets_table = db["tweets"]
result = tweets_table.all()

# export those rows to a csv file
dataset.freeze(result, filename='twitter_lse.csv', format='csv')

In [5]:
# os.path.dirname('__file__') operated on the literal string '__file__' and always
# returned '', so the database was already being opened relative to the working directory
DB_PATH = 'tweets_lse.db'

# query sqlite table, releasing the connection once the frame is loaded
cnx = sqlite3.connect(DB_PATH)
try:
    lse_df = pd.read_sql_query('SELECT * FROM tweets', cnx)
finally:
    cnx.close()

# show a preview rather than dumping the whole table into the notebook
lse_df.head(10)

Unnamed: 0,id,created_at,retweets,favourites,user_description,followers_count,user_location,place,lat,lon,text,user_name,polarity,subjectivity
0,1,2017-07-07 11:50:04.000000,0,0,"Celebrity MakeupArtist for TV/Film, Music Vide...",2023,"Slough, Heathrow London.","Slough, South East",51.51,-0.5931,FEMALE MODELS NEEDED for NEXT WEEK - 11am to 3...,RizKhanMua,0.0,0.083333
1,2,2017-07-07 11:50:06.000000,0,0,Real Time Solar updates from Oxford. See the s...,48,Oxford,"Oxford, England",51.724849,-1.232882,2879 watts.\n73% Battery = 1.46kWh https://t.c...,OxfordSolarLive,0.0,0.0
2,3,2017-07-07 11:50:07.000000,0,0,Weather conditions updated every 10 minutes fr...,828,"Woking, England","Woking, South East",51.332222,-0.557778,Tmp 32.9°C Wind 10mph Press 1013.2mb Cloud 900...,WeatherWoking,0.208333,0.75
3,4,2017-07-07 11:50:08.000000,0,0,(As Seen On The Only Way Is Essex) •Sunbeds •H...,8810,"217 Highstreet Epping, Essex","Hertford, East",51.796091,-0.076074,✨✨👌🏽FRESH HAIR FRIDAY!!👌🏽✨✨\n\n•Blow By Junior...,SoGlamEssex,0.0,0.0
4,5,2017-07-07 11:50:09.000000,0,0,"International stylist, colour evangelist. Spea...",573,"London, England","Ealing, London",51.5111,-0.307228,"Do your thing your way, it's the only way.\n\n...",Jobaldwintrott,0.216667,0.716667
5,6,2017-07-07 11:50:09.000000,0,0,| skins & ivories |,1544,London,"Islington, London",51.53437,-0.106129,last night's fun. @ O2 Academy Islington https...,jimmolyneux,0.15,0.133333
6,7,2017-07-07 11:50:12.000000,0,0,"Hi I am Mitchell! I am 23 years old, love Poké...",7,"Perth, Western Australia","Merton, London",51.43436,-0.214262,Finally at #Wimbledon for the first time!! Goi...,mitchoishere,0.347656,0.408333
7,8,2017-07-07 11:50:15.000000,0,0,방탄소년단 —— teamwork makes the dream work. #정국 ♡,221,,"Bicester, England",51.89204,-1.155603,"(at @BicesterVillage in Bicester, Oxfordshire)...",palmypattareeya,0.0,0.0
8,9,2017-07-07 11:50:17.000000,0,0,"Farm vet, Yorkshire.",58,"York, England","Murton, England",53.96372,-1.00962,Just in time for the weekend!! #craftginclub @...,ashmarshallvet,0.0,0.0
9,10,2017-07-07 11:50:22.000000,0,0,"Cancer survivor and blogger, CSR and animal lo...",586,London,"East, England",51.72795,-0.469812,Amazing #wedding last night! @ Shendish Manor ...,alexsam_89,0.3,0.483333


In [1]:
import json
import random
import sys

from expects import expect, equal
from expects.matchers.built_in import be_above
from expects.matchers.built_in.have_keys import have_key

import pytest
from requests import *
from pprint import pprint

sys.path.insert(0, './python')

import conftest
import factories

In [6]:
# text of the tweet at row label 0, selected with a single .loc lookup
content = lse_df.loc[0, 'text']
content

'FEMALE MODELS NEEDED for NEXT WEEK - 11am to 3pm -TFP HAIR N MAKEUP on - mon 10th july, tues… https://t.co/1f66MQ4Weh'

In [8]:
# define constants
BASE_URL = 'https://api.receptiviti.com/'

# The API key and secret are read from the environment so that they never live in
# the notebook. Any values that were previously committed here must be treated
# as compromised and rotated.
APIKEY = os.environ.get('RECEPTIVITI_API_KEY')
APISECRET = os.environ.get('RECEPTIVITI_API_SECRET')

# warn right away when something is unset, instead of only failing later at request time
_missing_receptiviti_vars = [
    var_name
    for var_name in ('RECEPTIVITI_API_KEY', 'RECEPTIVITI_API_SECRET')
    if not os.environ.get(var_name)
]
if _missing_receptiviti_vars:
    warnings.warn(
        'Missing Receptiviti credentials in environment: ' + ', '.join(_missing_receptiviti_vars)
    )

@pytest.mark.person_api
def test_create_person_with_content(baseurl, apikey, apisecret):
    """Create a person with one content sample through the person API and check the reply.

    Args:
        baseurl: root URL handed to ``conftest.person_api_url``.
        apikey: API key handed to ``conftest.auth_headers``.
        apisecret: API secret handed to ``conftest.auth_headers``.

    Returns:
        The decoded JSON body of the response.
    """
    content_data = factories.get_content_data()
    person_data = factories.get_person_data(content_data)

    person_api_url = conftest.person_api_url(baseurl)
    auth_headers = conftest.auth_headers(apikey, apisecret)

    response = post(person_api_url, json=person_data, headers=auth_headers)

    # check the status before decoding, so a failed request is reported as a
    # status mismatch rather than as a JSON parsing error on an error body
    expect(response.status_code).to(equal(200))

    response_json = json.loads(response.content)
    expect(response_json["name"]).to(equal(person_data["name"]))

    # the first content entry must carry both score sets
    first_content = response_json["contents"][0]
    expect(first_content).to(have_key("receptiviti_scores"))
    expect(first_content).to(have_key("liwc_scores"))

    return response_json

# run the person-API check against BASE_URL with the configured credentials
test_case = test_create_person_with_content(BASE_URL, APIKEY, APISECRET)

# pretty-print the returned document
pprint(test_case)

{'_id': '595f7769572c16057117ff6c',
 '_links': {'contents': {'href': '/v2/api/person/595f7769572c16057117ff6c/contents',
                         'method': 'GET'},
            'self': {'href': '/v2/api/person/595f7769572c16057117ff6c',
                     'method': 'GET'}},
 'content_count': 1,
 'contents': [{'_id': '595f7769572c16057117ff6d',
               'communication_recommendation': 'Try to focus on the '
                                               'here-and-now instead of the '
                                               "future. Don't be afraid to use "
                                               'humor to lighten the '
                                               'conversation. Give them some '
                                               "time to talk - they'll tell "
                                               "you what they're thinking.",
               'content_date': '2017-07-07T12:58:29.661000+00:00',
               'content_handle': '8ba77e529bda4e438f

                                                   'self_assured': ['Insufficient '
                                                                    'word '
                                                                    'count. '
                                                                    'Requires '
                                                                    'a minimum '
                                                                    'of 500 '
                                                                    'words for '
                                                                    'accurate '
                                                                    'results.'],
                                                   'self_conscious': ['Insufficient '
                                                                      'word '
                                                                      'count. '
                                     