# Listener script

This script uses Tweepy to connect to Twitter's streaming API and stores the collected tweets in a separate JSON file for each day (one JSON object per line).

In [None]:
#This sets up the necessary access tokens and authentication to connect to API
#IMPORTS
import os

import tweepy

#AUTHENTICATION
# Configure the following four for your Twitter account.
# Each value is read from the environment variable named below, so the secrets
# do not have to be saved inside the notebook. If a variable is not set, the
# fallback (second argument) is used; it can still be filled in by hand.
consumer_key = os.environ.get('TWITTER_CONSUMER_KEY', '')
consumer_secret = os.environ.get('TWITTER_CONSUMER_SECRET', '')
access_token = os.environ.get('TWITTER_ACCESS_TOKEN', '')
access_token_secret = os.environ.get('TWITTER_ACCESS_TOKEN_SECRET', '')

#SETTING API AUTHENTICATION
# `auth` is reused further down when the stream is opened.
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# NOTE(review): wait_on_rate_limit_notify looks specific to Tweepy 3.x - confirm
# against the installed version before upgrading Tweepy.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

In [None]:
#This creates the "Listener" class for the desired handling of tweets
#IMPORTS
import tweepy 
import json
import time
from datetime import datetime, timedelta 

# Switch on Tweepy's debug mode for this session.
# NOTE(review): presumably this enables verbose HTTP-level output - confirm
# against the installed Tweepy version, and turn it off for long unattended runs
# if the output gets too noisy.
tweepy.debug(True)

#LISTENER
class Listener(tweepy.StreamListener):
    """Stream listener that appends every received tweet to a per-day file.

    Tweets are written as one JSON object per line to
    ``Twitter_Data/<YYYY-MM-DD>.json``. Errors and timeouts are appended to
    ``Twitter_Data/error_log.txt``.

    Attributes set while running:
        file: date of the current output file as 'YYYY-MM-DD'.
        day:  day-of-month part of ``file`` (two digits).
        path: full path of the current output file.
    """

    #The folder here determines where the files get stored
    data_dir = 'Twitter_Data/'

    def _set_day(self, now):
        """Point the listener at the output file for the date of *now*."""
        # Local import: 'os' is not among the imports of this cell.
        import os
        # Without the folder every later open(..., 'a') would fail.
        os.makedirs(self.data_dir, exist_ok=True)
        self.file = now.date().isoformat()
        self.day = self.file.split("-")[2]
        self.path = self.data_dir + self.file + '.json'

    def _log(self, message):
        """Print *message* and append it (plus a blank line) to the error log."""
        import os
        # on_error can fire before on_connect ever ran, so make sure the
        # folder exists here as well.
        os.makedirs(self.data_dir, exist_ok=True)
        print(message)
        with open(self.data_dir + 'error_log.txt', 'a') as outfile:
            outfile.write(message)
            outfile.write("\n")

    #Setting up path and information about date and time
    def on_connect(self):
        """Select today's output file once the stream is connected."""
        # One timestamp for everything, so the file name and the stored day
        # cannot disagree when the connection happens right at midnight.
        now = datetime.now()
        self._set_day(now)
        print('Connected to API at ' + str(now))

    #Storing tweets and checks whether it is still the same day (if not it starts a new file)
    def on_data(self, data):
        """Append one received tweet to the file of the current day.

        Args:
            data: raw JSON text of a single stream message.

        Returns:
            True, so that the stream keeps running.
        """
        now = datetime.now()
        # Compare the full date (not only the day-of-month) and tolerate
        # being called before on_connect has set anything up.
        if getattr(self, 'file', None) != now.date().isoformat():
            self._set_day(now)
            print('New day, changing save file to ' + self.file)

        dataj = json.loads(data)
        created_at = dataj.get('created_at') if isinstance(dataj, dict) else None
        if created_at is None:
            # Not a tweet: the stream is opened with stall_warnings = True and
            # can therefore deliver warning/notice messages that carry no
            # 'created_at'. Indexing it directly raised KeyError and tore the
            # connection down; skip such messages instead.
            print('Skipped non-tweet message: ' + str(data).strip()[:200])
            return True

        with open(self.path, 'a') as outfile:
            json.dump(dataj, outfile)
            outfile.write("\n")
        print('Tweet collected at ' + created_at)
        return True

    #Defines error handling - stores errors into a error_log.txt in the desired folder.
    def on_error(self, status):
        """Log the error *status*, wait a minute and keep the stream alive.

        Returns:
            True, so that the stream is not shut down by the error.
        """
        error_log = str(datetime.now().replace(microsecond = 0)) + ":  The stream encountered the following error: " + str(status) + '\n'
        self._log(error_log)
        # Pause before carrying on so a failing connection is not hammered.
        time.sleep(60)
        return True

    def on_timeout(self):
        """Log the timeout, wait a minute and keep the stream alive.

        Returns:
            True, so that the stream is not shut down by the timeout.
        """
        time_log = "%s :  The stream timed out and is being restarted \n" %datetime.now().replace(microsecond = 0)
        self._log(time_log)
        time.sleep(60)
        return True
           
    
        

In [None]:
#This first loads the list of keywords to include and then starts the Listener
#KEYWORDS

# Early childhood education and care (childcare, allowances, babysitting).
ecec = [
    'kinderopvangtoeslag', 'kindgebonden budget', 'kinderbijslag',
    'kinderopvang', 'kinder opvang', 'kinderdagverblijf',
    'kdv', 'gastouder', 'gastouders',
    'gastouderopvang', 'gastouder opvang', 'gastouderbureau',
    'peuterspeelzalen', 'peuterspeelzaal', 'peuterspeelplaats',
    'peutergroep', 'peutergroepen', 'buitenschoolseopvang',
    'buitenschoolse opvang', 'naschoolseopvang', 'naschoolse opvang',
    'naschoolse', 'BSO', 'voorschoolse opvang',
    'voorschoolse', 'voorschoolseopvang', 'oppas',
    'oppassers', 'babysitter', 'babysitters',
    'nanny', 'nannies'
    ]

# Labour-market programmes (training, job mediation, benefits).
lm_programmes = [
    'Participatiewet', 'Participatie wet', 'Gesubsidieerde arbeid',
    'opleiding', 'scholing', 'heropleiding',
    'omscholing', 'training', 'retraining',
    're-training', 'studie', 'studeren',
    'praktijktraining', 'werkervaringsplek', 'stage',
    'stage lopen', 'werkervaringsplek', 'werkervaring plek',
    'studeer en werkplek', 'studeer- en werkplek', 'traineeship',
    'Werkbedrijf', 'werk.nl', 'werkplein',
    'werkpleinen', 'arbeidsadviseur', 'uwv',
    'arbeidsbemiddelaar', 'arbeidsbemiddeling', 'loopbaan coach',
    'werk coach', 'WW-uitkering', 'uitkering',
    'bijstand', 'bijstandsuitkering', 'meewerkaftrek'
    ]

# Types of employment and contracts.
lm_employment = [
    'full-time werk', 'full time werk', 'fulltime werk',
    # A comma was missing after 'full-time baan'; Python then silently glued
    # it to the next literal and neither phrase was tracked.
    'full-time baan', 'full time baan', 'fulltime baan',
    'voltijd baan', 'voltijd werk', 'voltijdwerk',
    '1 fte', '1 wtf', 'deeltijd werk',
    'part-time werk', 'part time werk', 'deeltijd baan',
    'part-time baan', 'part time baan', 'vast contract',
    'vaste baan', 'vaste aanstelling', 'tijdelijk contract',
    'tijdelijke baan', 'tijdelijke aanstelling', 'uitzendcontract',
    'nul uren contract', '0 uren contract', 'zelfstandige zonder personeel',
    'zzp', "zzp'ers", "zzp'er",
    'zzper', 'zzpers', 'DBA modelovereenkomst',
    'schijnzelfstandigheid', 'loondienst', 'in loondienst',
    'eigen baas', 'eigen baas zijn'
    ]

# Phrases about unemployment, job search and job quality.
lm_phrases = [
    'werkloosheid', 'werkeloosheid', 'werkloos',
    # Same missing-comma problem as above, between 'jobless' and
    # 'in between jobs'.
    'zonder baan', 'jobless', 'in between jobs',
    'between jobs', 'in between two jobs', 'between two jobs',
    'onderbezetting', 'onderbezet', 'zoek naar werk',
    'kijken voor werk', 'een baan zoeken',
    'zoeken naar een baan', 'banen zoeken', 'passend werk',
    'passende arbeid', 'passende baan', 'passende job',
    'goed werk', 'slecht werk', 'beter werk',
    'betere kansen op werk', 'beter arbeidscontract', 'goed arbeidscontract',
    'slecht arbeidscontract', 'vacature', 'vacatures',
    'openstaande baan', 'vaardigheidseisen', 'ervaringseisen',
    'werkervaring', 'werkervaringseisen', 'competenties'
    ]

# Full list of phrases to track. dict.fromkeys drops repeated entries
# (e.g. 'werkervaringsplek' is listed twice) while keeping the original order.
keywords = list(dict.fromkeys(ecec + lm_programmes + lm_employment + lm_phrases))

#RUNNING LISTENER
# Imported here as well so this cell does not depend on an earlier cell's imports.
import time

# Seconds to wait before reconnecting after an unexpected failure. Reconnecting
# in a tight loop wastes requests and hides what is going wrong.
RECONNECT_DELAY = 60

# Keep the stream alive until the user interrupts it.
while True:
    try:
        try:
            twitterStream = tweepy.Stream(auth, Listener())
            twitterStream.filter(languages = ["nl"], track = keywords, stall_warnings = True)
        except Exception as error:
            # Report the failure instead of swallowing it silently, then wait
            # before the next attempt. KeyboardInterrupt is not an Exception
            # subclass, so Ctrl-C (also during the wait) reaches the handler below.
            print('Stream stopped unexpectedly (' + repr(error) + '); reconnecting in ' + str(RECONNECT_DELAY) + ' seconds')
            time.sleep(RECONNECT_DELAY)
    except KeyboardInterrupt:
        print('Interrupted by User')
        break