In [None]:
import re
from bs4 import BeautifulSoup
from nltk.tokenize import WordPunctTokenizer
# Tokenizer instance created once at import time; tweet_cleaner uses it to split cleaned text into words.
tok = WordPunctTokenizer()

pat1 = r'@[A-Za-z0-9_]+' # Matches @mentions.
pat2 = r'https?://\S+' # Matches http(s) links, ending at any whitespace (space, tab or newline).
combined_pat = r'|'.join((pat1, pat2)) # Matches either a mention or an http(s) link.
# Matches scheme-less "www." links. The dot is escaped so that a plain run of
# w's (e.g. "awwww cute") is not mistaken for the start of a link.
www_pat = r'www\.\S+'
# Maps negative contractions to their expanded two-word form.
negations_dic = {"isn't":"is not", "aren't":"are not", "wasn't":"was not", "weren't":"were not",
                "haven't":"have not","hasn't":"has not","hadn't":"had not","won't":"will not",
                "wouldn't":"would not", "don't":"do not", "doesn't":"does not","didn't":"did not",
                "can't":"can not","couldn't":"could not","shouldn't":"should not","mightn't":"might not",
                "mustn't":"must not"}
# Matches any key of negations_dic as a whole word; keys are lower-case, so apply it to lower-cased text.
neg_pattern = re.compile(r'\b(' + '|'.join(map(re.escape, negations_dic)) + r')\b')

def tweet_cleaner(text):
  """Normalize raw tweet text into lower-case, space-separated words.

  Steps, in order: strip HTML markup and decode entities, drop a leading
  byte-order mark and map the Unicode replacement character to "?", remove
  mentions and links, lower-case, expand negative contractions, replace every
  non-letter with a space, and discard one-character tokens.

  Args:
    text: Raw tweet text; may contain HTML entities, @mentions and links.

  Returns:
    The remaining words joined by single spaces (empty string if none remain).
  """
  souped = BeautifulSoup(text, 'lxml').get_text() # Decodes HTML.
  # get_text() yields a str, and str has no .decode() in Python 3, so the old
  # try / bare-except always failed silently and skipped this clean-up.
  # Perform it directly on the str instead of hiding the error.
  bom_removed = souped.lstrip("\ufeff").replace("\ufffd", "?")
  stripped = re.sub(combined_pat, '', bom_removed) # Gets rid of mentions and http(s) links.
  stripped = re.sub(www_pat, '', stripped) # Gets rid of www links.
  lower_case = stripped.lower()
  # Expand contractions before punctuation is dropped, otherwise "can't" would become "can t".
  neg_handled = neg_pattern.sub(lambda match: negations_dic[match.group()], lower_case)
  letters_only = re.sub("[^a-zA-Z]", " ", neg_handled) # Only lets letters in.
  # Tokenizing and re-joining collapses repeated whitespace; one-letter tokens are dropped.
  words = [word for word in tok.tokenize(letters_only) if len(word) > 1]
  return " ".join(words).strip()

In [None]:
from tweepy import Stream
from tweepy import OAuthHandler
from tweepy.streaming import StreamListener
#from api_info import * # api_key, api_secret, access_token, access_secret
import os
import json
import datetime

# Run configuration. In the application the ticker and company name should be supplied as input.
timeout = 5 # How long to collect tweets, in seconds.
ticker, name = "TSLA", "Tesla"

# Moment after which the stream listener should stop collecting.
endtime = datetime.datetime.now() + datetime.timedelta(seconds=timeout)

# Open the per-ticker CSV in append mode (created when missing) and write the
# header row only when the file has no content yet.
f = open(f"{ticker}.csv", "a", encoding='utf-8')
if not os.path.getsize(f.name):
  f.write("Time, Text\n")

class listener(StreamListener):
  """Stream listener that appends each incoming tweet to the open CSV file.

  Every tweet is cleaned with tweet_cleaner and written as a "time, text"
  row to the module-level file ``f``. Once ``endtime`` has passed, the file
  is closed and the stream is asked to disconnect.
  """
  def on_data(self, data):
    """Handle one raw stream message.

    Args:
      data: JSON string received from the stream.

    Returns:
      False once ``endtime`` has passed (tweepy disconnects the stream when
      a callback returns False), True otherwise.
    """
    tweet = json.loads(data) # Converts string to dictionary.

    # Not every stream message is a tweet (e.g. limit or delete notices), so
    # "text" may be absent. Prefer the untruncated text when the payload
    # carries an "extended_tweet" object.
    raw_text = (tweet.get("extended_tweet") or {}).get("full_text") or tweet.get("text")
    if raw_text is not None:
      text = tweet_cleaner(raw_text)
      timestamp = str(datetime.datetime.now())

      f.write(timestamp + ", " + text + "\n") # Outputs information to file.
      # Writes are buffered; flushing pushes each row to the file right away
      # so nothing is lost if the process is interrupted.
      f.flush()

    if datetime.datetime.now() > endtime:
      print("it works")
      f.close()
      # Returning False stops the stream cleanly; exit() would kill the
      # whole interpreter and crashes notebook kernels.
      return False
    return True

  def on_error(self, status):
    """Print the error status and disconnect when rate limited.

    Args:
      status: HTTP status code reported by the stream.

    Returns:
      False for status 420 (rate limited) so the stream disconnects instead
      of reconnecting with ever longer back-off; None otherwise.
    """
    print(status) # Prints the error if it exists.
    if status == 420:
      return False

# Authenticate against the Twitter API.
# NOTE(review): api_key, api_secret, access_token and access_secret are not
# defined in the visible code; presumably they come from the commented-out
# api_info import. Confirm they are defined before this runs.
auth = OAuthHandler(api_key, api_secret)
auth.set_access_token(access_token, access_secret)

# Build the stream around a listener instance and start filtering for
# English tweets that mention either the ticker symbol or the company name.
twitterStream = Stream(auth, listener(), tweet_mode="extended")
twitterStream.filter(track=[ticker, name], languages=["en"])