In [54]:
import re
from bs4 import BeautifulSoup
from nltk.tokenize import WordPunctTokenizer
# Module-level tokenizer instance; tweet_cleaner calls tok.tokenize() on the
# letters-only text to split it into words.
tok = WordPunctTokenizer()

# Regex building blocks used by tweet_cleaner to strip noise from tweet text.
pat1 = r'@[A-Za-z0-9_]+'  # Matches @mentions.
pat2 = r'https?://[^ ]+'  # Matches http:// and https:// links.
combined_pat = r'|'.join((pat1, pat2))  # Matches either a mention or a link.
# Matches scheme-less links. The dot is escaped so that only a literal "www."
# prefix counts; unescaped, "www." also matched "www" followed by any
# character, so text such as "awww that" was deleted as if it were a link.
www_pat = r'www\.[^ ]+'
# Maps lower-case negative contractions to their expanded two-word form.
negations_dic = {
    "isn't": "is not", "aren't": "are not", "wasn't": "was not", "weren't": "were not",
    "haven't": "have not", "hasn't": "has not", "hadn't": "had not", "won't": "will not",
    "wouldn't": "would not", "don't": "do not", "doesn't": "does not", "didn't": "did not",
    "can't": "can not", "couldn't": "could not", "shouldn't": "should not", "mightn't": "might not",
    "mustn't": "must not",
}
# Matches any key of negations_dic as a whole word, so each match can be
# replaced by looking it up in the dictionary.
neg_pattern = re.compile(r'\b(' + '|'.join(negations_dic) + r')\b')

def tweet_cleaner(text):
    """Normalise raw tweet text into lower-case, letters-only words.

    Steps, in order: strip HTML markup, drop a leading byte-order mark and
    map U+FFFD to "?", remove mentions and links (combined_pat, www_pat),
    lower-case, expand negative contractions (negations_dic), replace every
    non-letter with a space, and drop single-character tokens.

    Args:
        text: Raw text to clean. ``None`` or an empty string is accepted.

    Returns:
        The remaining words joined by single spaces; "" when *text* is empty
        or ``None``.
    """
    # Optional fields may be passed in as None; there is nothing to clean,
    # and handing None to BeautifulSoup raises.
    if not text:
        return ""

    souped = BeautifulSoup(text, 'lxml').get_text()  # Decodes HTML.
    # The previous bytes-style .decode("utf-8-sig") call always raised on a
    # Python 3 str and was hidden by a bare except, so this clean-up never
    # ran. Apply it directly to the string instead.
    bom_removed = souped.lstrip("\ufeff").replace("\ufffd", "?")
    stripped = re.sub(combined_pat, '', bom_removed)  # Mentions and http(s) links.
    stripped = re.sub(www_pat, '', stripped)  # Scheme-less www links.
    lower_case = stripped.lower()
    # Contractions are expanded before punctuation is removed, while the
    # apostrophes the pattern relies on are still present.
    neg_handled = neg_pattern.sub(lambda match: negations_dic[match.group()], lower_case)
    letters_only = re.sub("[^a-zA-Z]", " ", neg_handled)  # Only lets letters in.
    words = [word for word in tok.tokenize(letters_only) if len(word) > 1]
    return " ".join(words).strip()

In [57]:
import tweepy
import datetime
import time
from nltk.sentiment.vader import SentimentIntensityAnalyzer


def main(ticker, name, timer):
    """Stream English tweets about a ticker into a CSV file with sentiment scores.

    Opens "<ticker>: <start> - <end>.csv" in the working directory, writes a
    header row, then appends one row per streamed tweet that matches *ticker*
    or *name*. The Twitter credentials (api_key, api_secret, access_token,
    access_secret) are read from names defined outside this function.

    Args:
        ticker: First tracking term; also used as the file-name prefix.
        name: Second tracking term.
        timer: How long to collect, in minutes.

    Note:
        The deadline is only evaluated when the listener receives a status or
        an error, so a stream that delivers nothing can run past *timer*.
    """
    # Sets timer.
    timeout = time.time() + timer*60

    # Created once and reused for every tweet instead of per row.
    analyser = SentimentIntensityAnalyzer()

    def writer(f, text, status):
        """Append one CSV row for *status* to *f*, scoring the cleaned *text*."""
        text = tweet_cleaner(text)
        score = analyser.polarity_scores(text)
        # The profile location can be None; pass an empty string so the
        # cleaner is never handed None.
        location = tweet_cleaner(status.user.location or "")
        fields = [
            str(status.favorite_count),
            str(status.retweet_count),
            str(status.source),
            "\"" + str(location) + "\"",
            str(status.created_at),
            text,
            str(status.id),
            str(score["pos"]),
            str(score["compound"]),
            str(score["neu"]),
            str(score["neg"]),
        ]
        f.write(",".join(fields) + "\n")

    class MyStreamListener(tweepy.StreamListener):
        """Listener that writes every received status to the open CSV file."""

        def __init__(self, api):
            super().__init__(api)
            self.api = api
            self.me = api.me()

        def on_status(self, status):
            """Write the full text of the Tweet, or of the original Tweet if it is a Retweet.

            https://www.geeksforgeeks.org/python-status-object-in-tweepy/ - Attributes
            """
            # A Retweet carries the original Tweet under retweeted_status; its
            # own text could be truncated, so read the text from the original.
            text_holder = getattr(status, "retweeted_status", status)
            # Only the text lookup is guarded, so an AttributeError raised
            # while writing the row is not mistaken for a missing
            # extended_tweet.
            try:
                text = text_holder.extended_tweet["full_text"]
            except AttributeError:
                text = text_holder.text
            writer(f, text, status)

            if time.time() > timeout:  # Exits after x seconds has passed.
                return False

        def on_error(self, status):
            """Report a stream error; returning False disconnects the stream."""
            print("Error detected", status)
            # Disconnect when rate limited (HTTP 420) or once the deadline
            # has passed, so errors cannot keep the stream alive past the timer.
            if status == 420 or time.time() > timeout:
                return False

    # Set up file.
    start_time = datetime.datetime.now()
    end_time = start_time + datetime.timedelta(minutes=timer)
    start_time = start_time.strftime("%d:%m:%y %H:%M")
    end_time = end_time.strftime("%d:%m:%y %H:%M")

    # The with-block closes the file even if authentication or streaming raises.
    with open(f"{ticker}: {start_time} - {end_time}.csv", "w+") as f:
        f.write("Like count, Retweet count, Source, User location, date, tweet, tweet_id, pos, compound, neu, neg\n")

        # Authenticate to Twitter
        auth = tweepy.OAuthHandler(api_key, api_secret)
        auth.set_access_token(access_token, access_secret)

        # Create API object
        api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True)

        tweets_listener = MyStreamListener(api)
        stream = tweepy.Stream(api.auth, tweets_listener)
        stream.filter(track=[ticker, name], languages=["en"])