<a href="https://colab.research.google.com/github/HumanitiesDataAnalysis/code20/blob/master/Tweeting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import json
import re
import urllib.request

# Newline-delimited JSON: one tweet object per line.
remote = "http://benschmidt.org/tweets.ndjson"

# Download the whole file once and keep it as a single decoded string.
# The `with` block closes the HTTP response even if reading or decoding fails.
with urllib.request.urlopen(remote) as response:
  remote_text = response.read().decode("utf-8")

In [5]:
# Reference points used by Tweet.distance_to_polling_place().
# Each entry is a (latitude, longitude) pair in decimal degrees -- note that
# this is the opposite order from the GeoJSON-style [longitude, latitude]
# pairs that Tweet.coords() reads out of the tweet JSON.
# NOTE(review): presumably these are New York City polling sites; the source
# of the list is not recorded here -- confirm and cite it.
polling_locations = [(40.5680594, -74.1181754), (40.629476, -74.1484488), (40.5476486, -74.1541394), (40.6088665, -74.1643063), (40.5094381, -74.2452944), (40.5825958, -74.077497), (40.6163124, -74.1437097), (40.5403057, -74.1728837), (40.627431, -74.0805295), (40.6349863, -74.1148927), (40.7023006, -73.7982035), (40.7684097, -73.9303472), (40.7565904, -73.8335695), (40.5894124, -73.8037499), (40.7896488, -73.77216421), (40.6728572, -73.832624), (40.7340215, -73.8626605), (40.7372292, -73.8807904), (40.7502102, -73.8872411), (40.7382528, -73.8216732), (40.7563494, -73.9261383), (40.7438487, -73.93516659), (40.7709491, -73.788652), (40.6963676, -73.7374843), (40.7137044, -73.8303458), (40.7614878, -73.8692014), (40.7414554, -73.733647), (40.7105133, -73.8938946), (40.7971389, -73.9690754), (40.8025784, -73.956329), (40.8162568, -73.9605773), (40.7272482, -74.003446), (40.7714509, -73.9602622), (40.8140141, -73.9443246), (40.7298054, -73.9999408), (40.7507082, -73.9959402), (40.8491027, -73.933295), (40.7134495, -73.9882219), (40.7936912, -73.9507996), (40.7379838, -73.9773665), (40.842416, -73.9437307), (40.7710481, -73.9846269), (40.8406233, -73.9415832), (40.7286686, -73.9798197), (40.5896371, -73.9888006), (40.7092093, -73.9461217), (40.6500809, -73.8891544), (40.6659961, -73.9066642), (40.7046746, -73.9679046), (40.6739286, -73.9371146), (40.6110736, -73.9933181), (40.6858951, -73.919669), (40.6348776, -73.9129108), (40.662725, -74.016379), (40.6391085, -74.017881), (40.6954775, -73.9889615), (40.6880793, -73.9717005), (40.6460037, -73.9595704), (40.6758608, -74.0045508), (40.696622, -73.9494246), (40.6539157, -73.9361042), (40.6272269, -74.0415647), (40.6212115, -73.984709), (40.6779639, -73.8936177), (40.6079272, -73.9650117), (40.5739913, -73.9926782), (40.6967365, -73.9141968), (40.6712083, -73.9658186), (40.630656, -73.9570127), (40.6826571, -73.9774709), (40.6192654, -73.9268272), (40.8740335, -73.8339288), (40.8481724, -73.8999344), (40.8215181, 
-73.8576028), (40.8158207, -73.8165938), (40.900772, -73.8561174), (40.8644488, -73.9024384), (40.82554, -73.8695477), (40.8583665, -73.8892316), (40.8790803, -73.913484), (40.8602873, -73.8625646), (40.8331536, -73.9075623), (40.8833918, -73.8548003), (40.8349613, -73.8785324), (40.823494, -73.9007195), (40.8260887, -73.9260005), (40.8328187, -73.9224181), (40.835419, -73.8627648)]

In [6]:
class Tweet:
  """
  A single tweet, wrapped around the dictionary parsed from its JSON text.

  The accessor methods read the keys 'text', 'user' -> 'name',
  'user' -> 'description' and 'coordinates' -> 'coordinates' from that
  dictionary.
  """

  def __init__(self, text_string):
    """
    Initialized using the text of json. (We could require the
    json to be parsed first, if we wanted... but whatever!)

    Raises json.JSONDecodeError (a ValueError) if `text_string` is not
    valid JSON.
    """
    self.tweet = json.loads(text_string)

  def text(self):
    """Return the body of the tweet."""
    return self.tweet['text']

  def user(self):
    """Return the display name of the account that posted the tweet."""
    return self.tweet['user']['name']

  def bio(self):
    """Return the profile description of the account that posted the tweet."""
    return self.tweet['user']['description']

  def coords(self):
    """
    Return the tweet's position as a (latitude, longitude) tuple, or None
    when the tweet carries no usable point.

    The JSON stores the pair as [longitude, latitude]; it is flipped here so
    that it lines up with the (latitude, longitude) order of
    `polling_locations`.
    """
    try:
      lon, lat = self.tweet['coordinates']['coordinates']
    except (KeyError, TypeError, ValueError):
      # KeyError: no 'coordinates' key; TypeError: the value is null (or not
      # subscriptable); ValueError: the list does not hold exactly two items.
      return None
    return (lat, lon)

  def distance_to_polling_place(self, locations=None):
    """
    Approximate distance from this tweet to the nearest polling place.

    locations: optional iterable of (latitude, longitude) pairs to measure
      against. Defaults to the module-level `polling_locations`.

    Returns float("inf") when the tweet has no coordinates or there are no
    locations to compare with.

    NOTE: the result is in kilometres, not miles. The degree-to-distance
    factor used below, 111, is the length of a degree of latitude in
    kilometres (a degree is only about 69 miles). The numeric behaviour is
    deliberately left unchanged so that existing thresholds keep selecting
    the same tweets.
    """
    position = self.coords()
    if position is None:
      return float("inf")
    if locations is None:
      locations = polling_locations
    lat, lon = position
    # Distance using the pythagorean theorem, measured in degrees.
    # In New York, a degree of longitude is shorter than a degree of
    # latitude; the 2/3 factor is a rough correction for that.
    nearest = min(
      (((lat - plat) ** 2 + ((lon - plon) * 2 / 3) ** 2) ** (1 / 2)
       for plat, plon in locations),
      default=float("inf"),
    )
    # Convert degrees to kilometres (about 111 km per degree of latitude).
    return nearest * 111

  def __repr__(self):
    """Show the first 100 characters of the tweet text."""
    return "TWEET: " + self.text()[:100]



In [7]:
class Tweetset():
  """
  A class that manages a list of tweets.

  The tweets live in the `tweets` attribute, a plain list of objects that
  offer `text()`, `user()` and `distance_to_polling_place()`.
  """
  def __init__(self, sourcetext = None, tweetlist = None):
    """
    Build the set from exactly one of two sources.

    sourcetext: a string with one JSON-encoded tweet per line. Blank lines
      and lines that are not valid JSON are skipped.
    tweetlist: an iterable of already-built tweets. When given it takes
      priority and `sourcetext` is ignored.

    With neither argument the set is empty.
    """
    if tweetlist is not None:
      # Copy, so that sorting this set never reorders the caller's list.
      self.tweets = list(tweetlist)
      return
    self.tweets = []
    if sourcetext is None:
      return
    for line in sourcetext.split("\n"):
      if not line.strip():
        continue
      try:
        self.tweets.append(Tweet(line))
      except ValueError:
        # json.JSONDecodeError is a ValueError: the line was not valid JSON.
        # Skipping keeps the load best-effort without hiding unrelated errors.
        continue

  def find_regex(self, regex):
    """Return a list of the tweets whose text contains a match for `regex`."""
    return [tweet for tweet in self.tweets if re.search(regex, tweet.text())]

  def exclude_user(self, name):
    """Return a new Tweetset without the tweets whose user name equals `name`."""
    kept = [tweet for tweet in self.tweets if tweet.user() != name]
    return Tweetset(tweetlist = kept)

  def sort_by_polling_distance(self):
    """Sort `self.tweets` in place, nearest polling place first."""
    self.tweets.sort(key = lambda x: x.distance_to_polling_place())

  def filter_to_election_hashtags(self):
    """
    Print the text of every tweet that has an election-related string in it.

    Despite the name this matches anywhere in the text, not only hashtags,
    and it prints rather than returning anything.

    NOTE(review): the pattern is kept exactly as it was. The groups in
    `[Ee]lect(ion)` and `poll(s)` are mandatory, so "elect" and "poll" on
    their own do not match, and `line` also matches inside longer words.
    Confirm whether that is intended.
    """
    el_regex = "🌹|🗳️|🇺🇸|Trump|Pence|[vV]ot(e|ing|d)|line|Biden|Harris|2020|[Ee]lect(ion)|poll(s)"
    for tweet in self.find_regex(el_regex):
      print(tweet.text())

In [8]:
# Parse the downloaded text (one JSON tweet per line) into a Tweetset;
# lines that fail to parse are silently dropped by the constructor.
nyc = Tweetset(remote_text)

In [9]:
# Remove every tweet posted under these two user names. Each exclude_user
# call returns a new Tweetset, so `nyc` itself is left untouched.
no_511 = nyc.exclude_user("511 New York").exclude_user("511NY 456 Lines")

In [11]:
# Sorts no_511.tweets in place (nearest polling place first), so re-running
# earlier cells is needed to get the unsorted order back.
no_511.sort_by_polling_distance()
# Show the tweet at index 1 -- the second entry after sorting, not the very
# closest one -- together with its distance.
no_511.tweets[1], no_511.tweets[1].distance_to_polling_place()

(TWEET: #CRUMMIEBEATS #nolimit #mercedesbenz #beats #luxurylifestyle #fym @ Outside https://t.co/JfuNrGxIXX,
 0.02286173367297525)

## Inheriting from a list...

...means that we get all the list methods directly

In [12]:
class Tweetlist(list):
  """
  A class that manages a list of tweets.

  Because it subclasses `list`, the tweets are the list items themselves:
  indexing, slicing, `len()`, iteration and `sort()` all work directly.
  """
  def __init__(self, sourcetext = None, tweetlist = None):
    """
    Build the list from exactly one of two sources.

    sourcetext: a string with one JSON-encoded tweet per line. Blank lines
      and lines that are not valid JSON are skipped.
    tweetlist: an iterable of already-built tweets. When given it takes
      priority and `sourcetext` is ignored.

    With neither argument the list is empty.
    """
    if tweetlist is not None:
      super().__init__(tweetlist)
      return
    super().__init__()
    if sourcetext is None:
      return
    for line in sourcetext.split("\n"):
      if not line.strip():
        continue
      try:
        self.append(Tweet(line))
      except ValueError:
        # json.JSONDecodeError is a ValueError: the line was not valid JSON.
        # Skipping keeps the load best-effort without hiding unrelated errors.
        continue

  @property
  def tweets(self):
    """
    The tweets, i.e. this list itself.

    Kept so that code written against Tweetset's `.tweets` attribute keeps
    working on a Tweetlist.
    """
    return self

  def find_regex(self, regex):
    """
    Return the tweets whose text contains a match for `regex`.

    The result is a Tweetlist (and therefore still a list), so further
    Tweetlist methods can be chained onto it.
    """
    return Tweetlist(
      tweetlist = [tweet for tweet in self if re.search(regex, tweet.text())]
    )

  def exclude_user(self, name):
    """Return a new Tweetlist without the tweets whose user name equals `name`."""
    return Tweetlist(
      tweetlist = [tweet for tweet in self if tweet.user() != name]
    )

  def sort_by_polling_distance(self):
    """Sort this list in place, nearest polling place first."""
    self.sort(key = lambda x: x.distance_to_polling_place())

  def filter_to_election_hashtags(self):
    """
    Print the text of every tweet that has an election-related string in it.

    Despite the name this matches anywhere in the text, not only hashtags,
    and it prints rather than returning anything.

    NOTE(review): the pattern is kept exactly as it was. The groups in
    `[Ee]lect(ion)` and `poll(s)` are mandatory, so "elect" and "poll" on
    their own do not match, and `line` also matches inside longer words.
    Confirm whether that is intended.
    """
    el_regex = "🌹|🗳️|🇺🇸|Trump|Pence|[vV]ot(e|ing|d)|line|Biden|Harris|2020|[Ee]lect(ion)|poll(s)"
    for tweet in self.find_regex(el_regex):
      print(tweet.text())

In [13]:
# Rebuild the collection as a Tweetlist. This rebinds `nyc`, which earlier
# held a Tweetset, so from here on `nyc` behaves like a plain list of tweets.
nyc = Tweetlist(remote_text)

# Print the text of each tweet whose distance to the nearest polling place
# is under one tenth of a unit.
for t in nyc:
  gap = t.distance_to_polling_place()
  if not gap < 1/10:
    continue
  print(f"{t.text()}")


Get out &amp; vote (@ St Johns Rec Center in Brooklyn, NY) https://t.co/AMiYdjPvNR https://t.co/nKrrvU0Wa0
I'm at Sarku Japan in Brooklyn, NY w/ @browncoko @gerryvisco @pattydukesnyc @kimbu18 https://t.co/qmLXPTVEFV
Just posted a photo @ Cuts &amp; Slices NYC https://t.co/zZIiGgesY7
Just posted a photo @ Harlem Food Bar https://t.co/Fb996tB6hX
I'm at Columbia University Medical Center in New York, NY https://t.co/T63EerEZ8T
Dr Kristine Hassan, NYC testimonial about FMR Education...passion, support, safe place to learn with no egos!! Than… https://t.co/TJAvsR37tT
Just posted a photo @ RUE-B https://t.co/L5FoLv2RMR
Dynamite frites as you head into the weekend... 📷: @movingforwardpr  #fries #fries #pommesfrites #belgianfries… https://t.co/ySr8X4nRil
#repost @elviejoyayo
・・・
Churrasco (skirt Steak) for the perfect steak dinner. @ Yayo's Latin Cuisine https://t.co/2NJXcsNiv6
HalfAMill #RestInPowerKing🙏
#MiliatoForEver💪💯 #BrooklynLegends #GoneButneverForgotten #OriginalFirmBiz… https://t.co/

In [14]:
# Tweetlist subclasses list, so plain indexing works; the cell displays the
# first tweet through Tweet.__repr__.
nyc[0]

TWEET: Cleared: Incident on #NJ139Lower EB from East of Tonnelle Avenue to Jersey Ave