# Imports

In [1]:
%pip install tweepy --upgrade

import tweepy
import random
import configparser
import numpy as np
from string import punctuation
from collections import Counter

Collecting tweepy
  Downloading tweepy-4.4.0-py2.py3-none-any.whl (65 kB)
[K     |████████████████████████████████| 65 kB 2.6 MB/s eta 0:00:01
Installing collected packages: tweepy
Successfully installed tweepy-4.4.0
Note: you may need to restart the kernel to use updated packages.


# Download and Clean Tweets

In [2]:
# Load the Twitter API credentials from the [Tokens] section of config.ini.
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it parsed, so check it here to fail with a clear message instead of
# a confusing KeyError on 'Tokens' below.
if not config.read('config.ini'):
  raise FileNotFoundError("could not read 'config.ini' with a [Tokens] section")

tokens = config['Tokens']

# Client used by the download cell for user and tweet lookups.
client = tweepy.Client(
  bearer_token=tokens['BearerToken'],
  consumer_key=tokens['ConsumerKey'],
  consumer_secret=tokens['ConsumerSecret'],
  access_token=tokens['AccessToken'],
  access_token_secret=tokens['AccessTokenSecret'],
)

In [16]:
tweets = []
users = [
  'afraidofwasps',
  'ameliaelizalde',
  'boss_on_here',
  'darth_erogenous',
  'dril',
  'i_zzzzzz',
  'laserboat999',
  'len0killer',
  'Liv_Agar',
  'lunch_enjoyer',
  'nibiru_TRUTH',
  'OkButStill',
  'oldfriend99',
  'peterxinping',
  'pizza_jones',
  'RadishHarmers',
  'rajat_suresh',
  's4m31p4n',
  'Senn_Spud',
  'yesitsmyaccount',
  'ZeroSuitCamus'
]

# get twitter ids from usernames
# (`data` is None when nothing resolves, so fall back to an empty list)
user_data = client.get_users(usernames=users)
user_ids = [user.id for user in (user_data.data or [])]

# get tweets for each user id
for user_id in user_ids:
  users_tweets = client.get_users_tweets(
    user_id,
    exclude=['retweets', 'replies'],
    max_results=100,
  )
  # `data` is None when the user has no matching tweets; skip instead of
  # raising TypeError while iterating over None.
  tweets += [tweet.text for tweet in (users_tweets.data or [])]

tweets

['been smearing worms on the sidewalk all week and no organs in there, No systems. just this weird material...',
 'worm is the same material all the way theough... no organs...',
 "He's doing topical tweets again... Weird guys look away, it's a normies paradise tonight",
 '(Jar Jar Binks getting the word "Meesa" in wordle)\n\nWordle 216 1/6\n\n🟩🟩🟩🟩🟩',
 "(getting pelted by an upperclassman) Well, I suppose the Snow-Ball hath flown its arc and starr'd the Sides of My Head. Lmfao. (glancing at my crush to see if she caught the reference but she's sending Shawn an Emoji)",
 "Had a dream there was a Todd and Aaron's Game Awards subreddit with like 100k members and everybody would post and discuss the rankings",
 'Multiple invisible quote tweets means you either wrote something that sucks ass or you wrote something that gives sensitive, terrified people strength',
 '(Bully circling me like an apex predator) Aww is somebody having a cromulent fuckcrustable of a day? Does somebody needy drinky

In [17]:
def clean_tweet(tweet):
  """Normalize one raw tweet for the n-gram model.

  The text is lowercased, passed through ``remove_links`` and then through
  ``separate_punctuation``; the resulting string is returned.
  """
  lowered = tweet.lower()
  without_links = remove_links(lowered)
  return separate_punctuation(without_links)

def remove_links(tweet):
  """Drop every whitespace-separated token that contains a web link.

  A token counts as a link when it contains ``http://`` or ``https://``
  (case-insensitive). The remaining tokens are re-joined with single spaces,
  so runs of whitespace, including newlines, collapse to one space.

  Args:
    tweet: Text of the tweet.

  Returns:
    The tweet text without link tokens.
  """
  kept_words = []
  for word in tweet.split():
    lowered = word.lower()
    if 'http://' in lowered or 'https://' in lowered:
      continue
    kept_words.append(word)
  return ' '.join(kept_words)

def separate_punctuation(tweet):
  """Surround every ``string.punctuation`` character with a space on each side.

  All other characters are copied through unchanged, so splitting the result
  on whitespace yields each punctuation mark as its own token.
  """
  # Translation table: code point of each punctuation mark -> padded mark.
  padded_marks = {ord(mark): f' {mark} ' for mark in punctuation}
  return tweet.translate(padded_marks)

# Clean every downloaded tweet and keep only those that still have text left
# (for example, a tweet that consisted solely of a link cleans to '').
cleaned_tweets = [cleaned for cleaned in map(clean_tweet, tweets) if cleaned]

cleaned_tweets

['been smearing worms on the sidewalk all week and no organs in there ,  no systems .  just this weird material .  .  . ',
 'worm is the same material all the way theough .  .  .  no organs .  .  . ',
 "he ' s doing topical tweets again .  .  .  weird guys look away ,  it ' s a normies paradise tonight",
 ' ( jar jar binks getting the word  " meesa "  in wordle )  wordle 216 1 / 6 🟩🟩🟩🟩🟩',
 " ( getting pelted by an upperclassman )  well ,  i suppose the snow - ball hath flown its arc and starr ' d the sides of my head .  lmfao .   ( glancing at my crush to see if she caught the reference but she ' s sending shawn an emoji ) ",
 "had a dream there was a todd and aaron ' s game awards subreddit with like 100k members and everybody would post and discuss the rankings",
 'multiple invisible quote tweets means you either wrote something that sucks ass or you wrote something that gives sensitive ,  terrified people strength',
 ' ( bully circling me like an apex predator )  aww is somebody hav

# Create N-gram Model

In [18]:
class TweetModel:
  """N-gram language model over whitespace-tokenized tweets.

  Every training tweet is padded with ``n - 1`` leading ``'<s>'`` markers and
  one trailing ``'</s>'`` marker, and each (context, next token) pair is
  counted. New tweets are generated by repeatedly sampling the next token in
  proportion to those counts until ``'</s>'`` is drawn.

  Attributes:
    n: Order of the model; a token is conditioned on the previous ``n - 1``.
    counts: Counter mapping ``(context_tuple, token)`` to its frequency.
  """

  def __init__(self, tweets, n):
    """Build the model from an iterable of tweet strings.

    Args:
      tweets: Iterable of (already cleaned) tweet strings.
      n: Order of the n-grams to count.
    """
    self.n = n
    self.counts = Counter()
    # context tuple -> Counter of the tokens observed right after it. Kept in
    # step with `counts` by process_tweets so that lookups do not have to scan
    # every stored n-gram.
    self._followers = {}
    self.process_tweets(tweets)

  def create_ngrams(self, tokens):
    """Return the n-grams of one tokenized tweet.

    Args:
      tokens: List of tokens of a single tweet.

    Returns:
      A tuple with ``len(tokens) + 1`` entries of the form
      ``(context, token)``, where ``context`` is a tuple of the ``n - 1``
      preceding tokens (padded with ``'<s>'`` at the start) and the final
      entry predicts ``'</s>'``.
    """
    context_size = max(self.n - 1, 0)
    padded = ['<s>'] * context_size + list(tokens) + ['</s>']
    return tuple(
      (tuple(padded[i:i + context_size]), padded[i + context_size])
      for i in range(len(tokens) + 1)
    )

  def process_tweets(self, tweets):
    """Tokenize each tweet and add its n-grams to the model's counts."""
    for tweet in tweets:
      for context, token in self.create_ngrams(self.tokenize(tweet)):
        self.counts[(context, token)] += 1
        self._followers.setdefault(context, Counter())[token] += 1

  def tokenize(self, tweet):
    """Split a tweet into tokens on runs of whitespace."""
    return tweet.split()

  def get_probability(self, context, token):
    """Return the relative frequency of ``token`` following ``context``.

    Args:
      context: Sequence of the ``n - 1`` preceding tokens.
      token: Candidate next token.

    Returns:
      ``count(context, token) / count(context)``, or ``0.0`` when the context
      was never observed (instead of dividing by zero).
    """
    followers = self._followers.get(tuple(context))
    if not followers:
      return 0.0
    return followers[token] / sum(followers.values())

  def random_token(self, context):
    """Sample the next token for ``context`` in proportion to its count.

    Args:
      context: Sequence of the ``n - 1`` preceding tokens.

    Returns:
      A token drawn from the observed followers of ``context`` (possibly
      ``'</s>'``), or ``None`` when the context was never observed.
    """
    followers = self._followers.get(tuple(context))
    if not followers:
      return None
    # A single weighted draw. Comparing a running total against a fresh random
    # number per candidate would over-sample the most frequent tokens.
    candidates = list(followers)
    weights = list(followers.values())
    return random.choices(candidates, weights=weights, k=1)[0]

  def random_tweet(self):
    """Generate a tweet by sampling tokens until the end marker is drawn.

    Returns:
      The sampled tokens joined by single spaces; an empty string when the
      model has no data for the start context.
    """
    context_size = max(self.n - 1, 0)
    context = ('<s>',) * context_size
    tweet = []
    while True:
      token = self.random_token(context)
      # None means the context has no known continuation (e.g. empty model);
      # stop rather than loop forever.
      if token is None or token == '</s>':
        break
      tweet.append(token)
      # A unigram model has an empty context, so there is nothing to slide.
      if context_size:
        context = context[1:] + (token,)
    return ' '.join(tweet)

In [19]:
# Fit a trigram model (n=3): TweetModel conditions each token on the
# n - 1 = 2 tokens that precede it.
model = TweetModel(tweets=cleaned_tweets, n=3)

# Generate Tweets

In [22]:
def print_tweet(tweet):
  """Print a tweet inside an ASCII-art tweet card.

  The text is wrapped with ``tweet_to_lines`` and each line is padded to the
  59-character inner width of the card.

  Args:
    tweet: Text of the tweet to display.
  """
  print('+------------------------------------------------------------+')
  print('| +------+                                                   |')
  # The backslash is escaped: a bare '\ ' is an invalid escape sequence.
  print('| |  /\\  | N-Gram Bot @ngrambot - 12h                        |')
  print('| | [OO] |                                                   |')
  print('| |  []  |                                                   |')
  print('| +------+                                                   |')
  print('|                                                            |')

  for line in tweet_to_lines(tweet):
    padding = ' ' * (59 - len(line))
    print('| ' + line + padding + '|')

  print('|                                                            |')
  print('| <3 31.4k                                                   |')
  print('+------------------------------------------------------------+')

def tweet_to_lines(tweet, width=59):
  """Greedily wrap a tweet into lines of at most ``width`` characters.

  Tokens are split on whitespace and packed onto a line, each followed by a
  single space (so ordinary lines end with a trailing space). A token that is
  too long to fit on a line together with its trailing space is hard-wrapped
  into ``width``-character chunks instead of overflowing the line.

  Args:
    tweet: Text of the tweet.
    width: Maximum line length in characters; defaults to 59, the inner width
      of the card drawn by ``print_tweet``.

  Returns:
    List of line strings; ``['']`` for a tweet without tokens.

  Raises:
    ValueError: If ``width`` is smaller than 1.
  """
  if width < 1:
    raise ValueError('width must be at least 1')

  lines = []
  curr_line = ''
  for token in tweet.split():
    # A token of `width` or more characters can never share a line with its
    # trailing space, so emit it in full-width chunks.
    while len(token) >= width:
      if curr_line:
        lines.append(curr_line)
        curr_line = ''
      lines.append(token[:width])
      token = token[width:]
    if not token:
      continue

    if len(curr_line) + len(token) + 1 <= width:
      curr_line += token + ' '
    else:
      lines.append(curr_line)
      curr_line = token + ' '

  # Keep the historical [''] result for empty input, but do not add a blank
  # line after a hard-wrapped token that ended exactly on a chunk boundary.
  if curr_line or not lines:
    lines.append(curr_line)
  return lines

In [23]:
# Sample two tweets from the fitted model and render each one as an ASCII card.
for i in range(2):
  print_tweet(model.random_tweet())

+------------------------------------------------------------+
| +------+                                                   |
| |  /\  | N-Gram Bot @ngrambot - 12h                        |
| | [OO] |                                                   |
| |  []  |                                                   |
| +------+                                                   |
|                                                            |
| when the guy who is also his wife is like a gun into the   |
| pit at a low volume .                                      |
|                                                            |
| <3 31.4k                                                   |
+------------------------------------------------------------+
+------------------------------------------------------------+
| +------+                                                   |
| |  /\  | N-Gram Bot @ngrambot - 12h                        |
| | [OO] |                                             