In [3]:
import ijson

In [4]:
def tweet_map(json_file, tweet_func, save=False):
    """
    Apply a function to each tweet in a json file

    json_file - path to a json file whose top-level value is an array of tweets
    tweet_func - function that takes in a 'tweet' object, and returns a 'tweet' object
    save (optional) - overwrite json_file with modified json

    returns list where each tweet has tweet_func applied to it

    The file is opened in binary mode so that ijson receives raw bytes and
    does the decoding itself. That keeps the result independent of the
    platform's default text encoding (tweets routinely contain non-ASCII
    characters), and ijson 3 deprecates text-mode file objects as input.
    """
    with open(json_file, 'rb') as f:
        # The "item" prefix selects each element of the top-level array, so
        # tweets are parsed one at a time instead of loading the whole
        # document up front. The results are still collected into a list.
        mapped_tweets = [tweet_func(tweet) for tweet in ijson.items(f, "item")]
    if save:
        # Runs after the input handle is closed because the target is the
        # same path that was just read.
        # NOTE(review): list_to_json is not defined in this cell; presumably
        # it serialises the list to the given path -- confirm it is in scope.
        list_to_json(mapped_tweets, json_file)
    return mapped_tweets

In [5]:
def add_test(tweet):
    """
    Example tweet_func: flag a tweet by setting its "test" entry to True

    tweet - tweet object supporting item assignment (it is modified in place)

    returns the same tweet object that was passed in
    """
    flag_key = "test"
    tweet[flag_key] = True
    return tweet

# add_test sets "test": True on each tweet and returns it, so every entry of
# the resulting list carries the extra key.
processed_tweets = tweet_map("../json/sarcastic/unique.json", add_test)
# Preview only the first few results; echoing the whole list floods the cell
# output. The full list stays available in processed_tweets.
processed_tweets[:5]

[{'id': 827590313191444481,
  'media': False,
  'test': True,
  'text': '@MetalBlonde Come on Karina! What about the #bowlinggreenmassacre ? Damn those Muslims!  #alternatefacts #Sarcasm #WorstPresidentEver',
  'urls': False},
 {'id': 827590233298436101,
  'media': False,
  'test': True,
  'text': 'Take that Black History month.Make America White again, with fatty tissue around the heart #sarcasm #trump #first100\nhttps://t.co/SXnqNmRgqr',
  'urls': True},
 {'id': 827589626944770049,
  'media': False,
  'test': True,
  'text': 'Ears lowered big ears to lower \ud83d\udc87\u200d♂️\ud83d\ude02 #Trim #BigEars #Jokes #Selfie #MugShot #Weekend #Smile #Happy #Sarcasm… https://t.co/oXK7RHlODU',
  'urls': True},
 {'id': 827589344932352000,
  'media': False,
  'test': True,
  'text': 'Officially in love with M$ Azure #sarcasm #powerless',
  'urls': False},
 {'id': 827588929398374400,
  'media': False,
  'test': True,
  'text': '#lol #funny #comedy #sarcasm RT DivinityLA: Introducing "Friendship"

In [6]:
def tweet_iterate(json_file, key=None):
    """
    Stream through objects in a json file

    json_file - path to tweet json file
    key (optional) - single key value of interest (ex: return only "text" field, or only "id" field of each tweet)

    yields each tweet object, or, when key is given, only the value stored
    under that key for each tweet

    This is a generator: the file is opened on the first iteration and stays
    open until the generator is exhausted or closed. It is read in binary
    mode so that ijson receives raw bytes and does the decoding itself,
    independent of the platform's default text encoding; ijson 3 deprecates
    text-mode file objects as input.
    """
    # A falsy key (None or "") means "whole tweet"; otherwise descend into
    # the named field of every element of the top-level array.
    prefix = "item.{}".format(key) if key else "item"
    with open(json_file, 'rb') as f:
        yield from ijson.items(f, prefix)

In [7]:
# Full pass over the file, pulling out only the "id" field of each tweet.
id_stream = tweet_iterate("../json/sarcastic/unique.json", key="id")
for thing in id_stream:
    print(thing)

827590313191444481
827590233298436101
827589626944770049
827589344932352000
827588929398374400
827588865733120004
827588725878116352
827588419576545280
827588001953873921
827587689004355584
827587373269540864
827587185981390849
827586730790420485
827585865597722624
827585687545339905
827584165478436868
827583597842362368
827582714199076864
827582415270916096
827582376649711617
827582273650176000
827582190477127680
827582098991116288
827581929633480704
827581797659705344
827580923264765953
827580820890152961
827580423572107265
827580297097129985
827580292248449024
827580137285758976
827579874562945033
827579086348369921
827579027888054272
827579014718033920
827578488727166976
827578403595313153
827578076129329155
827578007527243779
827577971955228673
827577557465837568
827576811471122436
827576695586648064
827575428474761216
827575171783483392
827574412513775617
827573170567860226
827572264329637888
827571921252347904
827571390085742592
827571213266464769
827570625958928384
827570052471

In [9]:
# A generator does not have to be consumed fully: itertools.islice stops
# after the requested number of items (here the first 5 ids).
import itertools
first_five_ids = itertools.islice(
    tweet_iterate("../json/sarcastic/unique.json", key="id"), 5
)
for thing in first_five_ids:
    print(thing)

827590313191444481
827590233298436101
827589626944770049
827589344932352000
827588929398374400
