[Reference](https://kjhealey.medium.com/the-open-source-data-platform-part-1-orchestration-prefect-d7b635a71ce5)

In [1]:
!pip install prefect  ## requires python3.7 or later
!prefect backend server
!prefect server start

In [2]:
!prefect agent local start

In [3]:
import json
from datetime import timedelta

import pymongo
import requests
import tweepy
from prefect import task, Flow, Parameter
from prefect.engine.results import LocalResult
from prefect.run_configs import LocalRun
from prefect.schedules import IntervalSchedule
from prefect.schedules import Schedule

# Builds the tweepy client that the downstream tasks use to talk to Twitter.
@task(name="authorize connection")
def pre_auth(bearer_token):
    """Return a ``tweepy.Client`` authenticated with ``bearer_token``.

    The client is configured with ``return_type=requests.Response``; the
    query task relies on this when it calls ``.json()`` on the search result.
    """
    client_options = {
        "bearer_token": bearer_token,
        "return_type": requests.Response,
    }
    return tweepy.Client(**client_options)

# Runs a recent-tweet search through the tweepy client, limited to the given query.
# NOTE: the name shadows the builtin ``filter``; it is kept because the flow calls it by this name.
@task(name="query data")
def filter(client, query):
    """Search recent tweets matching ``query`` and return the decoded JSON body.

    Requests the ``context_annotations`` and ``created_at`` tweet fields and
    asks for at most 100 results.
    """
    requested_fields = ['context_annotations', 'created_at']
    response = client.search_recent_tweets(
        query=query,
        tweet_fields=requested_fields,
        max_results=100,
    )
    return response.json()

# Prints the data to the console so you can see the output and check that data is being gathered.
@task(name="print data")
def show(tweets):
    """Print the text (and any context annotations) of each fetched tweet.

    ``tweets`` is the decoded JSON payload produced by the query task, i.e. a
    dict whose ``'data'`` entry is a list of tweet dicts -- the same shape the
    save task consumes. A payload without ``'data'`` prints nothing.
    """
    for tweet in tweets.get('data', []):
        print(tweet['text'])
        # A tweet dict may not carry 'context_annotations' at all, so use .get().
        annotations = tweet.get('context_annotations')
        if annotations:
            print(annotations)

# Saves the Twitter data to a MongoDB instance on localhost.
# The connection string, database and collection are hard-coded in this example to save time.
# The next article shows why this might not be needed.

@task(name="save data")
def save(tweets):
    """Insert every tweet in ``tweets['data']`` into the ``tweets.prefect`` collection.

    ``tweets`` is the decoded JSON payload from the query task. When it has no
    ``'data'`` entry (or an empty one) there is nothing to store and the task
    returns without opening a connection.
    """
    documents = tweets.get('data', [])
    if not documents:
        # insert_many() rejects an empty list, and no 'data' means no tweets to save.
        return

    # The context manager closes the client when the block exits, even on error.
    with pymongo.MongoClient("mongodb://localhost:27017/") as client:
        collection = client['tweets']['prefect']
        # One bulk insert instead of one round trip per tweet.
        collection.insert_many(documents)

# Once registered, the flow is scheduled to run every two minutes.
schedule = IntervalSchedule(interval=timedelta(minutes=2))

# Order your tasks within a flow.
with Flow("Twitter Extract", schedule=schedule, result=LocalResult()) as flow:
    client = pre_auth("bearer token here")  # Connect to the client
    tweets = filter(client, '#prefect -is:retweet lang:en')  # Filter the data down to relevant data
    # show(tweets)  # Show the tweets; for local debugging only, turn off when the flow is stable
    save(tweets)  # Save tweets as documents inside a MongoDB instance.

flow.run_config = LocalRun()

# Do a single local test run. With a schedule attached, a plain flow.run()
# follows the schedule by default and never returns, so the register call
# below would not be reached.
flow.run(run_on_schedule=False)

flow.register(project_name="twitter_extract")