In [2]:
# Project 3 - GeoTweet+
# 
# @Author Jeffery Brown (daddyjab)
# @Date 5/1/19
# @File GeoTweet_Adding_Tweet_Table


# Code: app.py - Initial Flask App Setup

In [3]:
# Project 3 - GeoTweet+
# 
# @Author Jeffery Brown (daddyjab)
# @Date 5/1/19
# @File app.py


# import necessary libraries
import os
from flask import Flask, render_template, jsonify, request, redirect

# Import Flask_CORS extension to enable Cross Origin Resource Sharing (CORS)
# when deployed on Heroku
from flask_cors import CORS

#################################################
# Flask Setup
#################################################
# Create the Flask application object that the rest of this file configures
app = Flask(__name__)

# Enable Tracking of Flask-SQLAlchemy events for now (probably not needed)
# NOTE(review): nothing in the visible code listens for these events; if that
# holds for the whole project this could be set to False - confirm first.
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True

# Provide cross origin resource sharing (CORS) for this app
CORS(app)

#################################################
# Database Setup
#################################################

from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.sql.expression import func, and_, or_
from sqlalchemy.sql.functions import coalesce

#Probably don't need these from SQLAlchemy: asc, desc, between, distinct, func, null, nullsfirst, nullslast, or_, and_, not_

# Database URI selection, in increasing order of priority:
#   1. the local SQLite file (default, set just below)
#   2. a local PostgreSQL instance, if api_config provides a login and password
#   3. the DATABASE_URL environment variable, if set and non-empty

# Local DB path for SQLite - default
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    db_path_flask_app = "sqlite:///data/twitter_trends.db"
# CODE TO RUN IN JUPYTER NOTEBOOK
db_path_flask_app = "sqlite:///../../python/data/twitter_trends.db"

# Local DB path for PostgreSQL - use only if login/password populated
try:
    # PostgreSQL Database Login/Password
    # -- only needed if using a local PostgreSQL instance (vs. SQLite)
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    from .api_config import (postgres_geotweetapp_login, postgres_geotweetapp_password)
# CODE TO RUN IN JUPYTER NOTEBOOK
    from api_config import (postgres_geotweetapp_login, postgres_geotweetapp_password)

    # If the login and password are populated, switch from SQLite to PostgreSQL
    # NOTE(review): the login/password are interpolated into the URL unescaped;
    # a password containing characters such as '@' or '/' would presumably
    # produce a malformed URL - confirm and URL-quote if needed.
    if (postgres_geotweetapp_login is not None) and (postgres_geotweetapp_password is not None):
        db_path_flask_app = f"postgresql://{postgres_geotweetapp_login}:{postgres_geotweetapp_password}@localhost/twitter_trends"
        print("Note: PostgreSQL database login/password is populated")

# If the api_config file (or either name in it) is not available,
# keep the SQLite default and just note it
except ImportError:
    print("Note: PostgreSQL database login/password is *not* populated")

# A non-empty DATABASE_URL environment variable overrides the local path chosen above
app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get('DATABASE_URL', '') or db_path_flask_app

# Flask-SQLAlchemy database handle; the model classes in this file derive from db.Model
db = SQLAlchemy(app)

# Import the schema for the Location and Trend tables needed for
# 'twitter_trends.sqlite' database tables 'locations' and 'trends'
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK: from .models import (Location, Trend)

# Import database management functions needed to update the
# 'twitter_trends.sqlite' database tables 'locations' and 'trends'
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK: from .db_management import (
#     api_rate_limits, api_calls_remaining, api_time_before_reset,
#     update_db_locations_table, update_db_trends_table,
#     parse_date_range
#     )

Note: PostgreSQL database login/password is *not* populated


# Code: models.py - SQLAlchemy Models for Tables
# In app.py: `from .models import (Location, Trend)`

In [4]:
# Project 3 - GeoTweet+
# 
# @Author Jeffery Brown (daddyjab)
# @Date 5/1/19
# @File models.py

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK: from .app import db

# Database schema for Twitter 'locations' table
class Location(db.Model):
    """SQLAlchemy model for the 'locations' table.

    Holds all of the locations in the U.S. for which top trends data is
    available, as well as location specific info like latitude/longitude.
    Rows are keyed for lookups by the unique 'woeid'; 'trends.woeid' is a
    foreign key to it.

    NOTE: the attribute 'tritter_country_code' is misspelled, but it is the
    actual column name and get_loc_with_trends_available_to_df() renames the
    Twitter 'countryCode' field to exactly this spelling, so it must not be
    "corrected" here without a matching data migration.
    """
    __tablename__ = 'locations'

    # Surrogate primary key and the time this row was last refreshed
    id = db.Column(db.Integer, primary_key=True)
    updated_at = db.Column( db.DateTime )

    # Where On Earth ID - unique per location, target of Trend.woeid
    woeid = db.Column(db.Integer, unique=True, nullable=False)

    # Remaining columns are all optional descriptive attributes of the location
    twitter_country = db.Column(db.String(100))
    tritter_country_code = db.Column(db.String(10))
    twitter_name = db.Column(db.String(250))
    twitter_parentid = db.Column(db.Integer)
    twitter_type = db.Column(db.String(50))
    country_name = db.Column(db.String(250))
    country_name_only = db.Column(db.String(250))
    country_woeid = db.Column(db.Integer)
    county_name = db.Column(db.String(250))
    county_name_only = db.Column(db.String(250))
    county_woeid = db.Column(db.Integer)
    latitude = db.Column(db.Float)
    longitude = db.Column(db.Float)
    name_full = db.Column(db.String(250))
    name_only = db.Column(db.String(250))
    name_woe = db.Column(db.String(250))
    place_type = db.Column(db.String(250))
    state_name = db.Column(db.String(250))
    state_name_only = db.Column(db.String(250))
    state_woeid = db.Column(db.Integer)
    timezone = db.Column(db.String(250))

    # All Trend rows for this location; also adds Trend.my_location (backref)
    my_trends = db.relationship('Trend', backref=db.backref('my_location', lazy=True))

    def __repr__(self):
        # Closing ']' was missing in the original format string
        return f"<Location {self.name_full} [updated_at: {self.updated_at}]>"

In [5]:
# Database schema for Twitter 'trends' table
class Trend(db.Model):
    """SQLAlchemy model for the 'trends' table.

    Holds all of the top trends associated with locations in the
    'locations' table.  Each row is linked to its location through 'woeid'
    (foreign key to 'locations.woeid'); the related Location object is
    reachable as 'my_location', a backref declared on Location.my_trends.
    """
    __tablename__ = 'trends'

    # Surrogate primary key and the time this row was written
    id = db.Column(db.Integer, primary_key=True)
    updated_at = db.Column( db.DateTime )

    # Location this trend belongs to (nullable, so my_location may be None)
    woeid = db.Column(db.Integer, db.ForeignKey('locations.woeid') )

    # Values stored as received, as strings
    twitter_as_of = db.Column(db.String(100))
    twitter_created_at = db.Column(db.String(100))
    twitter_name = db.Column(db.String(250))

    # Per-trend details
    twitter_tweet_name = db.Column(db.String(250))
    twitter_tweet_promoted_content = db.Column(db.String(250))
    twitter_tweet_query = db.Column(db.String(250))
    twitter_tweet_url = db.Column(db.String(250))
    twitter_tweet_volume = db.Column(db.Float)

    def __repr__(self):
        # 'woeid' is nullable and may not match a Location row, in which case
        # my_location is None; fall back to the raw woeid rather than raising
        # AttributeError from inside repr().
        location = self.my_location
        if location is not None:
            location_label = location.name_full
        else:
            location_label = f"woeid {self.woeid}"

        # Closing ']' was missing in the original format string
        return f"<Trend {location_label}: {self.twitter_tweet_name} [updated_at: {self.updated_at}]>"

In [6]:
# Database schema for Twitter 'tweets' table
class Tweet(db.Model):
    """SQLAlchemy model for the 'tweets' table.

    Holds tweets associated with the search terms in the 'trends' table,
    which are referred to in that table as "twitter_tweet_name".

    The numeric id columns use BigInteger: Twitter ids are 64-bit values and
    do not fit a 32-bit INTEGER column (e.g. on PostgreSQL).  The '*_id_str'
    columns keep the same ids as text.
    """
    __tablename__ = 'tweets'

    # Surrogate primary key and the time this row was written
    id = db.Column(db.Integer, primary_key=True)
    updated_at = db.Column( db.DateTime )

    # Tweet identity; the string form is the unique, required key
    tweet_id = db.Column( db.BigInteger )
    tweet_id_str = db.Column( db.String(50), unique=True, nullable=False )

    # Search term this tweet was found with.  Stored as plain text; it is
    # deliberately not declared as a ForeignKey to 'trends.twitter_tweet_name'.
    tweet_search_term = db.Column(db.String(250))
    tweet_created_at = db.Column(db.String(100))

    tweet_is_a_quote_flag = db.Column( db.Boolean )
    tweet_is_a_retweet_flag = db.Column( db.Boolean )

    tweet_entities_hashtags_count = db.Column( db.Integer )
    tweet_entities_user_mentions_count = db.Column( db.Integer )
    tweet_favorite_counts = db.Column( db.Integer )
    tweet_retweet_counts = db.Column( db.Integer )

    tweet_lang = db.Column( db.String(10) )
    tweet_source = db.Column(db.String(250))
    # NOTE(review): 250 characters may be too short if full-length (280
    # character) tweet text is ever stored here - confirm against the loader.
    tweet_text = db.Column(db.String(250))

    # Author of the tweet
    tweet_user_id = db.Column( db.BigInteger )
    tweet_user_id_str = db.Column( db.String(50) )
    tweet_user_created_at = db.Column(db.String(100))
    tweet_user_lang = db.Column( db.String(10) )
    tweet_user_name = db.Column( db.String(100) )
    tweet_user_screen_name = db.Column( db.String(100) )
    tweet_user_description = db.Column( db.String(250) )
    tweet_user_statuses_count = db.Column( db.Integer )
    tweet_user_favourites_count = db.Column( db.Integer )
    tweet_user_followers_count = db.Column( db.Integer )
    tweet_user_friends_count = db.Column( db.Integer )
    tweet_user_listed_count = db.Column( db.Integer )

    def __repr__(self):
        # Closing ']' was missing in the original format string
        return f"<Tweet {self.tweet_search_term}: {self.tweet_id} [updated_at: {self.updated_at}]>"

# Code: db_management.py - Database Update Functions
# In app.py: `from .db_management import (`
#     `api_rate_limits, api_calls_remaining, api_time_before_reset,`
#     `update_db_locations_table, update_db_trends_table,`
#     `parse_date_range`
#     `)`

In [7]:
# Project 3 - GeoTweet+
# 
# @Author Jeffery Brown (daddyjab)
# @Date 5/1/19
# @File db_management.py

# This file contains functions which update the
# 'twitter_trends.sqlite' database tables
# 'locations' and 'trends' via API calls to Twitter and Flickr

# The following dependencies are only required for update/mgmt of
# 'locations' and 'trends' data
# datetime (datetime, date) and dateutil(parser)
# may be required by some Flask routes
# indirectly via the parse_date_range() function
import json
import time
import os
import pandas as pd
import numpy as np
from datetime import datetime, date
from dateutil import tz, parser

import requests
from requests.utils import quote

from pprint import pprint

# Import a pointer to the Flask-SQLAlchemy database session
# created in the main app.py file
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:         from .app import db, app

# Import the Database models defined in the models.py file
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:         from .models import Location, Trend

# Only perform import of local API config file if this Flask app is being run locally.
# If being run from Heroku the keys will be provided
# via the app environment variables configured there

# Load the Twitter and Flickr API keys.
# First choice: environment variables (e.g., as configured on Heroku).
# Fallback: the local api_config module, used when ANY of the six
# environment variables is missing (the first missing one raises KeyError).
try:
    # This will run if the keys are all set via Heroku environment

    # Twitter API
    key_twitter_tweetquestor_consumer_api_key = os.environ['key_twitter_tweetquestor_consumer_api_key']
    key_twitter_tweetquestor_consumer_api_secret_key = os.environ['key_twitter_tweetquestor_consumer_api_secret_key']
    key_twitter_tweetquestor_access_token = os.environ['key_twitter_tweetquestor_access_token']
    key_twitter_tweetquestor_access_secret_token = os.environ['key_twitter_tweetquestor_access_secret_token']

    # Flickr API
    key_flicker_infoquestor_key = os.environ['key_flicker_infoquestor_key']
    key_flicker_infoquestor_secret = os.environ['key_flicker_infoquestor_secret']

except KeyError:
    # Keys have not been set in the environment
    # So need to import them locally
    try:
        # Twitter API keys
        # Flickr API keys
        # NOTE(review): the wildcard import brings in every public name from
        # api_config; presumably that includes the six key_* names read above -
        # confirm against api_config.  Names already assigned from the
        # environment before the KeyError are overwritten by same-named imports.
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:         from .api_config import *
# CODE TO RUN IN JUPYTER NOTEBOOK
        from api_config import *

    # If the api_config file is not available, then all we can do is flag an error.
    # The key_* names may then be undefined for the code that follows.
    except ImportError:
        print("Import Keys: At least one of the API Keys has not been populated on Heroku, and api_config not available!")

# Setup Tweepy API Authentication to access Twitter
import tweepy

# Build the module-level Tweepy client 'api' used by all Twitter helper
# functions below.  The JSONParser makes API calls return plain
# dicts/lists instead of Tweepy model objects.
try:
    auth = tweepy.OAuthHandler(key_twitter_tweetquestor_consumer_api_key, key_twitter_tweetquestor_consumer_api_secret_key)
    auth.set_access_token(key_twitter_tweetquestor_access_token, key_twitter_tweetquestor_access_secret_token)
    api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

# Only 'tweepy' itself is imported, so the exception class must be qualified;
# the bare name 'TweepError' would raise NameError while handling the failure.
except tweepy.TweepError:
    print("Authentication error: Problem authenticating Twitter API using Tweepy (TweepError)")

In [8]:
# # Function Definitions: Twitter API Rate Limit Management

def api_rate_limits():
    """Return the Twitter API rate-limit status for all resource families.

    Calls api.rate_limit_status() and returns the 'resources' entry of the
    reply.  Entries of interest include (per the Twitter API):
      "trends/place": Top 10 trending topics for a WOEID
      "trends/closest": Locations near a specified lat/long for which Twitter has trending topic info
      "trends/available": Locations for which Twitter has topic info
      "search/tweets", "users/search", "users/show", "users/lookup"

    Global Variable: 'api': Tweepy API

    Returns:
        The 'resources' value of the rate-limit reply, or "" if the call
        failed or the reply has no 'resources' entry.
    """

    # Get Twitter rate limit information using the Tweepy API
    try:
        rate_limits = api.rate_limit_status()

    # The exception class must be qualified with the module name: only
    # 'tweepy' is imported, so a bare 'RateLimitError' raises NameError
    # as soon as any exception reaches this handler.
    except tweepy.RateLimitError as e:
        print("Tweepy API: Problem getting Twitter rate limits information using tweepy - RateLimitError")
        pprint(e)
        # No rate-limit data to return (previously fell through to an unbound variable)
        return ""

    except Exception:
        print("Tweepy API: Problem getting Twitter rate limits information using tweepy")
        return ""

    # Return the per-resource rate limit details (or "" if not present)
    try:
        return rate_limits['resources']

    except (KeyError, TypeError):
        return ""

In [9]:
def api_calls_remaining( a_type = "place"):
    """Report how many Twitter 'trends' API calls are still available.

    a_type selects the trends endpoint:
      'place': Top 10 trending topics for a WOEID
      'closest': Locations near a specified lat/long for which Twitter has trending topic info
      'available': Locations for which Twitter has topic info

    Global Variable: 'api': Tweepy API

    Returns the 'remaining' count for "/trends/<a_type>", or "" when that
    entry cannot be read (e.g., a_type is not a valid type).
    """

    # Rate limit details for the trends family of calls, via the Tweepy API
    trends_resource = api.rate_limit_status()['resources']['trends']

    try:
        calls_left = trends_resource[ f"/trends/{a_type}" ]['remaining']
        print(f"Twitter API 'trends/{a_type}' - API Calls Remaining: {calls_left}")
        return calls_left

    except:
        # Not a valid type (or the entry is malformed)
        return ""

In [10]:
def api_time_before_reset( a_type = "place"):
    """Report the time, in minutes, until the Twitter 'trends' rate limit resets.

    a_type selects the trends endpoint:
      'place': Top 10 trending topics for a WOEID
      'closest': Locations near a specified lat/long for which Twitter has trending topic info
      'available': Locations for which Twitter has topic info

    Global Variable: 'api': Tweepy API

    Returns the number of minutes (float) between now and the 'reset'
    timestamp for "/trends/<a_type>", or -1 when that entry cannot be read.
    The value is not clamped, so it is negative if the reset time has passed.
    """

    # Rate limit details for the trends family of calls, via the Tweepy API
    trends_resource = api.rate_limit_status()['resources']['trends']

    # POSIX timestamp of the next reset (-1 returned if not a valid type)
    try:
        reset_posix = trends_resource[ f"/trends/{a_type}" ]['reset']
    except:
        return -1

    # Both moments as naive datetimes expressed in UTC
    reset_naive = datetime.utcfromtimestamp(reset_posix)
    now_naive = datetime.utcnow()

    # Minutes remaining until the reset
    minutes_left = (reset_naive - now_naive).total_seconds() / 60.0

    def _local_text(naive_utc):
        # Label the naive value as UTC, convert to the local zone, then format
        aware_utc = naive_utc.replace(tzinfo = tz.tzutc() )
        return aware_utc.astimezone( tz.tzlocal() ).strftime('%Y-%m-%d %H:%M:%S')

    reset_text = _local_text(reset_naive)
    now_text = _local_text(now_naive)
    print(f"Twitter API 'trends/{a_type}' - Time Before Rate Limit Reset: {minutes_left:.1f}: Reset Time: {reset_text}, Local Time: {now_text}")

    # Return the time before reset (in minutes)
    return minutes_left

In [11]:
# # Function Definitions: Twitter Locations with Available Trends Info

def get_loc_with_trends_available_to_df( ):
    """Build a DataFrame of U.S. locations for which Twitter has trends data.

    Calls api.trends_available(), keeps only rows whose 'countryCode' is
    "US", flattens the nested 'placeType' into a 'twitter_type' column,
    stamps every row with the current UTC time in 'updated_at', and renames
    the Twitter fields to the 'locations' table column names.

    Global Variable: 'api': Tweepy API

    Returns the resulting DataFrame, or False if the API call fails.
    """

    # Obtain the WOEID locations for which Twitter Trends info is available
    try:
        available_locations = api.trends_available()

    except:
        # No locations info available, return False
        print(f"Tweepy API: Problem getting locations that have trends available information")
        return False

    # Twitter field name -> 'locations' table column name
    # ('tritter_country_code' is the actual, misspelled, column name)
    column_renames = {
        'woeid': 'woeid',
        'country': 'twitter_country',
        'countryCode': 'tritter_country_code',
        'name': 'twitter_name',
        'parentid': 'twitter_parentid' }

    return (
        pd.DataFrame.from_dict(available_locations, orient='columns')
        # Same UTC timestamp for all locations
        .assign(updated_at=datetime.utcnow())
        # Retain only locations in the U.S., renumbering the rows from 0
        .loc[lambda frame: frame['countryCode'] == "US"]
        .reset_index(drop=True)
        # Unpack the name out of the nested placeType information
        .assign(twitter_type=lambda frame: frame['placeType'].map(lambda place_type: place_type['name']))
        # Remove unneeded fields, then switch to the table's column names
        .drop(columns=['placeType', 'url'])
        .rename(columns=column_renames)
    )

In [12]:
def _flatten_flickr_area(place, flickr_key, prefix):
    """Flatten one administrative area (county/region/country) of a Flickr place.

    Returns a dict with '<prefix>_name', '<prefix>_name_only' and
    '<prefix>_woeid'; all three are None if the area is missing or malformed.
    """
    try:
        area = place[flickr_key]
        return {
            f'{prefix}_name': area['_content'],
            f'{prefix}_name_only': area['_content'].split(",")[0].strip(),
            f'{prefix}_woeid': int(area['woeid']),
        }

    except (KeyError, TypeError, ValueError, AttributeError):
        return {
            f'{prefix}_name': None,
            f'{prefix}_name_only': None,
            f'{prefix}_woeid': None,
        }


def get_location_info( a_woeid ):
    """Use a Flickr API call to get location information for a Yahoo! WOEID.

    Note: Yahoo! no longer supports this type of lookup, hence Flickr.

    Global Variable: 'key_flicker_infoquestor_key': Flickr API key

    Args:
        a_woeid: the WOEID to look up (converted with str() for the URL).

    Returns:
        False if the HTTP request fails or the reply is not valid JSON.
        Otherwise a flat dict containing, in this order:
          - 'woeid', 'name_woe', 'name_full', 'name_only', 'place_type',
            'latitude', 'longitude' - present only if ALL of them could be
            read (e.g., absent when Flickr reports the place as not found)
          - 'timezone' (None if not available)
          - 'county_*', 'state_*', 'country_*' name / name_only / woeid
            (None if not available)
    """

    # Setup the Flickr API base URL
    flickr_api_base_url = f"https://api.flickr.com/services/rest/?method=flickr.places.getInfo&api_key={key_flicker_infoquestor_key}&format=json&nojsoncallback=1&woe_id="

    # Build the full URL for API REST request
    flickr_api_url = flickr_api_base_url + str(a_woeid)

    try:
        # Get the REST response and parse it; it should be in JSON format
        response = requests.get(url=flickr_api_url)
        location_data = response.json()

    except requests.exceptions.RequestException as e:
        print(f"Flickr API: Problem getting location information for WOEID {a_woeid}: {e}")
        return False

    except ValueError as e:
        # Body was not JSON (e.g., an HTML error page)
        print(f"Flickr API: Response for WOEID {a_woeid} was not valid JSON: {e}")
        return False

    # Check for failure to locate the information
    if location_data.get('stat') == 'fail':
        print(f"Flickr API: Problem finding location WOEID {a_woeid}: {location_data.get('message')}")

    # None when the lookup failed; every section below then falls back
    place = location_data.get('place')

    # Return just a useful subset of the location info as flattened dictionary
    key_location_info = {}

    # Basic information that should be present for any location.
    # The original handler referenced 'sys', which is never imported, so a
    # failed lookup raised NameError here instead of reporting the problem.
    try:
        key_location_info.update( {
            'woeid': int(place['woeid']),
            'name_woe': place['woe_name'],
            'name_full': place['name'],
            'name_only': place['name'].split(",")[0].strip(),
            'place_type': place['place_type'],
            'latitude': float(place['latitude']),
            'longitude': float(place['longitude']),
        })

    except (KeyError, TypeError, ValueError, AttributeError) as e:
        print(f"Error - basic location information not returned for WOEID {a_woeid}: {e!r}")

    # Timezone associated with the location - if available
    try:
        key_location_info['timezone'] = place['timezone']

    except (KeyError, TypeError):
        key_location_info['timezone'] = None

    # County, State (Flickr 'region') and Country associated with the
    # location - if available
    key_location_info.update( _flatten_flickr_area(place, 'county', 'county') )
    key_location_info.update( _flatten_flickr_area(place, 'region', 'state') )
    key_location_info.update( _flatten_flickr_area(place, 'country', 'country') )

    return key_location_info

In [13]:
def update_db_locations_table():
    """Update the list of Twitter locations in the 'locations' DB table.

    Uses a Twitter API call (get_loc_with_trends_available_to_df) to get the
    locations for which top trends information is available, then a Flickr
    API call (get_location_info) to obtain details for each of them.  The two
    results are merged on 'woeid' and each merged row is either added to the
    'locations' table or used to update the existing row with that WOEID.
    Existing rows are never deleted (GeoTweet+ change).

    There is no cross-database SQLAlchemy support for the 'upsert' operation,
    so each WOEID is queried individually to choose between add and update.

    This function assumes that the 'locations' table in the database has
    already been configured and is ready for data.

    Returns:
        int: the total number of rows in the 'locations' table afterwards.
        If no location data could be retrieved the table is left unchanged
        and its current row count is returned.
    """

    # Flatten the Twitter Trends results and populate in a Dataframe
    loc_with_trends_available_df = get_loc_with_trends_available_to_df( )

    # The helper returns False (not a DataFrame) when the Twitter call fails
    if loc_with_trends_available_df is False:
        print(">>> No Twitter locations retrieved - 'locations' table left unchanged")
        return db.session.query(Location).count()

    # Use the get_location_info() function to add location info (from Flickr)
    # for each location (Twitter WOEID) that has trend info.
    # Skip lookups that failed (False) or lack the 'woeid' needed for the merge.
    loc_info_list = [
        loc_info
        for loc_info in loc_with_trends_available_df['woeid'].apply( get_location_info )
        if loc_info and 'woeid' in loc_info
    ]

    if not loc_info_list:
        print(">>> No Flickr location details retrieved - 'locations' table left unchanged")
        return db.session.query(Location).count()

    # Create a DataFrame from the location info list
    loc_info_df = pd.DataFrame.from_dict(loc_info_list)

    # Merge the Twitter trend location available dataframe with the
    # location info dataframe to create a master list of all
    # Twitter Trend locations and associated location information
    twitter_trend_locations_df = loc_with_trends_available_df.merge(loc_info_df, how='inner', on='woeid')

    # Convert all 'NaN' values to 'None' to avoid issues when updating the database
    # Note: Some cities had county_woeid set to "NaN", which caused much havoc with db operations.
    # The cast to object is required: in a float column, None would otherwise be
    # coerced straight back to NaN.  It also yields plain Python ints/floats.
    twitter_trend_locations_df = twitter_trend_locations_df.astype(object)
    twitter_trend_locations_df = twitter_trend_locations_df.where((pd.notnull(twitter_trend_locations_df)), None)

    # Loop through all rows in the update dataframe
    for _, row in twitter_trend_locations_df.iterrows():
        # Column name -> value for this row (includes 'woeid')
        row_dict = row.to_dict()
        row_woeid = row_dict['woeid']

        result = db.session.query(Location).filter( Location.woeid == row_woeid ).first()

        if result is None:
            # This location is not in the table, so add this entry to the 'locations' table.
            # "**row_dict" passes each column value to Location as a keyword argument
            try:
                db.session.add( Location(**row_dict) )
                db.session.commit()

            except Exception as err:
                print(f">>> Error while attempting to add record to 'locations' (WOEID {row_woeid}): {err}")
                db.session.rollback()

        else:
            # This location is in the table, so update this entry in the 'locations' table.
            try:
                db.session.query(Location).filter( Location.woeid == row_woeid ).update( row_dict )
                db.session.commit()

            except Exception as err:
                print(f">>> Error while attempting to update record in 'locations' (WOEID {row_woeid}): {err}")
                db.session.rollback()

    # Return the total number of entries in the Locations table
    return db.session.query(Location).count()

In [14]:
# # Function Definitions: Twitter Top Trends for Twitter Locations

def get_trends_for_loc( a_woeid ):
    """Get the top Twitter trends for the location specified by a WOEID.

    Calls api.trends_place(), flattens the reply, and returns one dict per
    trend.  Each dict starts with the information common to the location
    ('woeid', 'updated_at' = current UTC time, 'twitter_name',
    'twitter_created_at', 'twitter_as_of') followed by the trend's own
    'twitter_tweet_*' fields.

    Global Variable: 'api': Tweepy API

    Returns:
        list of dict, or False if the API call fails.  If the common
        location information cannot be read an error is printed and the
        trend dicts are returned without those common keys.  A trend entry
        missing any expected field gets all five 'twitter_tweet_*' values
        set to None.
    """

    # Get the top trends for this WOEID (first element of the reply)
    try:
        top_trends = api.trends_place( a_woeid )[0]

    except Exception:
        # No top trends info available for this WOEID, return False
        print(f"Tweepy API: Problem getting trends information for WOEID {a_woeid}")
        return False

    # Repeat some information that is common for all elements in the trends list
    common_info = {}

    # Basic information that should be present for any location
    # 'updated_at': Current time in UTC timezone
    # 'as_of': '2019-03-26T21:22:42Z',
    # 'created_at': '2019-03-26T21:17:18Z',
    # 'locations': [{'name': 'Atlanta', 'woeid': 2357024}]
    try:
        common_info.update( {
            'woeid': int(top_trends['locations'][0]['woeid']),
            'updated_at': datetime.utcnow(),
            'twitter_name': top_trends['locations'][0]['name'],
            'twitter_created_at': top_trends['created_at'],
            'twitter_as_of': top_trends['as_of']
        })

    # The original handler referenced 'sys', which is never imported, so it
    # raised NameError instead of printing this message.
    except (KeyError, IndexError, TypeError, ValueError) as e:
        print(f"Error - basic location information not returned for WOEID {a_woeid}: {e!r}")

    # Loop through all of the trends and store in an array of dictionary elements
    # 'name': 'Jussie Smollett'
    # 'promoted_content': None
    # 'query': '%22Jussie+Smollett%22'
    # 'tweet_volume': 581331
    # 'url': 'http://twitter.com/search?q=%22Jussie+Smollett%22'

    # Return the trends as an array of flattened dictionaries
    trend_info = []

    for ti in top_trends['trends']:

        # Put the trend info into a dictionary, starting with the common info
        this_trend = common_info.copy()

        # Details of this trend - all or nothing
        try:
            this_trend.update( {
                'twitter_tweet_name': ti['name'],
                'twitter_tweet_promoted_content': ti['promoted_content'],
                'twitter_tweet_query': ti['query'],
                'twitter_tweet_volume': ti['tweet_volume'],
                'twitter_tweet_url': ti['url']
            })

        except (KeyError, TypeError):
            this_trend.update( {
                'twitter_tweet_name': None,
                'twitter_tweet_promoted_content': None,
                'twitter_tweet_query': None,
                'twitter_tweet_volume': None,
                'twitter_tweet_url': None
            })

        # Append this trend to the list
        trend_info.append( this_trend )

    return trend_info

In [15]:
def update_db_trends_table():
    """Append the current top trends for every known location to the 'trends' table.

    Obtains the list of Twitter locations (WOEIDs) from the 'locations' DB
    table, then for each location gets the Twitter top trends info and
    appends it to the 'trends' table.  Existing rows are never deleted
    (GeoTweet+ change).

    Before each lookup the rate limit check functions are consulted; if fewer
    than 2 'trends/place' calls remain, the function sleeps until the
    scheduled rate limit reset (a period of up to 15 minutes) plus a 1 minute
    buffer before continuing.

    This function assumes that the 'trends' table in the database has already
    been configured and is ready for data.

    Returns:
        int: the number of locations whose trends were written to the database.
    """

    # Obtain the list of Twitter locations from the 'locations' DB table
    loc_list = [ x[0] for x in db.session.query(Location.woeid).all()]
    print(f"Retrieved {len(loc_list)} locations for processing")

    # Keep track of the actual number of locations
    # where trend info was written to the 'trends' table
    num_location_trends_written_to_db = 0

    for tw_woeid in loc_list:
        print(f">> Updating trends for location {tw_woeid}")

        # Make sure we haven't hit the rate limit yet
        calls_remaining = api_calls_remaining( "place" )
        time_before_reset = api_time_before_reset( "place" )

        # If we're close to hitting the rate limit for the trends/place API,
        # then wait until the next reset =
        # 'time_before_reset' minutes + 1 minute buffer.
        # api_calls_remaining() returns "" when the count is unknown; comparing
        # that with an int raises TypeError, so only act on a real number.
        if isinstance(calls_remaining, (int, float)) and calls_remaining < 2:
            # Never negative: the reset time may already have passed, or be
            # unknown (-1), and time.sleep() rejects negative values
            wait_minutes = max(time_before_reset, 0) + 1
            print (f">> Waiting {wait_minutes:.1f} minutes due to rate limit")
            time.sleep( wait_minutes * 60)

        # Get trend info for a WOEID location
        t_info = get_trends_for_loc(tw_woeid)

        # False (API problem) or an empty list: nothing to write for this location
        if not t_info:
            print(f">> No trends data available for location {tw_woeid} - skipping")
            continue

        try:
            # Create a DataFrame
            t_info_df = pd.DataFrame.from_dict(t_info)

            # Append trends for this WOEID to the 'trends' database table
            t_info_df.to_sql( 'trends', con=db.engine, if_exists='append', index=False)
            db.session.commit()

            # Increment the count
            num_location_trends_written_to_db += 1

        except Exception as err:
            print(f">> Error occurred with location {tw_woeid} while attempting to prepare and write trends data: {err}")

    return num_location_trends_written_to_db

## Other Helper Functions Supporting Queries

In [16]:
def parse_date_range(a_date_range = None):
    """Parse a date-range string into a (start_date, end_date) pair.

    Used by the query helpers and Flask routes that restrict results to a
    range of dates.

    Arguments:
        a_date_range: Single string (or None) in one of these formats:
            "2019-03-01"             -> only 3/1/19 (start and end are the same date)
            "2019-03-01:"            -> on or after 3/1/19
            ":2019-06-30"            -> on or before 6/30/19
            "2019-03-01:2019-06-30"  -> 3/1/19 through 6/30/19
            "all", ":", "" or None   -> all dates (no date restriction)

    Returns:
        Tuple (start_date, end_date) of datetime.date objects, both inclusive.
        An omitted bound defaults to 2000-01-01 (start) or 2100-12-31 (end).
        If a bound cannot be parsed, the string "ERROR" is returned in place
        of that date (for a single date, in both positions), so callers must
        compare against "ERROR" before using the values.
    """

    # Widest supported range - used whenever a bound is not specified
    DATE_EARLIEST_POSSIBLE = parser.parse("2000-01-01").date()
    DATE_LATEST_POSSIBLE = parser.parse("2100-12-31").date()

    def _parse_bound(a_text, a_default):
        # Parse one bound of the range: blank -> default, unparseable -> "ERROR"
        text = a_text.strip()
        if len(text) == 0:
            return a_default

        try:
            return parser.parse(text).date()

        except (ValueError, OverflowError):
            # dateutil raises ValueError for unrecognized formats and
            # OverflowError for values too large to represent as a date
            return "ERROR"

    # If no argument provided, provide full date range (i.e., no date restriction)
    if a_date_range is None:
        return (DATE_EARLIEST_POSSIBLE, DATE_LATEST_POSSIBLE)

    # Check for "all" and similar indications of no date restriction
    date_range = a_date_range.strip().lower()
    if date_range in ("all", "", ":"):
        return (DATE_EARLIEST_POSSIBLE, DATE_LATEST_POSSIBLE)

    # Attempt to split the date range (separator = ":")
    arg_list = a_date_range.split(":")

    # If only one argument provided (i.e., no ":")
    # then restrict date range to just that one date
    if len(arg_list) == 1:
        start_date = _parse_bound(arg_list[0], DATE_EARLIEST_POSSIBLE)
        return (start_date, start_date)

    # At least 2 args provided, so assume the first two are the start and
    # end dates (any additional ":"-separated parts are ignored).
    # A blank bound keeps its default (i.e., open-ended on that side).
    start_date = _parse_bound(arg_list[0], DATE_EARLIEST_POSSIBLE)
    end_date = _parse_bound(arg_list[1], DATE_LATEST_POSSIBLE)

    return (start_date, end_date)

## DB Management: Twitter Tweet info

In [17]:
# # Function Definitions: Twitter Tweet Info
def search_for_tweets( a_search_term ):
    """Search Twitter for tweets matching a search term and flatten the results.

    Only the first page of search results is requested (up to 100 tweets,
    English only).

    Arguments:
        a_search_term: The search term (e.g., a trend name, with or without "#")

    Returns:
        A list of dictionaries, one per tweet, each holding the common fields
        ('updated_at', 'tweet_search_term') plus whatever tweet, count and user
        fields could be extracted.  A tweet with missing fields is still
        returned, just without the fields of the group that failed.
        Returns False if the search call fails or its result has no 'statuses'.
    """

    # Number of tweets per page (up to 100) to be returned from the API query
    tweets_count_limit = 100       # PRODUCTION
    # tweets_count_limit = 5         # DEBUG

    try:
        # Perform API search query and obtain only the 1st page of results
        # NOTE(review): the term is URL-quoted here before being handed to the
        # API wrapper - confirm the wrapper does not encode it a second time
        tweets = api.search(quote(a_search_term), lang='en', count=tweets_count_limit)

        # Validate the shape of the response while still inside the try, so an
        # unexpected payload is reported as "no tweets" rather than crashing
        statuses = tweets['statuses']

    except Exception:
        # No tweet info available for this search term, return False
        print(f"Tweepy API Error: Problem getting tweet information for search term {a_search_term}")
        return False

    # Create a list of dictionaries of Tweets info associated with a_search_term
    tweet_list = []

    # Repeat some information that is common for all elements in the tweet list
    common_info = {
        'updated_at': datetime.utcnow(),
        'tweet_search_term': a_search_term
    }

    # Loop through each tweet in the tweet search results
    for t in statuses:

        # Start the dictionary with some common information
        tweet_info = dict(common_info)

        # Info about this Tweet (i.e., "Status")
        try:
            tweet_info.update( {
                'tweet_id': t['id'],
                'tweet_id_str': t['id_str'],
                'tweet_created_at': t['created_at'],
                'tweet_text': t['text'],
                'tweet_lang': t['lang'],
                'tweet_source': t['source'],
                'tweet_is_a_quote_flag': t['is_quote_status'],    # If True, then this is a Quoted Tweet (i.e., Tweet w/ comments/mods)
            })

        except Exception:
            print("Tweepy API Error: Problem getting tweet-related info")

        # If the 'retweeted_status' key exists in the results,
        # then this Tweet is a Retweet (i.e., Tweet forwarded "as is")
        tweet_info['tweet_is_a_retweet_flag'] = 'retweeted_status' in t

        # Counts associated with the tweet
        try:
            tweet_info.update( {
                'tweet_entities_hashtags_count': len(t['entities']['hashtags']),
                'tweet_entities_user_mentions_count': len(t['entities']['user_mentions']),
                'tweet_favorite_counts': t['favorite_count'],
                'tweet_retweet_counts': t['retweet_count'],
            })

        except Exception:
            print("Tweepy API Error: Problem getting tweet-related info")

        # User who created this Tweet
        try:
            tweet_info.update( {
                'tweet_user_id': t['user']['id'],
                'tweet_user_id_str': t['user']['id_str'],
                'tweet_user_created_at': t['user']['created_at'],
                'tweet_user_name': t['user']['name'],
                'tweet_user_screen_name': t['user']['screen_name'],
                'tweet_user_description': t['user']['description'],
                'tweet_user_lang': t['user']['lang'],
                'tweet_user_statuses_count': t['user']['statuses_count'],     # No. of Tweets/Retweets issued by this user
                'tweet_user_favourites_count': t['user']['favourites_count'],    # No. of Tweets this user has liked (in account's lifetime)
                'tweet_user_followers_count': t['user']['followers_count'],     # No. of Followers this account currently has
                'tweet_user_friends_count': t['user']['friends_count'],       # No. of Users this account is following
                'tweet_user_listed_count': t['user']['listed_count']        # No. of Public lists this user is a member of
            })

        except Exception:
            print("Tweepy API Error: Problem getting user-related info")

        # Append this tweet to the list (even if only partially populated)
        tweet_list.append( tweet_info )

    return tweet_list

In [18]:
def get_search_terms_from_trends(a_date_range=None):
    """Return the sorted, unique tweet search terms found in the 'trends' table.

    For every term found, its hashtag/no-hashtag complement is included as
    well (e.g., "#abc" also yields "abc", and "abc" also yields "#abc").

    Arguments:
        a_date_range: Optional date range string understood by
                      parse_date_range(); restricts the trends considered
                      by the date of their 'updated_at' value.

    Returns:
        A sorted list of search term strings, or [{'ERROR': 'ERROR'}] if the
        date range could not be parsed.
    """

    # Parse the date range; an unparseable bound comes back as "ERROR"
    q_start_date, q_end_date = parse_date_range(a_date_range)
    if "ERROR" in (q_start_date, q_end_date):
        return [{'ERROR': 'ERROR'}]

    # Restrict to trends updated within the (inclusive) date range
    within_date_range = and_(
        func.date(Trend.updated_at) >= q_start_date,
        func.date(Trend.updated_at) <= q_end_date
    )

    # Query the search terms (i.e., 'twitter_tweet_name') from 'trends'
    rows = (
        db.session.query(Trend.twitter_tweet_name)
        .filter(within_date_range)
        .order_by(Trend.twitter_tweet_name)
        .all()
    )

    # Each row is a 1-tuple; a set removes the duplicates
    unique_terms = { row[0] for row in rows }

    # To support the hashtag/no hashtag Tweet Analysis,
    # pair every term with its complementary form
    complement_terms = {
        f"{term[1:]}" if term[:1] == "#" else f"#{term}"
        for term in unique_terms
    }

    # Combine both sets and hand back a sorted list
    return sorted(unique_terms | complement_terms)


In [19]:
def get_search_terms_from_tweets(a_date_range=None):
    """Return the sorted, unique tweet search terms found in the 'tweets' table.

    For every term found, its hashtag/no-hashtag complement is included as
    well (e.g., "#abc" also yields "abc", and "abc" also yields "#abc").

    Arguments:
        a_date_range: Optional date range string understood by
                      parse_date_range(); restricts the tweets considered
                      by the date of their 'updated_at' value.

    Returns:
        A sorted list of search term strings, or [{'ERROR': 'ERROR'}] if the
        date range could not be parsed.
    """

    # Parse the date range
    q_start_date, q_end_date = parse_date_range(a_date_range)

    # Bail out with an error marker if either bound could not be parsed
    date_parse_failed = (q_start_date == "ERROR") or (q_end_date == "ERROR")
    if date_parse_failed:
        return [{'ERROR': 'ERROR'}]

    # Build the query for the search terms in 'tweets' whose 'updated_at'
    # date falls within the (inclusive) date range
    term_query = db.session.query(Tweet.tweet_search_term)
    term_query = term_query.filter( and_(
        func.date(Tweet.updated_at) >= q_start_date,
        func.date(Tweet.updated_at) <= q_end_date
    ))
    rows = term_query.order_by( Tweet.tweet_search_term ).all()

    # Collect the unique terms (each row is a 1-tuple holding the term)
    found_terms = set()
    for row in rows:
        found_terms.add(row[0])

    # To support the hashtag/no hashtag Tweet Analysis,
    # add the complementary form of every term that was found
    all_terms = set(found_terms)
    for term in found_terms:
        if term[:1] == "#":
            all_terms.add(f"{term[1:]}")
        else:
            all_terms.add(f"#{term}")

    # Return a sorted list
    return sorted(all_terms)


In [20]:
def get_tweet_list():
    """Fetch tweets for search terms in 'trends' that are not yet in 'tweets'.

    Compares the search terms in the 'trends' table with those already in the
    'tweets' table and calls the Twitter Search API only for the terms that
    are missing, to minimize API calls.  Searching stops early when the
    remaining search rate limit is low or cannot be determined.

    Returns:
        A list of tweet dictionaries (as produced by search_for_tweets())
        for all search terms that were successfully searched.
    """

    # Stop searching when fewer than this many search calls remain
    min_searches_remaining = 10

    # Get all of the Twitter search terms in the 'trends' table
    trends_search_term_list = get_search_terms_from_trends()

    # Get the Twitter search terms from the 'tweets' table
    tweets_search_term_list = get_search_terms_from_tweets()

    # Keep only the terms from 'trends' that are not already in 'tweets'
    # --> Minimizes API calls.  Sorted so the processing order is repeatable.
    add_search_term_list = sorted( set(trends_search_term_list) - set(tweets_search_term_list) )
    print( f"Search Terms - Trends: {len(trends_search_term_list)}, Tweets {len(tweets_search_term_list)}, Add: {len(add_search_term_list)}" )

    # Loop through each search term and perform
    # a search for tweets associated with that term
    tweet_list = []
    search_term_count = 0

    for s in add_search_term_list:

        # Check the rate limits to see if there's enough left to make a search
        try:
            retval = api_rate_limits()
            searches_remaining = retval['search']['/search/tweets']['remaining']

        except Exception:
            # Most likely hit rate limits -- break out of the loop and process what we have so far
            print("POSSIBLE RATE LIMITS: search tweets 'remaining' not populated in API results")
            break

        # If searches remaining are too low -- break out of the loop and process what we have so far
        if searches_remaining < min_searches_remaining:
            print("RATE LIMITS: Too close to rate limits to perform additional searches")
            break

        # Get Tweets for this Twitter search term
        tweets_for_this_search_term = search_for_tweets(s)
        search_term_count += 1

        # search_for_tweets() returns False (not a list) when the API call
        # fails -- skip this term rather than crashing on len()/extend()
        if tweets_for_this_search_term is False:
            print(f"Search Term '{s}' => Search failed, skipping")
            continue

        print(f"Search Term '{s}' => Tweet Count: {len(tweets_for_this_search_term)}")

        # Build a list of Tweets
        tweet_list.extend( tweets_for_this_search_term )

    print(f"OVERALL => Tweet Count: {len(tweet_list)}, API Search Calls: {search_term_count}")

    # Return the tweet_list
    return tweet_list

In [21]:
def update_db_tweets_table(a_tweet_list):
    """Add or update rows in the 'tweets' table from a list of tweets.

    Each tweet is looked up by 'tweet_id_str': if no row exists it is added,
    otherwise the existing row is updated with the new values.  Every tweet
    is committed individually, so one failure does not affect the others.

    Arguments:
        a_tweet_list: A list of tweet dictionaries generated by get_tweet_list()
                      whose keys are column names of the 'tweets' table

    Returns:
        A dictionary of counts: 'n_tweet_list_input',
        'n_tweet_table_entries_start', 'n_tweet_table_entries_finish',
        'n_adds', 'n_error_adds', 'n_updates', 'n_error_updates'.
        Tweets that lack a 'tweet_id_str' are skipped and counted in
        'n_error_adds'.
    """

    print(f"Tweets to add to the 'tweets' table: {len(a_tweet_list)}")

    # Number of entries in the 'tweets' table before any changes
    num_tweets_start = db.session.query(Tweet).count()

    # Loop through all tweet entries
    n_adds = 0
    n_error_adds = 0
    n_updates = 0
    n_error_updates = 0
    for t in a_tweet_list:

        # A tweet can arrive only partially populated (e.g., when the tweet
        # fields could not be read from the API result); without an ID it
        # can be neither looked up nor stored, so count it as a failed add
        tweet_id_str = t.get('tweet_id_str')
        search_term = t.get('tweet_search_term')
        if tweet_id_str is None:
            n_error_adds += 1
            print(f">>> ADD: Skipping tweet without a 'tweet_id_str': Search Term '{search_term}'")
            continue

        # Search for this tweet in the 'tweets' table -- just in case it's there
        result = db.session.query(Tweet).filter( Tweet.tweet_id_str == tweet_id_str ).first()

        if result is None:
            # This tweet is not in the table, so add this entry to the 'tweets' table.
            # NOTE:
            # Tweet is the Class mapped to the 'tweets' table
            # t is a dictionary containing all of the column values for this row as key/value pairs
            # The term "**t" creates a "key=value" parameter for each key/value pair
            try:
                db.session.add( Tweet(**t) )
                db.session.commit()
                n_adds += 1
                print(f">>> ADDED: Record to 'tweets': Search Term '{search_term}' => Tweet ID: '{tweet_id_str}'")

            except Exception:
                # Undo the failed transaction so the session stays usable
                db.session.rollback()
                n_error_adds += 1
                print(f">>> ADD: Error while attempting to add record to 'tweets': Search Term '{search_term}' => Tweet ID: '{tweet_id_str}'")

        else:
            # This tweet is in the table, so update this entry in the 'tweets' table.
            try:
                db.session.query(Tweet).filter( Tweet.tweet_id_str == tweet_id_str ).update( t )
                db.session.commit()
                n_updates += 1
                print(f">>> UPDATED: Record in 'tweets': Search Term '{search_term}' => Tweet ID: '{tweet_id_str}'")

            except Exception:
                # Undo the failed transaction so the session stays usable
                db.session.rollback()
                n_error_updates += 1
                print(f">>> UPDATE: Error while attempting to update record in 'tweets': Search Term '{search_term}' => Tweet ID: '{tweet_id_str}'")

    # Number of entries in the 'tweets' table after all changes
    num_tweets_finish = db.session.query(Tweet).count()

    print(f"COMPLETE: ADDS: [{n_adds} success, {n_error_adds} error], UPDATES: [{n_updates} success, {n_error_updates} error] => 'tweets' table rows: {num_tweets_start}->{num_tweets_finish}")

    retval = {
        'n_tweet_list_input': len(a_tweet_list),
        'n_tweet_table_entries_start': num_tweets_start,
        'n_tweet_table_entries_finish': num_tweets_finish,

        'n_adds': n_adds,
        'n_error_adds': n_error_adds,
        'n_updates': n_updates,
        'n_error_updates': n_error_updates
    }

    # Return the counts of add/update actions
    return retval

In [22]:
# print(api_calls_remaining("available"))

In [23]:
# n_loc = update_db_locations_table()
# print(n_loc)

In [24]:
# print(api_calls_remaining("place"))

In [25]:
# n_trends = update_db_trends_table()
# print(n_trends)

In [26]:
# print(api_calls_remaining("place"))

In [27]:
# api.rate_limit_status()

In [28]:
# pprint(api_rate_limits() )

In [37]:
# Show the current rate limit status for the search/tweets API endpoint
# (the captured output shows its 'limit', 'remaining' and 'reset' values)
pprint(api_rate_limits()['search']['/search/tweets'] )

{'limit': 180, 'remaining': 110, 'reset': 1557175228}


In [38]:
# Search for tweets for every search term that is in the 'trends' table
# but not yet in the 'tweets' table, then display how many were retrieved
tweet_list = get_tweet_list()
len(tweet_list)

Search Terms - Trends: 874, Tweets 870, Add: 4
Search Term '#doesnthelooktired' => Tweet Count: 0
Search Term 'modecisionday' => Tweet Count: 100
Search Term '#modecisionday' => Tweet Count: 100
Search Term 'doesnthelooktired' => Tweet Count: 0
OVERALL => Tweet Count: 200, API Search Calls: 4


200

In [None]:
# Write the retrieved tweets to the 'tweets' table (adds new tweets, updates
# existing ones) and keep the dictionary of add/update counts it returns
update_status = update_db_tweets_table(tweet_list)

Tweets to add to the 'tweets' table: 200
>>> UPDATED: Record in 'tweets': Search Term 'modecisionday' => Tweet ID: '1125380997258723330'
>>> UPDATED: Record in 'tweets': Search Term 'modecisionday' => Tweet ID: '1124480872047161344'
>>> UPDATED: Record in 'tweets': Search Term 'modecisionday' => Tweet ID: '1124480147703287808'
>>> UPDATED: Record in 'tweets': Search Term 'modecisionday' => Tweet ID: '1124443487133474816'
>>> UPDATED: Record in 'tweets': Search Term 'modecisionday' => Tweet ID: '1124443481391472641'


In [36]:
# Sanity check: load every row of the 'tweets' table (ordered by search term)
# and print how many rows there are
abc = db.session.query(Tweet).order_by( Tweet.tweet_search_term ).all()
print(len(abc))
# Optional: uncomment to list each tweet's search term, ID, update time and user
# for r in abc:
#     pprint(f"{r.tweet_search_term}: {r.tweet_id_str} [{r.updated_at}] => {r.tweet_user_screen_name}")

39031


In [None]:
# db.create_all()

# Code From: app.py - Flask app routes

In [None]:
#********************************************************************************
# Default route - display the main page
# NOTE: Flask expects rendered templates to be in the ./templates folder
@app.route("/")
def home():
    """Serve the main page by rendering the 'index.html' template."""
    return render_template("index.html")

In [None]:
#********************************************************************************
# Return information relevant to update
# of the 'locations' and 'trends' database tables
@app.route("/update")
def update_info():
    """Summarize API rate-limit status and database table sizes.

    Reports the remaining calls and the time before reset for the
    trends/place and trends/available APIs, along with the number of rows in
    the 'locations' and 'trends' tables and the average number of trends per
    location (None when there are no locations).

    Returns:
        A dictionary with the keys 'api_calls_remaining_place',
        'api_time_before_reset_place', 'api_calls_remaining_available',
        'api_time_before_reset_available', 'n_locations', 'n_trends' and
        'n_trends_per_location_avg'.
    """
    # Rate limit status for the trends/place API
    place_calls_left = api_calls_remaining( "place")
    place_reset_wait = api_time_before_reset( "place")

    # Rate limit status for the trends/available API
    available_calls_left = api_calls_remaining( "available")
    available_reset_wait = api_time_before_reset( "available")

    # Row counts of the 'locations' and 'trends' tables
    location_count = db.session.query(Location).count()
    trend_count = db.session.query(Trend).count()

    # Average number of Twitter Trends per location;
    # not defined (None) when there are no locations to divide by
    if location_count == 0:
        trends_per_location = None
    else:
        trends_per_location = trend_count / location_count

    api_info = {
        'api_calls_remaining_place': place_calls_left,
        'api_time_before_reset_place': place_reset_wait,
        'api_calls_remaining_available': available_calls_left,
        'api_time_before_reset_available': available_reset_wait,
        'n_locations': location_count,
        'n_trends': trend_count,
        'n_trends_per_location_avg' : trends_per_location
    }

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(api_info)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return api_info


In [None]:
#********************************************************************************
# Return the full set of Twitter API rate limits info
@app.route("/update/other")
def update_info_other():
    """Return the full set of rate limits info provided by api_rate_limits().

    In this notebook the result is returned as-is; the jsonify() version
    used by the Flask app is kept in the comment below.
    """
    # Obtain the full set rate limits info
    api_info = api_rate_limits()

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(api_info)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (api_info)


In [None]:
#********************************************************************************
# Update the 'locations' table via API calls
# Note: Typically requires less than 1 minute
@app.route("/update/locations")
def update_locations_table():
    """Update the 'locations' table and report the result.

    Returns:
        A dictionary with the key 'n_locations' holding the value returned
        by update_db_locations_table().
    """
    # Update the locations table through API calls
    n_locations = update_db_locations_table()

    api_info = {
        'n_locations': n_locations
    }

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(api_info)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (api_info)


In [None]:
#********************************************************************************
# Update the 'trends' table via API calls
# Note: Typically requires less than 1 minute if no rate limits
#       But require up to 15 minutes if rate limits are in effect
@app.route("/update/trends")
def update_trends_table():
    """Update the 'trends' table and report the result.

    Returns:
        A dictionary with the key 'n_location_trends' holding the value
        returned by update_db_trends_table() (the number of locations for
        which trend info was written to the 'trends' table).
    """
    # Update the trends table through API calls
    n_location_trends = update_db_trends_table()

    api_info = {
        'n_location_trends': n_location_trends
    }

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(api_info)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (api_info)


In [None]:
#********************************************************************************
# Return a list of all locations with Twitter Top Trend info
@app.route("/locations")
def get_all_locations():
    """Return the most recent 'locations' record for every WOEID.

    The 'locations' table retains records over time (GeoTweet+), so for each
    WOEID only the record with the latest 'updated_at' value is returned.

    Returns:
        A list of dictionaries (ordered by WOEID), one per location, holding
        the location's column values keyed by column name.
    """
    # Columns copied from each Location record into its dictionary,
    # in the order in which the keys are created
    location_fields = (
        'updated_at',
        'woeid',
        'latitude',
        'longitude',
        'name_full',
        'name_only',
        'name_woe',
        'county_name',
        'county_name_only',
        'county_woeid',
        'state_name',
        'state_name_only',
        'state_woeid',
        'country_name',
        'country_name_only',
        'country_woeid',
        'place_type',
        'timezone',
        'twitter_type',
        'twitter_country',
        'tritter_country_code',
        'twitter_name',
        'twitter_parentid',
    )

    # Subquery: the most recent "updated_at" value per woeid
    latest_per_woeid = (
        db.session.query(
            Location.woeid,
            func.max(Location.updated_at).label("max_updated_at")
        )
        .group_by(Location.woeid)
        .subquery()
    )

    # Keep only the Location records that match the subquery,
    # i.e., the latest record of each woeid
    is_latest_record = and_(
        Location.woeid == latest_per_woeid.c.woeid,
        Location.updated_at == latest_per_woeid.c.max_updated_at
    )
    records = (
        db.session.query(Location)
        .filter(is_latest_record)
        .order_by(Location.woeid)
        .all()
    )

    # Flatten each record into a dictionary of its column values
    loc_list = [
        { field: getattr(record, field) for field in location_fields }
        for record in records
    ]

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(loc_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return loc_list


In [None]:
#********************************************************************************
# Return a list of all locations with Twitter Top Trend info updated within the specified date range
@app.route("/locations/interval/<a_date_range>")
def get_interval_all_locations(a_date_range):
    """Return all 'locations' records updated within a date range.

    Arguments:
        a_date_range: Date range string, parsed by parse_date_range():
            "2019-03-01"             -> only 3/1/19
            "2019-03-01:"            -> on or after 3/1/19
            ":2019-06-30"            -> on or before 6/30/19
            "2019-03-01:2019-06-30"  -> 3/1/19 through 6/30/19
            "all" or ":"             -> all dates

    Returns:
        A list of dictionaries (ordered by WOEID), one per 'locations' record
        whose 'updated_at' date is within the range, holding the record's
        column values keyed by column name.  A location can appear more than
        once if several of its records fall within the range.
        Returns [{'ERROR': 'ERROR'}] if the date range could not be parsed.
    """

    # Parse the date range
    q_start_date, q_end_date = parse_date_range(a_date_range)

    # Return with an error if there was a problem parsing the date range.
    # The same notebook/Flask return convention is used as on the success
    # path below, so that this function can be called directly in the
    # notebook (jsonify() requires a Flask application context).
    if q_start_date == "ERROR" or q_end_date == "ERROR":
        loc_list = [{'ERROR': 'ERROR'}]
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:        return jsonify(loc_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
        return (loc_list)

    # Columns copied from each Location record into its dictionary,
    # in the order in which the keys are created
    location_fields = (
        'updated_at', 'woeid', 'latitude', 'longitude',
        'name_full', 'name_only', 'name_woe',
        'county_name', 'county_name_only', 'county_woeid',
        'state_name', 'state_name_only', 'state_woeid',
        'country_name', 'country_name_only', 'country_woeid',
        'place_type', 'timezone',
        'twitter_type', 'twitter_country', 'tritter_country_code',
        'twitter_name', 'twitter_parentid',
    )

    # Query all locations whose 'updated_at' date is within the
    # (inclusive) date range
    results = db.session.query(Location) \
                            .filter( and_( \
                                func.date(Location.updated_at) >= q_start_date, \
                                func.date(Location.updated_at) <= q_end_date \
                            )).order_by(Location.woeid).all()

    # Flatten each record into a dictionary of its column values
    loc_list = [
        { field: getattr(r, field) for field in location_fields }
        for r in results
    ]

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(loc_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (loc_list)


In [None]:
#********************************************************************************
# Return a list of one location with Twitter Top Trend info for the specified WOEID
@app.route("/locations/<a_woeid>")
def get_info_for_location(a_woeid):
    """Return the most recent 'locations' record for one WOEID.

    The 'locations' table retains records over time (GeoTweet+), so only the
    record with the latest 'updated_at' value for the WOEID is returned.

    Arguments:
        a_woeid: The WOEID of the location of interest

    Returns:
        A list of dictionaries holding the column values of the matching
        location record(s) keyed by column name; the list is empty if the
        WOEID is not in the 'locations' table.
    """
    # Names of the Location columns to report, in output key order
    column_names = [
        'updated_at', 'woeid', 'latitude', 'longitude',
        'name_full', 'name_only', 'name_woe',
        'county_name', 'county_name_only', 'county_woeid',
        'state_name', 'state_name_only', 'state_woeid',
        'country_name', 'country_name_only', 'country_woeid',
        'place_type', 'timezone',
        'twitter_type', 'twitter_country', 'tritter_country_code',
        'twitter_name', 'twitter_parentid',
    ]

    # Subquery to find the most recent "updated_at" record per woeid
    newest = db.session.query(
        Location.woeid,
        func.max(Location.updated_at).label("max_updated_at")
    ).group_by(Location.woeid).subquery()

    # Select the requested woeid, limited to its most recent record
    location_query = db.session.query(Location).filter( and_(
        Location.woeid == a_woeid,
        Location.woeid == newest.c.woeid,
        Location.updated_at == newest.c.max_updated_at
    ))
    matches = location_query.order_by(Location.woeid).all()

    # Build one dictionary of column values per matching record
    loc_list = []
    for match in matches:
        loc_info = {}
        for column in column_names:
            loc_info[column] = getattr(match, column)
        loc_list.append(loc_info)

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(loc_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return loc_list


In [None]:
#********************************************************************************
# Return a list of the 'locations' records for the specified WOEID updated within the specified date range
@app.route("/locations/interval/<a_date_range>/<a_woeid>")
def get_interval_info_for_location(a_date_range, a_woeid):
    """Return every stored 'locations' record for one location within a date range.

    Rows are selected when their woeid matches and the date part of
    'updated_at' lies inside the parsed range.  No "most recent only"
    filtering is applied, so one location may yield several records.

    Args:
        a_date_range: Range specification handed to parse_date_range():
            "2019-03-01"             ->   ">= 3/1/19"
            ":2019-06-30"            ->   "<= 6/30/19"
            "2019-03-01:2019-06-30"  ->   ">= 3/1/19 and  <= 6/30/19"
            "all"                    ->    all dates
            ":"                      ->    same as "all"
            ""                       ->    same as "all"
        a_woeid: woeid of the location of interest.

    Returns:
        A list with one dict per matching record, or [{'ERROR': 'ERROR'}]
        when the date range could not be parsed.
    """
    # Parse the date range
    q_start_date, q_end_date = parse_date_range(a_date_range)

    # Return with an error if there was a problem parsing the date range.
    # The error marker is returned the same way as the normal result below:
    # jsonify() needs a Flask application context, which a direct call from a
    # notebook cell does not provide.
    if q_start_date == "ERROR" or q_end_date == "ERROR":
        loc_list = [{'ERROR': 'ERROR'}]
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:        return jsonify(loc_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
        return (loc_list)

    # func.date() compares on the date part of 'updated_at' only
    results = db.session.query(Location).filter(and_(
        Location.woeid == a_woeid,
        func.date(Location.updated_at) >= q_start_date,
        func.date(Location.updated_at) <= q_end_date
    )).order_by(Location.woeid).all()

    # Every output key is also the name of the Location attribute it is read
    # from ('tritter_country_code' is spelled exactly as the model attribute).
    loc_fields = (
        'updated_at', 'woeid', 'latitude', 'longitude',
        'name_full', 'name_only', 'name_woe',
        'county_name', 'county_name_only', 'county_woeid',
        'state_name', 'state_name_only', 'state_woeid',
        'country_name', 'country_name_only', 'country_woeid',
        'place_type', 'timezone',
        'twitter_type', 'twitter_country', 'tritter_country_code',
        'twitter_name', 'twitter_parentid',
    )
    loc_list = [{field: getattr(r, field) for field in loc_fields} for r in results]

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(loc_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (loc_list)


In [None]:
#********************************************************************************
# Return a list of all locations that have the specified tweet in its top trends
# and then sort the results by tweet volume in descending order (with NULLs last)
@app.route("/locations/tweet/<a_tweet>")
def get_locations_with_tweet(a_tweet):
    """Return the locations whose most recent trends include the given tweet.

    Only the newest 'trends' record and the newest 'locations' record per
    woeid are considered (GeoTweet+ retains older records in both tables).
    Results are sorted by tweet volume in descending order; NULL volumes are
    treated as -9999 for sorting, which places them last.

    Args:
        a_tweet: Value matched exactly against Trend.twitter_tweet_name.

    Returns:
        A list of dicts, each combining the Location fields with the fields
        of the matching Trend.
    """
    # Newest 'updated_at' per woeid in the locations table
    newest_loc = db.session.query(
        Location.woeid,
        func.max(Location.updated_at).label("max_loc_updated_at")
    ).group_by(Location.woeid).subquery()

    # Newest 'updated_at' per woeid in the trends table
    newest_trend = db.session.query(
        Trend.woeid,
        func.max(Trend.updated_at).label("max_trend_updated_at")
    ).group_by(Trend.woeid).subquery()

    rows = db.session.query(Trend, Location).join(Location).filter(and_(
        Trend.twitter_tweet_name == a_tweet,
        Trend.woeid == newest_trend.c.woeid,
        Trend.updated_at == newest_trend.c.max_trend_updated_at,
        Location.woeid == newest_loc.c.woeid,
        Location.updated_at == newest_loc.c.max_loc_updated_at
    )).order_by(coalesce(Trend.twitter_tweet_volume, -9999).desc()).all()

    # Attributes copied under their own name; the two 'updated_at' values are
    # handled separately because they get distinct output keys.
    location_fields = (
        'woeid', 'latitude', 'longitude',
        'name_full', 'name_only', 'name_woe',
        'county_name', 'county_name_only', 'county_woeid',
        'state_name', 'state_name_only', 'state_woeid',
        'country_name', 'country_name_only', 'country_woeid',
        'place_type', 'timezone',
        'twitter_type', 'twitter_country', 'tritter_country_code',
        'twitter_parentid',
    )
    trend_fields = (
        'twitter_as_of', 'twitter_created_at', 'twitter_name',
        'twitter_tweet_name', 'twitter_tweet_promoted_content',
        'twitter_tweet_query', 'twitter_tweet_url', 'twitter_tweet_volume',
    )

    loc_list = []
    for trend, location in rows:
        loc_info = {'loc_updated_at': location.updated_at}
        loc_info.update((name, getattr(location, name)) for name in location_fields)
        loc_info['trend_updated_at'] = trend.updated_at
        loc_info.update((name, getattr(trend, name)) for name in trend_fields)
        loc_list.append(loc_info)

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(loc_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (loc_list)


In [None]:
#********************************************************************************
# Return a list of all locations that have the specified tweet in their top trends within the specified date range
# and then sort the results by tweet volume in descending order (with NULLs last)
@app.route("/locations/interval/<a_date_range>/tweet/<a_tweet>")
def get_interval_locations_with_tweet(a_date_range, a_tweet):
    """Return locations that had the given tweet in their trends within a date range.

    Every 'trends' record whose tweet name matches and whose 'updated_at'
    date lies in the range is joined to Location.  Results are sorted by
    tweet volume in descending order; NULL volumes are treated as -9999 for
    sorting, which places them last.

    NOTE(review): the date filter is applied to Trend.updated_at only and the
    join condition comes from the model definitions (not visible here).  If
    several Location records exist per woeid, each trend may be paired with
    all of them - confirm this is intended.

    Args:
        a_date_range: Range specification handed to parse_date_range():
            "2019-03-01"             ->   ">= 3/1/19"
            ":2019-06-30"            ->   "<= 6/30/19"
            "2019-03-01:2019-06-30"  ->   ">= 3/1/19 and  <= 6/30/19"
            "all"                    ->    all dates
            ":"                      ->    same as "all"
            ""                       ->    same as "all"
        a_tweet: Value matched exactly against Trend.twitter_tweet_name.

    Returns:
        A list of dicts, each combining the Location fields with the fields
        of the matching Trend, or [{'ERROR': 'ERROR'}] when the date range
        could not be parsed.
    """
    # Parse the date range
    q_start_date, q_end_date = parse_date_range(a_date_range)

    # Return with an error if there was a problem parsing the date range.
    # The error marker is returned the same way as the normal result below:
    # jsonify() needs a Flask application context, which a direct call from a
    # notebook cell does not provide.
    if q_start_date == "ERROR" or q_end_date == "ERROR":
        loc_list = [{'ERROR': 'ERROR'}]
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:        return jsonify(loc_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
        return (loc_list)

    # Query to pull all Trends for the tweet within the date range.
    # In the order_by clause, coalesce() replaces NULL values in the
    # twitter_tweet_volume field with -9999 for the descending sort.
    results = db.session.query(Trend, Location).join(Location).filter(and_(
        Trend.twitter_tweet_name == a_tweet,
        func.date(Trend.updated_at) >= q_start_date,
        func.date(Trend.updated_at) <= q_end_date
    )).order_by(coalesce(Trend.twitter_tweet_volume, -9999).desc()).all()

    # Attributes copied under their own name; the two 'updated_at' values are
    # handled separately because they get distinct output keys.
    # ('tritter_country_code' is spelled exactly as the model attribute.)
    location_fields = (
        'woeid', 'latitude', 'longitude',
        'name_full', 'name_only', 'name_woe',
        'county_name', 'county_name_only', 'county_woeid',
        'state_name', 'state_name_only', 'state_woeid',
        'country_name', 'country_name_only', 'country_woeid',
        'place_type', 'timezone',
        'twitter_type', 'twitter_country', 'tritter_country_code',
        'twitter_parentid',
    )
    trend_fields = (
        'twitter_as_of', 'twitter_created_at', 'twitter_name',
        'twitter_tweet_name', 'twitter_tweet_promoted_content',
        'twitter_tweet_query', 'twitter_tweet_url', 'twitter_tweet_volume',
    )

    loc_list = []
    for trend, location in results:
        loc_info = {'loc_updated_at': location.updated_at}
        loc_info.update((name, getattr(location, name)) for name in location_fields)
        loc_info['trend_updated_at'] = trend.updated_at
        loc_info.update((name, getattr(trend, name)) for name in trend_fields)
        loc_list.append(loc_info)

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(loc_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (loc_list)


In [None]:
#********************************************************************************
# Return the full list of all trends with Twitter Top Trend info
@app.route("/trends")
def get_all_trends():
    """Return the most recent trends for every woeid in the 'trends' table.

    GeoTweet+ retains older trend records, so only rows carrying the newest
    'updated_at' for their woeid are returned.  Results are sorted by tweet
    volume in descending order; NULL volumes are treated as -9999 for
    sorting, which places them last.

    Returns:
        A list with one dict per trend record.
    """
    # Newest 'updated_at' per woeid
    newest = db.session.query(
        Trend.woeid,
        func.max(Trend.updated_at).label("max_updated_at")
    ).group_by(Trend.woeid).subquery()

    # Keep only the rows that belong to that newest batch
    is_newest = and_(
        Trend.woeid == newest.c.woeid,
        Trend.updated_at == newest.c.max_updated_at
    )
    volume_desc = coalesce(Trend.twitter_tweet_volume, -9999).desc()
    rows = db.session.query(Trend).filter(is_newest).order_by(volume_desc).all()

    # Every output key is also the name of the Trend attribute it is read from
    trend_fields = (
        'updated_at', 'woeid', 'twitter_as_of', 'twitter_created_at',
        'twitter_name', 'twitter_tweet_name', 'twitter_tweet_promoted_content',
        'twitter_tweet_query', 'twitter_tweet_url', 'twitter_tweet_volume',
    )
    trend_list = [{name: getattr(row, name) for name in trend_fields} for row in rows]

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(trend_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (trend_list)


In [None]:
#********************************************************************************
# Return the full list of all trends with Twitter Top Trend info within the specified date range
@app.route("/trends/interval/<a_date_range>")
def get_interval_all_trends(a_date_range):
    """Return every stored 'trends' record whose 'updated_at' date is in a range.

    No "most recent only" filtering is applied.  Results are sorted by tweet
    volume in descending order; NULL volumes are treated as -9999 for
    sorting, which places them last.

    Args:
        a_date_range: Range specification handed to parse_date_range():
            "2019-03-01"             ->   ">= 3/1/19"
            ":2019-06-30"            ->   "<= 6/30/19"
            "2019-03-01:2019-06-30"  ->   ">= 3/1/19 and  <= 6/30/19"
            "all"                    ->    all dates
            ":"                      ->    same as "all"
            ""                       ->    same as "all"

    Returns:
        A list with one dict per trend record, or [{'ERROR': 'ERROR'}] when
        the date range could not be parsed.
    """
    # Parse the date range
    q_start_date, q_end_date = parse_date_range(a_date_range)

    # Return with an error if there was a problem parsing the date range.
    # The error marker is returned the same way as the normal result below:
    # jsonify() needs a Flask application context, which a direct call from a
    # notebook cell does not provide.
    if q_start_date == "ERROR" or q_end_date == "ERROR":
        trend_list = [{'ERROR': 'ERROR'}]
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:        return jsonify(trend_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
        return (trend_list)

    # Query to pull all Trends within the date range;
    # func.date() compares on the date part of 'updated_at' only
    results = db.session.query(Trend).filter(and_(
        func.date(Trend.updated_at) >= q_start_date,
        func.date(Trend.updated_at) <= q_end_date
    )).order_by(coalesce(Trend.twitter_tweet_volume, -9999).desc()).all()

    # Every output key is also the name of the Trend attribute it is read from
    trend_fields = (
        'updated_at', 'woeid', 'twitter_as_of', 'twitter_created_at',
        'twitter_name', 'twitter_tweet_name', 'twitter_tweet_promoted_content',
        'twitter_tweet_query', 'twitter_tweet_url', 'twitter_tweet_volume',
    )
    trend_list = [{field: getattr(r, field) for field in trend_fields} for r in results]

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(trend_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (trend_list)


In [None]:
#********************************************************************************
# Return the full list of Twitter Top Trends for a specific location
# and then sort the results by tweet volume in descending order (with NULLs last)
@app.route("/trends/<a_woeid>")
def get_trends_for_location(a_woeid):
    """Return the most recent trends stored for one location.

    GeoTweet+ retains older trend records, so only rows carrying the newest
    'updated_at' for the woeid are returned.  Results are sorted by tweet
    volume in descending order; NULL volumes are treated as -9999 for
    sorting, which places them last.

    Args:
        a_woeid: woeid of the location of interest.

    Returns:
        A list with one dict per trend record.
    """
    # Newest 'updated_at' per woeid
    newest = db.session.query(
        Trend.woeid,
        func.max(Trend.updated_at).label("max_updated_at")
    ).group_by(Trend.woeid).subquery()

    # Requested location, restricted to its newest batch of trends
    wanted = and_(
        Trend.woeid == a_woeid,
        Trend.woeid == newest.c.woeid,
        Trend.updated_at == newest.c.max_updated_at
    )
    volume_desc = coalesce(Trend.twitter_tweet_volume, -9999).desc()
    rows = db.session.query(Trend).filter(wanted).order_by(volume_desc).all()

    # Every output key is also the name of the Trend attribute it is read from
    trend_fields = (
        'updated_at', 'woeid', 'twitter_as_of', 'twitter_created_at',
        'twitter_name', 'twitter_tweet_name', 'twitter_tweet_promoted_content',
        'twitter_tweet_query', 'twitter_tweet_url', 'twitter_tweet_volume',
    )
    trend_list = [{name: getattr(row, name) for name in trend_fields} for row in rows]

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(trend_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (trend_list)


In [None]:
#********************************************************************************
# Return the full list of Twitter Top Trends for a specific location within the specified date range
@app.route("/trends/interval/<a_date_range>/<a_woeid>")
def get_interval_trends_for_location(a_date_range, a_woeid):
    """Return every stored 'trends' record for one location within a date range.

    No "most recent only" filtering is applied.  Results are sorted by tweet
    volume in descending order; NULL volumes are treated as -9999 for
    sorting, which places them last.

    Args:
        a_date_range: Range specification handed to parse_date_range():
            "2019-03-01"             ->   ">= 3/1/19"
            ":2019-06-30"            ->   "<= 6/30/19"
            "2019-03-01:2019-06-30"  ->   ">= 3/1/19 and  <= 6/30/19"
            "all"                    ->    all dates
            ":"                      ->    same as "all"
            ""                       ->    same as "all"
        a_woeid: woeid of the location of interest.

    Returns:
        A list with one dict per trend record, or [{'ERROR': 'ERROR'}] when
        the date range could not be parsed.
    """
    # Parse the date range
    q_start_date, q_end_date = parse_date_range(a_date_range)

    # Return with an error if there was a problem parsing the date range.
    # The error marker is returned the same way as the normal result below:
    # jsonify() needs a Flask application context, which a direct call from a
    # notebook cell does not provide.
    if q_start_date == "ERROR" or q_end_date == "ERROR":
        trend_list = [{'ERROR': 'ERROR'}]
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:        return jsonify(trend_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
        return (trend_list)

    # Query to pull all Trends for the location within the date range;
    # func.date() compares on the date part of 'updated_at' only
    results = db.session.query(Trend).filter(and_(
        Trend.woeid == a_woeid,
        func.date(Trend.updated_at) >= q_start_date,
        func.date(Trend.updated_at) <= q_end_date
    )).order_by(coalesce(Trend.twitter_tweet_volume, -9999).desc()).all()

    # Every output key is also the name of the Trend attribute it is read from
    trend_fields = (
        'updated_at', 'woeid', 'twitter_as_of', 'twitter_created_at',
        'twitter_name', 'twitter_tweet_name', 'twitter_tweet_promoted_content',
        'twitter_tweet_query', 'twitter_tweet_url', 'twitter_tweet_volume',
    )
    trend_list = [{field: getattr(r, field) for field in trend_fields} for r in results]

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(trend_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (trend_list)


In [None]:
#********************************************************************************
# Return the top 10 list of Twitter Top Trends for a specific location
# and then sort the results by tweet volume in descending order (with NULLs last)
@app.route("/trends/top/<a_woeid>")
def get_top_trends_for_location(a_woeid):
    """Return the highest-volume trends from the most recent batch for one location.

    GeoTweet+ retains older trend records, so only rows carrying the newest
    'updated_at' for the woeid are considered.  They are sorted by tweet
    volume in descending order (NULL volumes are treated as -9999 for
    sorting, which places them last) and at most 10 rows are returned.

    Args:
        a_woeid: woeid of the location of interest.

    Returns:
        A list of at most 10 dicts, one per trend record.
    """
    # Newest 'updated_at' per woeid
    newest = db.session.query(
        Trend.woeid,
        func.max(Trend.updated_at).label("max_updated_at")
    ).group_by(Trend.woeid).subquery()

    # Requested location, restricted to its newest batch of trends
    wanted = and_(
        Trend.woeid == a_woeid,
        Trend.woeid == newest.c.woeid,
        Trend.updated_at == newest.c.max_updated_at
    )
    volume_desc = coalesce(Trend.twitter_tweet_volume, -9999).desc()
    rows = db.session.query(Trend).filter(wanted).order_by(volume_desc).limit(10).all()

    # Every output key is also the name of the Trend attribute it is read from
    trend_fields = (
        'updated_at', 'woeid', 'twitter_as_of', 'twitter_created_at',
        'twitter_name', 'twitter_tweet_name', 'twitter_tweet_promoted_content',
        'twitter_tweet_query', 'twitter_tweet_url', 'twitter_tweet_volume',
    )
    trend_list = [{name: getattr(row, name) for name in trend_fields} for row in rows]

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(trend_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (trend_list)


In [None]:
#********************************************************************************
# Return the top 10 list of Twitter Top Trends for a specific location within the specified date range
@app.route("/trends/interval/<a_date_range>/top/<a_woeid>")
def get_interval_top_trends_for_location(a_date_range, a_woeid):
    """Return the highest-volume trends for one location within a date range.

    All 'trends' records for the woeid whose 'updated_at' date lies in the
    range are sorted by tweet volume in descending order (NULL volumes are
    treated as -9999 for sorting, which places them last) and at most 10
    rows are returned.

    NOTE(review): the limit is applied across every record in the range, with
    no de-duplication by tweet, so the same tweet may appear more than once
    if it was stored on several updates - confirm this is intended.

    Args:
        a_date_range: Range specification handed to parse_date_range():
            "2019-03-01"             ->   ">= 3/1/19"
            ":2019-06-30"            ->   "<= 6/30/19"
            "2019-03-01:2019-06-30"  ->   ">= 3/1/19 and  <= 6/30/19"
            "all"                    ->    all dates
            ":"                      ->    same as "all"
            ""                       ->    same as "all"
        a_woeid: woeid of the location of interest.

    Returns:
        A list of at most 10 dicts, one per trend record, or
        [{'ERROR': 'ERROR'}] when the date range could not be parsed.
    """
    # Parse the date range
    q_start_date, q_end_date = parse_date_range(a_date_range)

    # Return with an error if there was a problem parsing the date range.
    # The error marker is returned the same way as the normal result below:
    # jsonify() needs a Flask application context, which a direct call from a
    # notebook cell does not provide.
    if q_start_date == "ERROR" or q_end_date == "ERROR":
        trend_list = [{'ERROR': 'ERROR'}]
# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:        return jsonify(trend_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
        return (trend_list)

    # Query to pull the top Trends for the location within the date range;
    # func.date() compares on the date part of 'updated_at' only
    results = db.session.query(Trend).filter(and_(
        Trend.woeid == a_woeid,
        func.date(Trend.updated_at) >= q_start_date,
        func.date(Trend.updated_at) <= q_end_date
    )).order_by(coalesce(Trend.twitter_tweet_volume, -9999).desc()).limit(10).all()

    # Every output key is also the name of the Trend attribute it is read from
    trend_fields = (
        'updated_at', 'woeid', 'twitter_as_of', 'twitter_created_at',
        'twitter_name', 'twitter_tweet_name', 'twitter_tweet_promoted_content',
        'twitter_tweet_query', 'twitter_tweet_url', 'twitter_tweet_volume',
    )
    trend_list = [{field: getattr(r, field) for field in trend_fields} for r in results]

# COMMENTED OUT TO RUN IN JUPYTER NOTEBOOK:    return jsonify(trend_list)
# CODE TO RUN IN JUPYTER NOTEBOOK
    return (trend_list)


In [None]:
# if __name__ == "__main__":
#     app.run()

# Verify Rate Limit Flask Route functions

In [None]:
# Call update_info() directly as a plain function; its return value is shown as the cell output.
# NOTE(review): update_info() is defined outside this section - presumably one of the
# rate limit routes this part of the notebook verifies; confirm.
update_info()

In [None]:
# Call update_info_other() directly as a plain function; its return value is shown as the cell output.
# NOTE(review): update_info_other() is defined outside this section - presumably one of the
# rate limit routes this part of the notebook verifies; confirm.
update_info_other()

# Verify DB table update functions using Local Database

In [None]:
# Update locations table
# n_locations = update_db_locations_table()
# print(n_locations)

In [None]:
# Update trends table
# n_location_trends = update_db_trends_table()
# print(n_location_trends)

# Verify support function parse_date_range()

In [None]:
# Testing if support function parse_date_range() is working ok for all input types:
# a non-date string ("UTC"), None, "all", an empty string, ":" alone, a start date
# (with and without the trailing ":"), an end date only, a start and end date,
# and a non-date string in the end-date position (":UTC").
# Each input is printed next to the (start, end) pair it was parsed into.
for a_date_range in [ "UTC", None, "all", "", ":", "2019-03-01", "2019-03-01:", ":2019-06-01", "2019-03-01:2019-06-30", ":UTC"]:
    (q_start_date, q_end_date) = parse_date_range(a_date_range)
    print(f"a_date_range: '{a_date_range}' => q_start_date '{q_start_date}', q_end_date '{q_end_date}'")

# Verify Basic DB functions using Local Database

In [None]:
# Read all locations, then print the number of records returned followed by the records themselves
retval = get_all_locations()
print(len(retval))
pprint(retval)

In [None]:
# Read all locations for a date range, then print the record count and the records
# NOTE(review): the interval routes document their dates as "YYYY-MM-DD";
# confirm parse_date_range() also accepts the "4/29/19" form used here.
retval = get_interval_all_locations("4/29/19")
print(len(retval))
pprint(retval)

In [None]:
# Read one location - e.g., 2352824 (Albuquerque)
# (the woeid is passed as an int here), then print the record count and the records
retval = get_info_for_location(2352824)
print(len(retval))
pprint(retval)

In [None]:
# Read the records for one location - e.g., 2352824 (Albuquerque) - within a date range
# NOTE(review): the interval routes document their dates as "YYYY-MM-DD";
# confirm parse_date_range() also accepts the "4/29/19" form used here.
retval = get_interval_info_for_location("4/29/19",2352824)
print(len(retval))
pprint(retval)

In [None]:
# Read all trends (most recent update per woeid), then print the record count and the records
retval = get_all_trends()
print(len(retval))
pprint(retval)

In [None]:
# Read all trends within a date range, then print the record count and the records
# NOTE(review): the interval routes document their dates as "YYYY-MM-DD";
# confirm parse_date_range() also accepts the "4/28/19" form used here.
retval = get_interval_all_trends("4/28/19")
print(len(retval))
pprint(retval)

In [None]:
# Read the most recent trends for one location - e.g., 2352824 (Albuquerque),
# then print the record count and the records
retval = get_trends_for_location(2352824)
print(len(retval))
pprint(retval)

In [None]:
# Read trends for one location - e.g., 2352824 (Albuquerque) - within a date range
# NOTE(review): the interval routes document their dates as "YYYY-MM-DD";
# confirm parse_date_range() also accepts the "4/28/19" form used here.
retval = get_interval_trends_for_location("4/28/19", 2352824)
print(len(retval))
pprint(retval)

In [None]:
# Read only the top trends (at most 10, by tweet volume) for one location - e.g., 2352824 (Albuquerque)
retval = get_top_trends_for_location(2352824)
print(len(retval))
pprint(retval)

In [None]:
# Read only the top trends (at most 10, by tweet volume) for one location
# - e.g., 2352824 (Albuquerque) - within a date range
# NOTE(review): the interval routes document their dates as "YYYY-MM-DD";
# confirm parse_date_range() also accepts the "4/28/19" form used here.
retval = get_interval_top_trends_for_location("4/28/19", 2352824)
print(len(retval))
pprint(retval)

In [None]:
# Read all locations with specified tweet in its trends list - e.g., "#AvengersEndgame"
retval = get_locations_with_tweet("#AvengersEndgame")
print(len(retval))
pprint(retval)

In [None]:
# Read all locations with specified tweet in its trends list - e.g., "#AvengersEndgame" -
# over all dates (an empty date range is documented on the route as the same as "all")
retval = get_interval_locations_with_tweet("","#AvengersEndgame")
print(len(retval))
pprint(retval)

In [None]:
# db.session.close()

# Investigate a new 'tweets' table