In [1]:
# Project 3 - GeoTweet+
# 
# @Author Jeffery Brown (daddyjab)
# @Date 4/21/19
# @File app.py


# import necessary libraries
import os
from flask import Flask, render_template, jsonify, request, redirect

# Import Flask_CORS extension to enable Cross Origin Resource Sharing (CORS)
# when deployed on Heroku
from flask_cors import CORS

#################################################
# Flask Setup
#################################################
# The Flask application object; the database setup below binds to it
app = Flask(__name__)

# Flask-SQLAlchemy setting: turn on tracking of model modifications.
# NOTE(review): the original author flagged this as "probably not needed";
# confirm nothing relies on it before switching it off.
app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = True

# Enable Cross Origin Resource Sharing (CORS) for the app
CORS(app)

#################################################
# Database Setup
#################################################

from flask_sqlalchemy import SQLAlchemy
from sqlalchemy.sql.expression import func, and_, or_
from sqlalchemy.orm import sessionmaker

# sqlAlchemy-utc - provides a helper function utcnow() that will
# help us set the default timestamp of when a record is created
# using UTC time (vs. local time provided by func.now() )
# from sqlalchemy_utc import utcnow

from datetime import datetime, date, timedelta
from dateutil import parser


#Probably don't need these from SQLAlchemy: asc, desc, between, distinct, func, null, nullsfirst, nullslast, or_, and_, not_

In [2]:
# db.session.close()

In [3]:
# Import keys and other info
# postgres_geotweetapp_login
# postgres_geotweetapp_password
from api_config import *



#REVISED PATH HERE WITH JUPYTER NOTEBOOK RUNNING IN `resources` FOLDER: ******************************
# db_path_flask_app = "sqlite:///data/twitter_trends.db"

#REVISED TO SWITCH TO LOCAL DB THROUGH POSTGRESQL
# db_path_flask_app = f"postgresql://{postgres_geotweetapp_login}:{postgres_geotweetapp_password}@localhost/twitter_trends"
# app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get('DATABASE_URL', '') or db_path_flask_app


# REVISED TO USE SQLITE BY DEFAULT, BUT USE POSTGRESQL IF ITS CONFIGURED LOCALLY
# Local DB path for SQLite - default
# Default database URI: local SQLite file.  It is replaced below when
# local PostgreSQL credentials are available, and is itself only a fallback
# for the DATABASE_URL environment variable (see the assignment to
# app.config['SQLALCHEMY_DATABASE_URI']).
db_path_flask_app = "sqlite:///data/twitter_trends.db"

# Local DB path for PostgreSQL - use only if login/password populated
try:
    # PostgreSQL database login/password
    # -- only needed if using a local PostgreSQL instance (vs. SQLite)
    # This import raises ImportError when api_config is missing or when it
    # does not define both names.
    from api_config import (postgres_geotweetapp_login, postgres_geotweetapp_password)

    # Switch to PostgreSQL only when both the login and the password are populated
    if (postgres_geotweetapp_login is not None) and (postgres_geotweetapp_password is not None):
        # NOTE(review): the credentials are interpolated into the URI as-is;
        # a login/password containing characters such as '@', ':' or '/'
        # would need URL-encoding first - confirm this cannot occur.
        db_path_flask_app = f"postgresql://{postgres_geotweetapp_login}:{postgres_geotweetapp_password}@localhost/twitter_trends"
        print("Note: Local PostgreSQL database login/password is populated")

# Credentials could not be imported: keep the SQLite default and report it
except ImportError:
    print("Note: Local PostgreSQL database login/password is *not* populated")

# A non-empty DATABASE_URL environment variable takes precedence over the
# locally selected path; an unset or empty value falls back to it
app.config['SQLALCHEMY_DATABASE_URI'] = os.environ.get('DATABASE_URL', '') or db_path_flask_app

# Flask-SQLAlchemy database handle bound to the Flask app
db = SQLAlchemy(app)

# Import the schema for the Location and Trend tables needed for
# 'twitter_trends.sqlite' database tables 'locations' and 'trends'

#DIRECTLY ADD CODE HERE WITH JUPYTER NOTEBOOK: *****************************************
# from .models import (Location, Trend)

# Database schema for Twitter 'locations' table
class Location(db.Model):
    """ORM model for the 'locations' table.

    Each row is one U.S. location for which Twitter top-trends data is
    available, together with location details such as latitude/longitude.
    Rows are keyed for lookups by the unique 'woeid' column, which the
    'trends' table references as a foreign key.
    """
    __tablename__ = 'locations'

    # Surrogate primary key and the time the row was last refreshed
    id = db.Column(db.Integer, primary_key=True)
    updated_at = db.Column( db.DateTime )

    # WOEID of the location: unique and required
    woeid = db.Column(db.Integer, unique=True, nullable=False)

    # Fields populated from the Twitter 'trends/available' data.
    # NOTE: 'tritter_country_code' is misspelled, but it is the actual column
    # name and the DataFrame rename in get_loc_with_trends_available_to_df()
    # targets this exact spelling, so it must not be changed in isolation.
    twitter_country = db.Column(db.String(100))
    tritter_country_code = db.Column(db.String(10))
    twitter_name = db.Column(db.String(250))
    twitter_parentid = db.Column(db.Integer)
    twitter_type = db.Column(db.String(50))

    # Fields populated from the location-details lookup (get_location_info)
    country_name = db.Column(db.String(250))
    country_name_only = db.Column(db.String(250))
    country_woeid = db.Column(db.Integer)
    county_name = db.Column(db.String(250))
    county_name_only = db.Column(db.String(250))
    county_woeid = db.Column(db.Integer)
    latitude = db.Column(db.Float)
    longitude = db.Column(db.Float)
    name_full = db.Column(db.String(250))
    name_only = db.Column(db.String(250))
    name_woe = db.Column(db.String(250))
    place_type = db.Column(db.String(250))
    state_name = db.Column(db.String(250))
    state_name_only = db.Column(db.String(250))
    state_woeid = db.Column(db.Integer)
    timezone = db.Column(db.String(250))

    # All Trend rows for this location; also gives Trend a 'my_location' backref
    my_trends = db.relationship('Trend', backref=db.backref('my_location', lazy=True))

    def __repr__(self):
        """Return a short debug representation of this location."""
        # The closing ']' was missing from the original format string
        return f"<Location {self.name_full} [updated_at: {self.updated_at}]>"

# Database schema for Twitter 'trends' table
class Trend(db.Model):
    """ORM model for the 'trends' table.

    Each row is one top trend reported by Twitter for a location in the
    'locations' table, linked through the 'woeid' foreign key.
    """
    __tablename__ = 'trends'

    # Surrogate primary key and the time the row was written
    id = db.Column(db.Integer, primary_key=True)
    updated_at = db.Column( db.DateTime )

    # Location this trend belongs to (references locations.woeid)
    woeid = db.Column(db.Integer, db.ForeignKey('locations.woeid') )

    # Values common to every trend of one Twitter trends response
    twitter_as_of = db.Column(db.String(100))
    twitter_created_at = db.Column(db.String(100))
    twitter_name = db.Column(db.String(250))

    # Values specific to an individual trend
    twitter_tweet_name = db.Column(db.String(250))
    twitter_tweet_promoted_content = db.Column(db.String(250))
    twitter_tweet_query = db.Column(db.String(250))
    twitter_tweet_url = db.Column(db.String(250))
    twitter_tweet_volume = db.Column(db.Float)

    # locations = db.relationship('Location', backref=db.backref('trends', lazy=True))

    def __repr__(self):
        """Return a short debug representation of this trend."""
        # 'my_location' is the backref declared on Location.my_trends.  It is
        # None when no 'locations' row matches this trend's woeid, so fall
        # back to the raw WOEID instead of raising AttributeError.
        location = self.my_location
        if location is not None:
            location_label = location.name_full
        else:
            location_label = f"woeid {self.woeid}"

        # The closing ']' was missing from the original format string
        return f"<Trend {location_label}: {self.twitter_tweet_name} [updated_at: {self.updated_at}]>"

#DIRECTLY ADD CODE HERE WITH JUPYTER NOTEBOOK: *****************************************
# Initialize the database on start-up (e.g. on Heroku):
# create any tables declared by the models above, then commit the session
# from python.app import db
db.create_all()
db.session.commit()

Note: Local PostgreSQL database login/password is populated


In [4]:
datetime.utcnow()

datetime.datetime(2019, 4, 29, 22, 54, 37, 389064)

In [53]:
abc = api_calls_remaining("/trends/place")

In [57]:
pprint(abc['trends'])

{'/trends/available': {'limit': 75, 'remaining': 75, 'reset': 1556586242},
 '/trends/closest': {'limit': 75, 'remaining': 75, 'reset': 1556586242},
 '/trends/place': {'limit': 75, 'remaining': 75, 'reset': 1556586242}}


In [65]:
api_rate_limits()

{'lists': {'/lists/list': {'limit': 15, 'remaining': 15, 'reset': 1556586468},
  '/lists/memberships': {'limit': 75, 'remaining': 75, 'reset': 1556586468},
  '/lists/subscribers/show': {'limit': 15,
   'remaining': 15,
   'reset': 1556586468},
  '/lists/members': {'limit': 900, 'remaining': 900, 'reset': 1556586468},
  '/lists/subscriptions': {'limit': 15, 'remaining': 15, 'reset': 1556586468},
  '/lists/show': {'limit': 75, 'remaining': 75, 'reset': 1556586468},
  '/lists/ownerships': {'limit': 15, 'remaining': 15, 'reset': 1556586468},
  '/lists/subscribers': {'limit': 180, 'remaining': 180, 'reset': 1556586468},
  '/lists/members/show': {'limit': 15, 'remaining': 15, 'reset': 1556586468},
  '/lists/statuses': {'limit': 900, 'remaining': 900, 'reset': 1556586468}},
 'application': {'/application/rate_limit_status': {'limit': 180,
   'remaining': 170,
   'reset': 1556586188}},
 'mutes': {'/mutes/users/list': {'limit': 15,
   'remaining': 15,
   'reset': 1556586468},
  '/mutes/users/id

In [59]:
# Import database management functions needed to update the
# 'twitter_trends.sqlite' database tables 'locations' and 'trends'

#DIRECTLY ADD CODE HERE WITH JUPYTER NOTEBOOK: *****************************************
# from .db_management import (
#     api_calls_remaining, api_time_before_reset,
#     update_db_locations_table, update_db_trends_table
#     )

# This file contains functions that update the
# 'twitter_trends.sqlite' database tables
# 'locations' and 'trends' via API calls to Twitter and Flickr

# The following dependencies are only required for update/mgmt of
# 'locations' and 'trends' data, not for reading the data
import json
import time
import os
import pandas as pd
from datetime import datetime
from dateutil import tz
import requests
from pprint import pprint

# Import a pointer to the Flask-SQLAlchemy database session
# created in the main app.py file
# from app import db, Location, Trend

#DIRECTLY ADD CODE HERE WITH JUPYTER NOTEBOOK: *****************************************
# from .app import db, app
# from .models import Location, Trend

# Only perform import if this is being run locally.
# If being run from Heroku the keys will be provided
# via the app environment variables configured there

# Load the Twitter and Flickr API keys.
# First choice: environment variables (as configured on Heroku).
# Fallback: the local api_config module.
# The fallback is all-or-nothing: the first missing environment variable
# raises KeyError, and the star import then (re)binds every name that
# api_config defines, including any keys already read from the environment.
try:
    # This will run if the keys are all set via Heroku environment

    # Twitter API
    key_twitter_tweetquestor_consumer_api_key = os.environ['key_twitter_tweetquestor_consumer_api_key']
    key_twitter_tweetquestor_consumer_api_secret_key = os.environ['key_twitter_tweetquestor_consumer_api_secret_key']
    key_twitter_tweetquestor_access_token = os.environ['key_twitter_tweetquestor_access_token']
    key_twitter_tweetquestor_access_secret_token = os.environ['key_twitter_tweetquestor_access_secret_token']

    # Flickr API
    key_flicker_infoquestor_key = os.environ['key_flicker_infoquestor_key']
    key_flicker_infoquestor_secret = os.environ['key_flicker_infoquestor_secret']

except KeyError:
    # At least one key has not been set in the environment,
    # so import the keys from the local configuration module instead
    try:
        # Twitter API keys
        # Flickr API keys
        # NOTE(review): presumably api_config defines the same key_* names as
        # above; a star import cannot verify that - confirm against api_config.
        from api_config import *

    # If the api_config file is not available, then all we can do is flag an error.
    # Execution continues, so later code that uses the key_* names will
    # raise NameError if they were never bound.
    except ImportError:
        print("Import Keys: At least one of the API Keys has not been populated on Heroku, and api_config not available!")

# Setup Tweepy API Authentication to access Twitter
import tweepy

# Build the module-level Tweepy client 'api' used by the functions below.
# JSONParser makes the API methods return plain parsed-JSON structures
# (dicts/lists), which the functions below index directly.
try:
    auth = tweepy.OAuthHandler(key_twitter_tweetquestor_consumer_api_key, key_twitter_tweetquestor_consumer_api_secret_key)
    auth.set_access_token(key_twitter_tweetquestor_access_token, key_twitter_tweetquestor_access_secret_token)
    api = tweepy.API(auth, parser=tweepy.parsers.JSONParser())

# Only 'import tweepy' is in scope, so the exception class must be reached
# through the module; the bare name 'TweepError' was undefined and turned any
# error raised above into a NameError.
except tweepy.TweepError:
    print("Authentication error: Problem authenticating Twitter API using Tweepy (TweepError)")
    
# # Function Definitions: Twitter API Rate Limit Management

def api_rate_limits():
    """Return the Twitter API rate limit information for all resources.

    Uses the global Tweepy client 'api'.

    Returns:
        The 'resources' entry of the rate limit status response (keyed by
        resource family, e.g. 'trends', 'lists'), or "" when the status
        could not be retrieved or has no 'resources' entry.
    """

    # Get Twitter rate limit information using the Tweepy API.
    # Best-effort boundary: any failure here (including a missing 'api'
    # client) is reported and mapped to the "" sentinel.  Previously the
    # sentinel was only reached through an unbound-variable error swallowed
    # by a bare 'except' further down.
    try:
        rate_limits = api.rate_limit_status()

    except Exception:
        print("Tweepy API: Problem getting Twitter rate limits information using tweepy")
        return ""

    # Return the per-resource limits, or "" if the response lacks them
    try:
        return rate_limits['resources']

    except (KeyError, TypeError):
        return ""


def api_calls_remaining( a_type = "place"):
    """Return the number of Twitter API calls remaining for a trends endpoint.

    Uses the global Tweepy client 'api'.  Errors raised by the rate limit
    request itself are not handled here and propagate to the caller.

    Args:
        a_type: Trends endpoint name, looked up as '/trends/<a_type>':
            'place': Top trending topics for a WOEID
            'closest': Locations near a specified lat/long for which
                Twitter has trending topic info
            'available': Locations for which Twitter has topic info

    Returns:
        The 'remaining' count for the endpoint, or "" if 'a_type' is not a
        valid trends endpoint.
    """

    # Get Twitter rate limit information using the Tweepy API
    rate_limits = api.rate_limit_status()

    # Focus on the rate limits for trends calls
    trends_limits = rate_limits['resources']['trends']

    # Only a failed lookup means "not a valid type"; anything else
    # (e.g. KeyboardInterrupt) should not be swallowed as a bare
    # 'except' would do
    try:
        remaining = trends_limits[ f"/trends/{a_type}" ]['remaining']

    except (KeyError, TypeError):
        return ""

    print(f"Twitter API 'trends/{a_type}' - API Calls Remaining: {remaining}")
    return remaining


def api_time_before_reset( a_type = "place"):
    """Return the number of minutes until the rate limit of a trends endpoint resets.

    Uses the global Tweepy client 'api'.  Errors raised by the rate limit
    request itself are not handled here and propagate to the caller.

    Args:
        a_type: Trends endpoint name, looked up as '/trends/<a_type>':
            'place': Top trending topics for a WOEID
            'closest': Locations near a specified lat/long for which
                Twitter has trending topic info
            'available': Locations for which Twitter has topic info

    Returns:
        Minutes (float) until the reset time; negative if the reset time has
        already passed.  Returns -1 if 'a_type' is not a valid trends endpoint.
    """

    # Get Twitter rate limit information using the Tweepy API
    rate_limits = api.rate_limit_status()

    # Focus on the rate limits for trends calls
    trends_limits = rate_limits['resources']['trends']

    # Get the reset time (a POSIX timestamp) for the requested endpoint;
    # only a failed lookup means "not a valid type"
    try:
        reset_ts = trends_limits[ f"/trends/{a_type}" ]['reset']
    except (KeyError, TypeError):
        return -1

    # Build timezone-aware UTC datetimes directly, rather than creating
    # naive ones and attaching the UTC zone afterwards
    utc_zone = tz.tzutc()
    reset_utc = datetime.fromtimestamp(reset_ts, utc_zone)
    current_utc = datetime.now(utc_zone)

    # Time remaining, converted from seconds to minutes
    time_before_reset = (reset_utc - current_utc).total_seconds() / 60.0

    # Local-time versions are used only for the log message
    local_zone = tz.tzlocal()
    reset_local = reset_utc.astimezone(local_zone)
    current_local = current_utc.astimezone(local_zone)
    print(f"Twitter API 'trends/{a_type}' - Time Before Rate Limit Reset: {time_before_reset:.1f}: Reset Time: {reset_local.strftime('%Y-%m-%d %H:%M:%S')}, Local Time: {current_local.strftime('%Y-%m-%d %H:%M:%S')}")

    # Return the time before reset (in minutes)
    return time_before_reset


# # Function Definitions: Twitter Locations with Available Trends Info

def get_loc_with_trends_available_to_df( ):
    """Return a DataFrame of U.S. locations that have Twitter trends data.

    Calls api.trends_available() on the global Tweepy client 'api', keeps
    only rows whose 'countryCode' is "US", flattens the nested 'placeType'
    information and renames the columns to match the 'locations' table.

    Returns:
        A DataFrame with columns including 'woeid', 'twitter_country',
        'tritter_country_code', 'twitter_name', 'twitter_parentid',
        'twitter_type' and 'updated_at' - or False if the locations could
        not be obtained.
    """

    # Obtain the WOEID locations for which Twitter Trends info is available.
    # Best-effort boundary: any failure is reported and mapped to False.
    try:
        trends_avail = api.trends_available()

    except Exception:
        # No locations info available, return False
        print("Tweepy API: Problem getting locations that have trends available information")
        return False

    # An empty response would produce a frame without a 'countryCode' column
    # and fail with KeyError below, so treat it like any other failure
    if not trends_avail:
        print("Tweepy API: Problem getting locations that have trends available information")
        return False

    # Import trend availability info into a dataframe
    trends_avail_df = pd.DataFrame.from_dict(trends_avail, orient='columns')

    # Set the 'updated_at' column to the current time in UTC timezone for all locations
    trends_avail_df['updated_at'] = datetime.utcnow()

    # Retain only locations in the U.S.  The explicit copy makes the result
    # an independent frame, so the column assignment below is not performed
    # on a slice of the unfiltered frame.
    is_us = trends_avail_df['countryCode'] == "US"
    trends_avail_df = trends_avail_df[is_us].copy()

    # Reset the index
    trends_avail_df.reset_index(drop=True, inplace=True)

    # Flatten the dataframe by unpacking the placeType column information into separate columns
    trends_avail_df['twitter_type'] = trends_avail_df['placeType'].map( lambda x: x['name'])

    # Remove unneeded fields
    trends_avail_df.drop(['placeType', 'url' ], axis='columns' , inplace = True)

    # Rename the fields to the 'locations' table column names.
    # 'tritter_country_code' is the (misspelled) name of the actual column.
    trends_avail_df.rename(columns={
        'country': 'twitter_country',
        'countryCode': 'tritter_country_code',
        'name': 'twitter_name',
        'parentid': 'twitter_parentid' }, inplace=True)

    return trends_avail_df



def _flickr_area_fields( place, area_key, prefix ):
    """Flatten one nested area entry (county/region/country) of a Flickr place.

    Returns a dict with '<prefix>_name', '<prefix>_name_only' and
    '<prefix>_woeid'; all three are None if the area is missing or malformed.
    """
    try:
        area = place[area_key]
        full_name = area['_content']
        return {
            f'{prefix}_name': full_name,
            f'{prefix}_name_only': full_name.split(",")[0].strip(),
            f'{prefix}_woeid': int(area['woeid']),
        }
    except (KeyError, TypeError, ValueError, AttributeError):
        return {
            f'{prefix}_name': None,
            f'{prefix}_name_only': None,
            f'{prefix}_woeid': None,
        }


def get_location_info( a_woeid ):
    """Use a Flickr API call to get location information for a Yahoo! WOEID.

    Note: Yahoo! no longer supports this type of lookup! :(

    Args:
        a_woeid: The WOEID to look up.

    Returns:
        A flattened dict of location details.  'timezone' and the county,
        state and country fields are always present (None when unavailable);
        'woeid', 'name_woe', 'name_full', 'name_only', 'place_type',
        'latitude' and 'longitude' are present only when Flickr returned the
        basic place information.
        Returns False if the request failed or the response was not JSON.
    """

    # Build the full URL for the API REST request
    flickr_api_url = (
        "https://api.flickr.com/services/rest/"
        f"?method=flickr.places.getInfo&api_key={key_flicker_infoquestor_key}"
        f"&format=json&nojsoncallback=1&woe_id={a_woeid!s}"
    )

    try:
        # Get the REST response and parse it as JSON
        response = requests.get(url=flickr_api_url)
        location_data = response.json()

    except requests.exceptions.RequestException as err:
        print(f"Flickr API: Problem getting location information for WOEID {a_woeid}: {err}")
        return False

    except ValueError as err:
        # Body was not valid JSON (previously an unhandled exception)
        print(f"Flickr API: Response for WOEID {a_woeid} was not valid JSON: {err}")
        return False

    # Tolerate an unexpected top-level JSON type
    if not isinstance(location_data, dict):
        location_data = {}

    # Check for failure to locate the information
    if location_data.get('stat') == 'fail':
        print(f"Flickr API: Problem finding location WOEID {a_woeid}: {location_data.get('message')}")

    # On failure there is no 'place' entry; every lookup below then falls
    # back to its "not available" branch
    place = location_data.get('place')

    # Return just a useful subset of the location info as flattened dictionary
    key_location_info = {}

    # Basic information that should be present for any location
    try:
        key_location_info.update( {
            'woeid': int(place['woeid']),
            'name_woe': place['woe_name'],
            'name_full': place['name'],
            'name_only': place['name'].split(",")[0].strip(),
            'place_type': place['place_type'],
            'latitude': float(place['latitude']),
            'longitude': float(place['longitude']),
        })

    except (KeyError, TypeError, ValueError, AttributeError) as err:
        # The original handler called sys.exc_info() without importing sys
        # (NameError) and printed the literal text "{a_woeid}" because the
        # string was missing its f-prefix.
        print(f"Error - basic location information not returned for WOEID {a_woeid}: {err!r}")

    # Timezone associated with the location - if available
    try:
        key_location_info['timezone'] = place['timezone']
    except (KeyError, TypeError):
        key_location_info['timezone'] = None

    # County, state (Flickr 'region') and country - if available
    key_location_info.update( _flickr_area_fields(place, 'county', 'county') )
    key_location_info.update( _flickr_area_fields(place, 'region', 'state') )
    key_location_info.update( _flickr_area_fields(place, 'country', 'country') )

    return key_location_info


def update_db_locations_table():
    """Add or update the Twitter locations stored in the 'locations' DB table.

    Uses get_loc_with_trends_available_to_df() to get the locations for
    which top trends information is available, get_location_info() to get
    the details of each location, merges both on 'woeid', and then adds new
    rows / updates existing rows of the 'locations' table one at a time
    (existing rows are never deleted).

    This function assumes that the 'locations' table in the database has
    already been configured and is ready for data.

    Returns:
        The total number of rows in the 'locations' table afterwards.  If no
        location data could be obtained the table is left unchanged and its
        current row count is returned.
    """

    # Flatten the Twitter Trends results and populate in a Dataframe.
    # The helper returns False on failure; test identity, because the truth
    # value of a DataFrame is ambiguous.
    loc_with_trends_available_df = get_loc_with_trends_available_to_df( )
    if loc_with_trends_available_df is False:
        print(">>> No Twitter locations available - 'locations' table left unchanged")
        return db.session.query(Location).count()

    # Use the get_location_info() function to add location info (from Flickr)
    # for each location (Twitter WOEID) that has trend info.  Keep only usable
    # results: the helper returns False on request failure and a dict without
    # 'woeid' when the basic place information is missing.
    loc_info_list = [
        info
        for info in ( get_location_info(woeid) for woeid in loc_with_trends_available_df['woeid'] )
        if isinstance(info, dict) and 'woeid' in info
    ]
    if not loc_info_list:
        print(">>> No location details available - 'locations' table left unchanged")
        return db.session.query(Location).count()

    # Create a DataFrame from the location info list
    loc_info_df = pd.DataFrame(loc_info_list)

    # Merge the Twitter trend location available dataframe with the
    # location info dataframe to create a master list of all
    # Twitter Trend locations and associated location information
    twitter_trend_locations_df = loc_with_trends_available_df.merge(loc_info_df, how='inner', on='woeid')

    # CHANGED FOR GeoTweet+: Keep all entries - don't delete them!
    # Update locations already in the table and add locations that are not.
    # There is no cross-database SQLAlchemy support for the 'upsert' operation,
    # so query for each WOEID in the dataframe and decide if an 'add' or an 'update' is needed...

    # Convert all 'NaN' values to 'None' to avoid issues when updating the database
    # Note: Some cities had county_woeid set to "NaN", which caused much havoc with db operations
    # The cast to object comes first so that numeric columns can actually
    # hold None instead of being coerced back to NaN.
    twitter_trend_locations_df = twitter_trend_locations_df.astype(object).where(
        pd.notnull(twitter_trend_locations_df), None)

    # DEBUG hook kept from the original: expose the merged frame as a module global
    global test_df
    test_df = twitter_trend_locations_df

    # Loop through all rows in the update dataframe
    n_adds = 0
    n_updates = 0
    for _, row in twitter_trend_locations_df.iterrows():
        # Column values for this row as key/value pairs (including 'woeid')
        row_dict = row.to_dict()
        woeid = row_dict['woeid']

        existing = db.session.query(Location).filter( Location.woeid == woeid ).first()

        if existing is None:
            # This location is not in the table, so add this entry to the 'locations' table.
            # "**row_dict" passes each column value as a keyword argument to Location
            try:
                db.session.add( Location(**row_dict) )
                db.session.commit()
                n_adds += 1

            except Exception as err:
                print(f">>> Error while attempting to add record to 'locations': {err}")
                db.session.rollback()

        else:
            # This location is in the table, so update this entry in the 'locations' table.
            try:
                db.session.query(Location).filter( Location.woeid == woeid ).update( row_dict )
                db.session.commit()
                n_updates += 1

            except Exception as err:
                print(f">>> Error while attempting to update record in 'locations': {err}")
                db.session.rollback()

    # Return the total number of entries in the Locations table
    num_loc = db.session.query(Location).count()

    print(f"Adds/Updates complete: Adds: {n_adds}, Updates {n_updates} => Rows in 'locations' table: {num_loc}")

    return num_loc



# # Function Definitions: Twitter Top Trends for Twitter Locations

def get_trends_for_loc( a_woeid ):
    """Get the top Twitter trends for a location specified by a WOEID.

    Calls api.trends_place() on the global Tweepy client 'api' and flattens
    the first element of the response.

    Args:
        a_woeid: WOEID of the location.

    Returns:
        A list of dicts, one per trend.  Each dict holds the fields common
        to the response ('woeid', 'updated_at', 'twitter_name',
        'twitter_created_at', 'twitter_as_of' - omitted if they could not be
        read) plus the per-trend 'twitter_tweet_*' fields (all None if any
        of them is missing).
        Returns False if the trends could not be retrieved.
    """

    # Get the top trends for this WOEID.
    # Best-effort boundary: any failure is reported and mapped to False.
    try:
        top_trends = api.trends_place( a_woeid )[0]

    except Exception:
        # No top trends info available for this WOEID, return False
        print(f"Tweepy API: Problem getting trends information for WOEID {a_woeid}")
        return False

    # Repeat some information that is common for all elements in the trends list
    common_info = {}

    # Basic information that should be present for any location
    # 'updated_at': Current time in UTC timezone
    # 'as_of': '2019-03-26T21:22:42Z',
    # 'created_at': '2019-03-26T21:17:18Z',
    # 'locations': [{'name': 'Atlanta', 'woeid': 2357024}]
    try:
        common_info.update( {
            'woeid': int(top_trends['locations'][0]['woeid']),
            'updated_at': datetime.utcnow(),
            'twitter_name': top_trends['locations'][0]['name'],
            'twitter_created_at': top_trends['created_at'],
            'twitter_as_of': top_trends['as_of']
        })

    except (KeyError, IndexError, TypeError, ValueError) as err:
        # The original handler called sys.exc_info() without importing sys
        # (NameError) and printed the literal text "{a_woeid}" because the
        # string was missing its f-prefix.
        print(f"Error - basic location information not returned for WOEID {a_woeid}: {err!r}")

    # Loop through all of the trends and store in an array of dictionary elements
    # 'name': 'Jussie Smollett'
    # 'promoted_content': None
    # 'query': '%22Jussie+Smollett%22'
    # 'tweet_volume': 581331
    # 'url': 'http://twitter.com/search?q=%22Jussie+Smollett%22'
    trend_info = []

    for ti in top_trends['trends']:

        # Put the trend info into a dictionary, starting with the common info
        this_trend = common_info.copy()

        # Per-trend fields: all-or-nothing, a trend with any field missing
        # is stored with every per-trend field set to None
        try:
            this_trend.update( {
                'twitter_tweet_name': ti['name'],
                'twitter_tweet_promoted_content': ti['promoted_content'],
                'twitter_tweet_query': ti['query'],
                'twitter_tweet_volume': ti['tweet_volume'],
                'twitter_tweet_url': ti['url']
            })

        except (KeyError, TypeError):
            this_trend.update( {
                'twitter_tweet_name': None,
                'twitter_tweet_promoted_content': None,
                'twitter_tweet_query': None,
                'twitter_tweet_volume': None,
                'twitter_tweet_url': None
            })

        # Append this trend to the list
        trend_info.append( this_trend )

    return trend_info



def update_db_trends_table():
    """Append the current Twitter top trends of every known location to the 'trends' table.

    Reads the WOEIDs from the 'locations' DB table, then for each one gets
    the Twitter top trends info and appends it to the 'trends' table
    (existing rows are never deleted).  Before each trends request the rate
    limit is checked; when fewer than 2 calls remain, the function sleeps
    until the scheduled reset plus a 1 minute buffer before continuing.

    This function assumes that the 'trends' table in the database has
    already been configured and is ready for data.

    Returns:
        The number of locations whose trends were written to the database.
    """

    # Obtain the list of Twitter locations from the 'locations' DB table
    loc_list = [ x[0] for x in db.session.query(Location.woeid).all()]
    print(f"Retrieved {len(loc_list)} locations for processing")

    # Keep track of the actual number of locations
    # where trend info was written to the 'trends' table
    num_location_trends_written_to_db = 0

    for tw_woeid in loc_list:
        print(f">> Updating trends for location {tw_woeid}")

        # Make sure we haven't hit the rate limit yet
        calls_remaining = api_calls_remaining( "place" )
        time_before_reset = api_time_before_reset( "place" )

        # If we're close to hitting the rate limit for the trends/place API,
        # then wait until the next reset =
        # 'time_before_reset' minutes + 1 minute buffer.
        # api_calls_remaining() returns "" when the count is unknown, which
        # cannot be compared with an int, so only compare real counts.
        if isinstance(calls_remaining, int) and calls_remaining < 2:
            print (f">> Waiting {time_before_reset} minutes due to rate limit")
            # Clamp at zero: time.sleep() rejects negative values, which can
            # occur when the reset time has already passed
            time.sleep( max(time_before_reset + 1, 0) * 60)

        # Get trend info for a WOEID location; False (or an empty list)
        # means there is nothing to write for this location
        t_info = get_trends_for_loc(tw_woeid)
        if not t_info:
            print(f">> Error occurred with location {tw_woeid} while attempting to prepare and write trends data")
            continue

        try:
            # Create a DataFrame
            t_info_df = pd.DataFrame(t_info)

            # CHANGED FOR GeoTweet+: Keep all entries - don't delete them!

            # Append trends for this WOEID to the 'trends' database table.
            # The keyword was previously misspelled 'if_exisrts', so every
            # call raised TypeError (hidden by a bare 'except') and no trends
            # were ever written.
            t_info_df.to_sql( 'trends', con=db.engine, if_exists='append', index=False)
            db.session.commit()

            # Increment the count
            num_location_trends_written_to_db += 1

        except Exception as err:
            # Best-effort per location: report the cause and carry on
            print(f">> Error occurred with location {tw_woeid} while attempting to prepare and write trends data: {err}")

    return num_location_trends_written_to_db


def _parse_date_or_error( text, default ):
    """Parse one side of a date range.

    Returns 'default' for an empty/blank string, the parsed date for a valid
    one, and the string "ERROR" if the text cannot be parsed as a date.
    """
    text = text.strip()
    if not text:
        return default

    try:
        return parser.parse(text).date()
    except (ValueError, OverflowError):
        return "ERROR"


def parse_date_range(a_date_range = None):
    """Parse a date range as specified with the Flask API '/period' routes.

    Args:
        a_date_range: Single string with possible formats:
            "2019-03-01"             just that one date
            ":2019-06-01"            everything up to the end date
            "2019-03-01:"            everything from the start date
            "2019-03-01:2019-06-30"  start and end date
            ":", "all", "" or None   no date restriction
            Whitespace around the string and around each date is ignored.

    Returns:
        Tuple (start_date, end_date):
            start_date: Earliest date (inclusive), for use in date comparison
            end_date: Latest date (inclusive), for use in date comparison
        A date that cannot be parsed is returned as the string "ERROR".
    """

    # Max and Min dates
    DATE_EARLIEST_POSSIBLE = parser.parse("2000-01-01").date()
    DATE_LATEST_POSSIBLE = parser.parse("2100-12-31").date()

    # If no argument provided, provide full date range (i.e., no date restriction)
    if a_date_range is None:
        return (DATE_EARLIEST_POSSIBLE, DATE_LATEST_POSSIBLE)

    # Check for "all" and similar indications of no date restriction
    date_range = a_date_range.strip()
    if date_range.lower() in ("all", "", ":"):
        return (DATE_EARLIEST_POSSIBLE, DATE_LATEST_POSSIBLE)

    # Split the *stripped* range (separator = ":").  The original split the
    # raw argument, so e.g. "  :2019-06-01" yielded a blank start string that
    # was reported as "ERROR" instead of "no start restriction".
    arg_list = date_range.split(":")

    # If only one argument provided (i.e., no ":")
    # then restrict date range to just that one date
    if len(arg_list) == 1:
        only_date = _parse_date_or_error(arg_list[0], DATE_EARLIEST_POSSIBLE)
        return (only_date, only_date)

    # At least 2 args provided, so assume the first two are the start and end
    # dates; an empty side keeps the corresponding default
    start_date = _parse_date_or_error(arg_list[0], DATE_EARLIEST_POSSIBLE)
    end_date = _parse_date_or_error(arg_list[1], DATE_LATEST_POSSIBLE)

    return (start_date, end_date)

# Flask app route actions - modified for local execution

In [6]:
#********************************************************************************
# Default route - display the main page
# NOTE: Flask expects rendered templates to be in the ./templates folder
# @app.route("/")
# def home():
#     return render_template("index.html")

#********************************************************************************
# Return information relevant to update
# of the 'locations' and 'trends' database tables
# @app.route("/update")
def update_info():
    """Summarize API rate-limit status and the size of the database tables.

    Returns:
        dict: the values reported by ``api_calls_remaining`` and
        ``api_time_before_reset`` for the "place" and "available"
        (trends/place and trends/available) resources, the row counts of the
        'locations' and 'trends' tables, and the average number of trends
        per location (``None`` when the 'locations' table is empty).
    """
    # Dict entries are evaluated top to bottom, so the four API helper calls
    # and the two COUNT queries run in the order they are listed here.
    summary = {
        'api_calls_remaining_place': api_calls_remaining("place"),
        'api_time_before_reset_place': api_time_before_reset("place"),
        'api_calls_remaining_available': api_calls_remaining("available"),
        'api_time_before_reset_available': api_time_before_reset("available"),
        'n_locations': db.session.query(Location).count(),
        'n_trends': db.session.query(Trend).count(),
    }

    # Average trends per location; guard against an empty 'locations' table
    location_count = summary['n_locations']
    summary['n_trends_per_location_avg'] = (
        summary['n_trends'] / location_count if location_count else None
    )

    # Returned as a plain dict for local (non-Flask) execution
    return summary

#********************************************************************************
# Update the 'locations' table via API calls
# Note: Typically requires less than 1 minute
# @app.route("/update/locations")
def update_locations_table():
    """Refresh the 'locations' table and report the result.

    Returns:
        dict: ``{'n_locations': value}`` where ``value`` is whatever
        ``update_db_locations_table()`` returns.
    """
    # Returned as a plain dict for local (non-Flask) execution
    return {'n_locations': update_db_locations_table()}

#********************************************************************************
# Update the 'trends' table via API calls
# Note: Typically requires less than 1 minute if no rate limits
#       But require up to 15 minutes if rate limits are in effect
# @app.route("/update/trends")
def update_trends_table():
    """Refresh the 'trends' table and report the result.

    Returns:
        dict: ``{'n_location_trends': value}`` where ``value`` is whatever
        ``update_db_trends_table()`` returns.
    """
    # Returned as a plain dict for local (non-Flask) execution
    return {'n_location_trends': update_db_trends_table()}


#********************************************************************************
# Return a list of all locations with Twitter Top Trend info
# @app.route("/locations")
def get_all_locations():
    """Return the most recent 'locations' record for every WOEID.

    Location rows are retained over time, so only rows whose ``updated_at``
    equals the per-WOEID maximum are selected.

    Returns:
        list[dict]: one dict per selected Location row, ordered by WOEID.
    """
    # Attributes copied from each Location row; the tuple order is also the
    # key order of every returned dict.
    location_fields = (
        'updated_at',
        'woeid',
        'latitude',
        'longitude',
        'name_full',
        'name_only',
        'name_woe',
        'county_name',
        'county_name_only',
        'county_woeid',
        'state_name',
        'state_name_only',
        'state_woeid',
        'country_name',
        'country_name_only',
        'country_woeid',
        'place_type',
        'timezone',
        'twitter_type',
        'twitter_country',
        'tritter_country_code',
        'twitter_name',
        'twitter_parentid',
    )

    # Subquery: latest "updated_at" timestamp per woeid
    latest = (
        db.session.query(
            Location.woeid,
            func.max(Location.updated_at).label("max_updated_at"),
        )
        .group_by(Location.woeid)
        .subquery()
    )

    # Keep only the rows that carry that latest timestamp
    is_latest = and_(
        Location.woeid == latest.c.woeid,
        Location.updated_at == latest.c.max_updated_at,
    )
    rows = (
        db.session.query(Location)
        .filter(is_latest)
        .order_by(Location.woeid)
        .all()
    )

    # Returned as a plain list for local (non-Flask) execution
    return [
        {name: getattr(row, name) for name in location_fields}
        for row in rows
    ]

#********************************************************************************
# Return a list containing the one location (with Twitter Top Trend info) that has the specified WOEID
# @app.route("/locations/<a_woeid>")
def get_info_for_location(a_woeid):
    """Return the most recent 'locations' record for a single WOEID.

    Location rows are retained over time, so only rows whose ``updated_at``
    equals the maximum for that WOEID are selected.

    Args:
        a_woeid: WOEID of the location to look up.

    Returns:
        list[dict]: one dict per matching Location row (empty when the
        WOEID is not present in the table).
    """
    # Attributes copied from each Location row; the tuple order is also the
    # key order of every returned dict.
    location_fields = (
        'updated_at',
        'woeid',
        'latitude',
        'longitude',
        'name_full',
        'name_only',
        'name_woe',
        'county_name',
        'county_name_only',
        'county_woeid',
        'state_name',
        'state_name_only',
        'state_woeid',
        'country_name',
        'country_name_only',
        'country_woeid',
        'place_type',
        'timezone',
        'twitter_type',
        'twitter_country',
        'tritter_country_code',
        'twitter_name',
        'twitter_parentid',
    )

    # Subquery: latest "updated_at" timestamp per woeid
    latest = (
        db.session.query(
            Location.woeid,
            func.max(Location.updated_at).label("max_updated_at"),
        )
        .group_by(Location.woeid)
        .subquery()
    )

    # Requested woeid, restricted to its latest snapshot
    wanted = and_(
        Location.woeid == a_woeid,
        Location.woeid == latest.c.woeid,
        Location.updated_at == latest.c.max_updated_at,
    )
    rows = (
        db.session.query(Location)
        .filter(wanted)
        .order_by(Location.woeid)
        .all()
    )

    # Returned as a plain list for local (non-Flask) execution
    return [
        {name: getattr(row, name) for name in location_fields}
        for row in rows
    ]


#********************************************************************************
# Return a list of all locations that have the specified tweet in its top trends
# and then sort the results by tweet volume in descending order (with NULLs last)
# @app.route("/locations/tweet/<a_tweet>")
def get_locations_with_tweet(a_tweet):
    """Return every (trend, location) pairing whose trend name equals a tweet.

    Results are ordered by tweet volume, highest first, with NULL volumes
    last.

    NOTE(review): unlike the other query helpers in this file, this query
    applies no "most recent updated_at" restriction to either table, so
    retained historical rows are included - confirm whether that is intended.

    Args:
        a_tweet: trend name to match against ``Trend.twitter_tweet_name``.

    Returns:
        list[dict]: one dict per joined row, holding the Location attributes
        followed by the Trend attributes.
    """
    # Attributes taken from the Location side of each joined row
    location_fields = (
        'updated_at',
        'woeid',
        'latitude',
        'longitude',
        'name_full',
        'name_only',
        'name_woe',
        'county_name',
        'county_name_only',
        'county_woeid',
        'state_name',
        'state_name_only',
        'state_woeid',
        'country_name',
        'country_name_only',
        'country_woeid',
        'place_type',
        'timezone',
        'twitter_type',
        'twitter_country',
        'tritter_country_code',
        'twitter_parentid',
    )

    # Attributes taken from the Trend side (note: 'twitter_name' comes from Trend here)
    trend_fields = (
        'twitter_as_of',
        'twitter_created_at',
        'twitter_name',
        'twitter_tweet_name',
        'twitter_tweet_promoted_content',
        'twitter_tweet_query',
        'twitter_tweet_url',
        'twitter_tweet_volume',
    )

    by_volume = Trend.twitter_tweet_volume.desc().nullslast()
    matches = (
        db.session.query(Trend, Location)
        .join(Location)
        .filter(Trend.twitter_tweet_name == a_tweet)
        .order_by(by_volume)
        .all()
    )

    merged = []
    for match in matches:
        # Location keys first, then Trend keys, to keep a stable key order
        info = {name: getattr(match.Location, name) for name in location_fields}
        info.update((name, getattr(match.Trend, name)) for name in trend_fields)
        merged.append(info)

    # Returned as a plain list for local (non-Flask) execution
    return merged


#********************************************************************************
# Return the full list of all trends with Twitter Top Trend info
# @app.route("/trends")
def get_all_trends():
    """Return the most recent set of trends for every WOEID.

    Trend rows are retained over time, so only rows whose ``updated_at``
    equals the per-WOEID maximum are selected.

    Returns:
        list[dict]: one dict per selected Trend row (no explicit ordering).
    """
    # Attributes copied from each Trend row; the tuple order is also the
    # key order of every returned dict.
    trend_fields = (
        'updated_at',
        'woeid',
        'twitter_as_of',
        'twitter_created_at',
        'twitter_name',
        'twitter_tweet_name',
        'twitter_tweet_promoted_content',
        'twitter_tweet_query',
        'twitter_tweet_url',
        'twitter_tweet_volume',
    )

    # Subquery: latest "updated_at" timestamp per woeid
    latest = (
        db.session.query(
            Trend.woeid,
            func.max(Trend.updated_at).label("max_updated_at"),
        )
        .group_by(Trend.woeid)
        .subquery()
    )

    # Keep only the rows that carry that latest timestamp
    is_latest = and_(
        Trend.woeid == latest.c.woeid,
        Trend.updated_at == latest.c.max_updated_at,
    )
    rows = db.session.query(Trend).filter(is_latest).all()

    # Returned as a plain list for local (non-Flask) execution
    return [
        {name: getattr(row, name) for name in trend_fields}
        for row in rows
    ]

#********************************************************************************
# Return the full list of Twitter Top Trends for a specific location
# and then sort the results by tweet volume in descending order (with NULLs last)
# @app.route("/trends/<a_woeid>")
def get_trends_for_location(a_woeid):
    """Return the most recent set of trends for one WOEID, ranked by volume.

    Trend rows are retained over time, so only rows whose ``updated_at``
    equals the maximum for that WOEID are selected. Rows are ordered by
    tweet volume, highest first, with NULL volumes last.

    Args:
        a_woeid: WOEID of the location whose trends are wanted.

    Returns:
        list[dict]: one dict per selected Trend row.
    """
    # Attributes copied from each Trend row; the tuple order is also the
    # key order of every returned dict.
    trend_fields = (
        'updated_at',
        'woeid',
        'twitter_as_of',
        'twitter_created_at',
        'twitter_name',
        'twitter_tweet_name',
        'twitter_tweet_promoted_content',
        'twitter_tweet_query',
        'twitter_tweet_url',
        'twitter_tweet_volume',
    )

    # Subquery: latest "updated_at" timestamp per woeid
    latest = (
        db.session.query(
            Trend.woeid,
            func.max(Trend.updated_at).label("max_updated_at"),
        )
        .group_by(Trend.woeid)
        .subquery()
    )

    # Requested woeid, restricted to its latest snapshot
    wanted = and_(
        Trend.woeid == a_woeid,
        Trend.woeid == latest.c.woeid,
        Trend.updated_at == latest.c.max_updated_at,
    )
    by_volume = Trend.twitter_tweet_volume.desc().nullslast()
    rows = db.session.query(Trend).filter(wanted).order_by(by_volume).all()

    # Returned as a plain list for local (non-Flask) execution
    return [
        {name: getattr(row, name) for name in trend_fields}
        for row in rows
    ]

#********************************************************************************
# Return the top 10 list of Twitter Top Trends for a specific location
# and then sort the results by tweet volume in descending order (with NULLs last)
# @app.route("/trends/top/<a_woeid>")
def get_top_trends_for_location(a_woeid):
    """Return the top 10 trends from the latest snapshot for one WOEID.

    Trend rows are retained over time. Previously this query ranked every
    stored row for the WOEID, so high-volume trends from older snapshots
    could crowd out the current ones. It now applies the same
    "latest updated_at per WOEID" restriction used by
    ``get_trends_for_location`` before ranking and limiting.

    Args:
        a_woeid: WOEID of the location whose top trends are wanted.

    Returns:
        list[dict]: up to 10 dicts, one per Trend row, ordered by tweet
        volume descending with NULL volumes last.
    """
    # Attributes copied from each Trend row; the tuple order is also the
    # key order of every returned dict.
    trend_fields = (
        'updated_at',
        'woeid',
        'twitter_as_of',
        'twitter_created_at',
        'twitter_name',
        'twitter_tweet_name',
        'twitter_tweet_promoted_content',
        'twitter_tweet_query',
        'twitter_tweet_url',
        'twitter_tweet_volume',
    )

    # Subquery: latest "updated_at" timestamp per woeid
    trend_subq = (
        db.session.query(
            Trend.woeid,
            func.max(Trend.updated_at).label("max_updated_at"),
        )
        .group_by(Trend.woeid)
        .subquery()
    )

    # Rank only the rows of the most recent snapshot for this woeid
    results = (
        db.session.query(Trend)
        .filter(and_(
            Trend.woeid == a_woeid,
            Trend.woeid == trend_subq.c.woeid,
            Trend.updated_at == trend_subq.c.max_updated_at,
        ))
        .order_by(Trend.twitter_tweet_volume.desc().nullslast())
        .limit(10)
        .all()
    )

    # Returned as a plain list for local (non-Flask) execution
    return [
        {name: getattr(r, name) for name in trend_fields}
        for r in results
    ]


# if __name__ == "__main__":
#     app.run()

In [7]:
# Perform a query
# Albuquerque: 2352824
# United States: 23424977
# Scratch check: fetch the 10 highest-volume Trend rows stored for one WOEID.
# No "most recent snapshot" filter is applied in this query.
q_woeid = 2352824
results = db.session.query(Trend).filter(Trend.woeid == q_woeid) \
                        .order_by(Trend.twitter_tweet_volume.desc().nullslast() ).limit(10).all()

# Convert each Trend row into a plain dict
trend_list = []
for r in results:
    trend_info = {
        'updated_at': r.updated_at,
        'woeid': r.woeid,
        'twitter_as_of': r.twitter_as_of,
        'twitter_created_at': r.twitter_created_at,
        'twitter_name': r.twitter_name,
        'twitter_tweet_name': r.twitter_tweet_name,
        'twitter_tweet_promoted_content': r.twitter_tweet_promoted_content,
        'twitter_tweet_query': r.twitter_tweet_query,
        'twitter_tweet_url': r.twitter_tweet_url,
        'twitter_tweet_volume': r.twitter_tweet_volume
    }

    trend_list.append(trend_info)

# pprint ( trend_list[0]['updated_at'].strftime("%a %m/%d/%y %H:%M:%S") )
# Show the first row's timestamp in the locale's date and time format.
# NOTE: indexing [0] raises IndexError when the query returned no rows.
print ( trend_list[0]['updated_at'].strftime("%x %X") )


04/29/19 04:46:21


In [8]:
# Perform a query
# Albuquerque: 2352824
# United States: 23424977
# Exercise get_trends_for_location() for the chosen WOEID, then show how
# many rows came back and dump them
q_woeid = 23424977
retval = get_trends_for_location(q_woeid)
print(len(retval))
pprint(retval)

50
[{'twitter_as_of': '2019-04-29T04:46:29Z',
  'twitter_created_at': '2019-04-29T04:39:51Z',
  'twitter_name': 'United States',
  'twitter_tweet_name': '#GameofThrones',
  'twitter_tweet_promoted_content': None,
  'twitter_tweet_query': '%23GameofThrones',
  'twitter_tweet_url': 'http://twitter.com/search?q=%23GameofThrones',
  'twitter_tweet_volume': 2809280.0,
  'updated_at': datetime.datetime(2019, 4, 29, 4, 46, 28, 155248),
  'woeid': 23424977},
 {'twitter_as_of': '2019-04-29T04:46:29Z',
  'twitter_created_at': '2019-04-29T04:39:51Z',
  'twitter_name': 'United States',
  'twitter_tweet_name': 'Arya',
  'twitter_tweet_promoted_content': None,
  'twitter_tweet_query': 'Arya',
  'twitter_tweet_url': 'http://twitter.com/search?q=Arya',
  'twitter_tweet_volume': 1726986.0,
  'updated_at': datetime.datetime(2019, 4, 29, 4, 46, 28, 155248),
  'woeid': 23424977},
 {'twitter_as_of': '2019-04-29T04:46:29Z',
  'twitter_created_at': '2019-04-29T04:39:51Z',
  'twitter_name': 'United States',
 

  'twitter_tweet_url': 'http://twitter.com/search?q=%22Tyrion+and+Sansa%22',
  'twitter_tweet_volume': None,
  'updated_at': datetime.datetime(2019, 4, 29, 4, 46, 28, 155248),
  'woeid': 23424977},
 {'twitter_as_of': '2019-04-29T04:46:29Z',
  'twitter_created_at': '2019-04-29T04:39:51Z',
  'twitter_name': 'United States',
  'twitter_tweet_name': 'Jon and Dany',
  'twitter_tweet_promoted_content': None,
  'twitter_tweet_query': '%22Jon+and+Dany%22',
  'twitter_tweet_url': 'http://twitter.com/search?q=%22Jon+and+Dany%22',
  'twitter_tweet_volume': None,
  'updated_at': datetime.datetime(2019, 4, 29, 4, 46, 28, 155248),
  'woeid': 23424977},
 {'twitter_as_of': '2019-04-29T04:46:29Z',
  'twitter_created_at': '2019-04-29T04:39:51Z',
  'twitter_name': 'United States',
  'twitter_tweet_name': '#TalktheThrones',
  'twitter_tweet_promoted_content': None,
  'twitter_tweet_query': '%23TalktheThrones',
  'twitter_tweet_url': 'http://twitter.com/search?q=%23TalktheThrones',
  'twitter_tweet_volume'

# Verify Basic DB functions using Local Database

In [9]:
# db.session.rollback()

In [10]:
# db.session.close()

In [11]:
# sample_dict = test_df.iloc[38].to_dict()
# sample_dict

In [12]:
# row_dict=sample_dict

In [13]:
# row_dict['updated_at'] = parser.parse('2019-04-28 00:35:00.000001')
# row_dict

In [14]:
# # print(f"Dictionary to Add or Update:")
# # pprint(row_dict)

# result = db.session.query(Location).filter( Location.woeid == int(row_dict['woeid']) ).first()
# n_adds = 0
# n_updates = 0
# if result is None:
#     # This location is not in the table, so add this entrry to the 'locations' table.
#     # NOTE: 
#     # Location is the Class mapped to the 'locations' table
#     # row_dict is a dictionary containing all of the column values for this row as key/value pairs
#     # The term "**row_dict" creates a "key=value" parameter for each key/value pair
#     n_add += 1
#     print(f"ADD: DataFrame: {row_dict['woeid']} => Database 'locations': New Entry")
#     db.session.add( Location(**row_dict) )
#     db.session.commit()

# else:
#     # This location is in the table, so update this entry in the 'locations' table.
#     n_updates += 1
#     print(f"UPDATE: DataFrame: {row_dict['woeid']} => Database 'locations': {result.woeid}: {result.name_full}")
    
#     # WHY IS THIS UPDATE FAILING WITH 'ProgrammingError' ??
#     db.session.query(Location).filter( Location.woeid == int(row_dict['woeid']) ).update( row_dict )
#     db.session.commit()
        
# # Return the total number of entries in the Locations table
# num_loc = db.session.query(Location).count()
# print(f"Adds/Updates complete: Adds: {n_adds}, Updates {n_updates} => Rows in 'locations' table: {num_loc}")


In [15]:
# FYI, checking for any 'NaN' values in the dataframe
# fixed_test_df[ pd.notnull(fixed_test_df['county_woeid']) == False ]['county_woeid']

In [16]:
# parser.parse('2019-04-28 17:37:37.664148')

In [17]:
# homeloc_dict = {
#     'twitter_country': 'United States',
#     'tritter_country_code': 'US',
#     'twitter_name': 'Carol Stream',
#     'twitter_parentid': 23424977,
#     'woeid': 123456,
#     'updated_at': parser.parse('2019-04-28 17:37:37.664148'),
#     'twitter_type': 'Town',
#     'country_name': 'United States',
#     'country_name_only': 'United States',
#     'country_woeid': 23424977,
#     'county_name': 'Carol Stream, Illinois, United States',
#     'county_name_only': 'DuPage County',
#     'county_woeid': 12589279,
#     'latitude': 35.105,
#     'longitude': -106.647,
#     'name_full': 'Carol Stream, Illinois, United States',
#     'name_only': 'Carol Stream',
#     'name_woe': 'Carol Stream',
#     'place_type': 'locality',
#     'state_name': 'Illinois, United States',
#     'state_name_only': 'Illinois',
#     'state_woeid': 2347590.0,
#     'timezone': 'America/Chicago'
# }
# homeloc_dict

In [18]:
# Update locations table
# n_locations = update_db_locations_table()
# print(n_locations)

In [19]:
# Query to get the max (i.e., most recent) "updated_at" values per location
# Result rows are (woeid, max_updated_at) pairs: one row per woeid, holding
# the latest "updated_at" value for that woeid, sorted by woeid
loc_max_update_at_list = db.session.query(Location.woeid, func.max(Location.updated_at) \
                                        .label("max_updated_at")) \
                                        .group_by(Location.woeid) \
                                        .order_by(Location.woeid) \
                                        .all()
# Number of distinct woeids, followed by one "woeid => timestamp" line each
print( len(loc_max_update_at_list) )
for m in loc_max_update_at_list:
    print( f"{m.woeid} => {m.max_updated_at}")
    

64
2352824 => 2019-04-29 01:10:20.129726
2357024 => 2019-04-29 01:10:20.129726
2357536 => 2019-04-29 01:10:20.129726
2358820 => 2019-04-29 01:10:20.129726
2359991 => 2019-04-29 01:10:20.129726
2364559 => 2019-04-29 01:10:20.129726
2367105 => 2019-04-29 01:10:20.129726
2378426 => 2019-04-29 01:10:20.129726
2379574 => 2019-04-29 01:10:20.129726
2380358 => 2019-04-29 01:10:20.129726
2381475 => 2019-04-29 01:10:20.129726
2383489 => 2019-04-29 01:10:20.129726
2383660 => 2019-04-29 01:10:20.129726
2388929 => 2019-04-29 01:10:20.129726
2391279 => 2019-04-29 01:10:20.129726
2391585 => 2019-04-29 01:10:20.129726
2397816 => 2019-04-29 01:10:20.129726
2407517 => 2019-04-29 01:10:20.129726
2414469 => 2019-04-29 01:10:20.129726
2418046 => 2019-04-29 01:10:20.129726
2423945 => 2019-04-29 01:10:20.129726
2424766 => 2019-04-29 01:10:20.129726
2427032 => 2019-04-29 01:10:20.129726
2428184 => 2019-04-29 01:10:20.129726
2428344 => 2019-04-29 01:10:20.129726
2430683 => 2019-04-29 01:10:20.129726
2436704 =

In [20]:
# Create a subquery to find the most recent "updated_at" record per woeid
# Same (woeid, max_updated_at) aggregation as a subquery, so it can be used
# as a filter source in the query below
loc_subq = db.session.query(Location.woeid, func.max(Location.updated_at) \
                        .label("max_updated_at")) \
                        .group_by(Location.woeid) \
                        .subquery()

# results = db.session.query(Location).filter( Location.updated_at == max_update_time ).all()

# Select the Location row(s) for woeid 2352824 whose updated_at equals the
# latest timestamp recorded for that woeid
results = db.session.query(Location) \
                    .filter( and_( \
                            Location.woeid == 2352824, \
                            Location.woeid == loc_subq.c.woeid, \
                            Location.updated_at == loc_subq.c.max_updated_at \
                           )) \
                    .order_by(Location.woeid).all()

# Number of matching rows
print( len(results) )

# One summary line per matching row
for r in results:
    print(f"Database 'locations': {r.woeid}: {r.name_full} => updated_at: {r.updated_at}")

1
Database 'locations': 2352824: Albuquerque, New Mexico, United States => updated_at: 2019-04-29 01:10:20.129726


In [22]:
results[0]

<Location Albuquerque, New Mexico, United States [updated_at: 2019-04-29 01:10:20.129726>

In [24]:
# # db.session.rollback()
# trend_max_update_at_list = db.session.query(Trend.woeid, func.max(Trend.updated_at) \
#                                         .label("max_updated_at")) \
#                                         .group_by(Trend.woeid) \
#                                         .order_by(Trend.woeid) \
#                                         .all()
# print( len(trend_max_update_at_list) )
# for m in trend_max_update_at_list:
#     print( f"{m.woeid} => {m.max_updated_at}")


In [25]:
# Create a subquery to find the most recent "updated_at" record per woeid
# Subquery of (woeid, max_updated_at): the latest "updated_at" per woeid in
# the 'trends' table
trend_subq = db.session.query(Trend.woeid, func.max(Trend.updated_at) \
                        .label("max_updated_at")) \
                        .group_by(Trend.woeid) \
                        .subquery()

# Select the Trend rows for woeid 2352824 that belong to its most recent
# snapshot (updated_at equals the per-woeid maximum)
results = db.session.query(Trend) \
                    .filter( and_( \
                            Trend.woeid == 2352824, \
                            Trend.woeid == trend_subq.c.woeid, \
                            Trend.updated_at == trend_subq.c.max_updated_at \
                           )) \
                    .order_by(Trend.updated_at.desc(), Trend.woeid).all()

# Number of matching rows
print( len(results) )

# One summary line per row.
# NOTE: the label text says 'locations' although these rows come from Trend.
for r in results:
    print(f"Database 'locations': {r.woeid}: {r.twitter_name} {r.twitter_tweet_name} => updated_at: {r.updated_at}")

50
Database 'locations': 2352824: Albuquerque #GameofThrones => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque Arya => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque #BattleOfWinterfell => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque Endgame => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque #ThankYouAvengers => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque Night King => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque Bran => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque Cersei => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque #DemThrones => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque #GOTS8E3 => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuqu

In [26]:
# Testing if this is working for all combinations
# Run parse_date_range() over a mix of inputs: unparseable text, None,
# "all"/empty/":" (no restriction), a single date, open-ended ranges,
# a full range, and a range with an unparseable end
for a_date_range in [ "UTC", None, "all", "", ":", "2019-03-01", "2019-03-01:", ":2019-06-01", "2019-03-01:2019-06-30", ":UTC"]:
    (q_start_date, q_end_date) = parse_date_range(a_date_range)
    print(f"a_date_range: '{a_date_range}' => q_start_date '{q_start_date}', q_end_date '{q_end_date}'")

a_date_range: 'UTC' => q_start_date 'ERROR', q_end_date 'ERROR''
a_date_range: 'None' => q_start_date '2000-01-01', q_end_date '2100-12-31''
a_date_range: 'all' => q_start_date '2000-01-01', q_end_date '2100-12-31''
a_date_range: '' => q_start_date '2000-01-01', q_end_date '2100-12-31''
a_date_range: ':' => q_start_date '2000-01-01', q_end_date '2100-12-31''
a_date_range: '2019-03-01' => q_start_date '2019-03-01', q_end_date '2019-03-01''
a_date_range: '2019-03-01:' => q_start_date '2019-03-01', q_end_date '2100-12-31''
a_date_range: ':2019-06-01' => q_start_date '2000-01-01', q_end_date '2019-06-01''
a_date_range: '2019-03-01:2019-06-30' => q_start_date '2019-03-01', q_end_date '2019-06-30''
a_date_range: ':UTC' => q_start_date '2000-01-01', q_end_date 'ERROR''


In [None]:
q_end_date == "ERROR"

In [None]:
abc_d = date(2019, 6, 30)

In [None]:
abc_dt = datetime(2019, 6, 30, 12, 44)

In [None]:
abc_d >= abc_dt.date()

In [42]:
# Scratch check: parse a "start:end" date range and use it to filter Trend
# rows by updated_at
a_date_range = "4/29/19:4/30/19"
q_start_date, q_end_date = parse_date_range(a_date_range)
print(f"a_date_range: '{a_date_range}' => q_start_date '{q_start_date}', q_end_date '{q_end_date}'")

print( f"{q_start_date} to {q_end_date}")

# NOTE: this subquery is built but not referenced by the query below
trend_subq = db.session.query(Trend.woeid, func.max(Trend.updated_at).label("max_updated_at")) \
                        .group_by(Trend.woeid) \
                        .subquery()

# Select Trend rows for woeid 2352824 with
# q_start_date <= updated_at < q_end_date (the end bound is exclusive),
# newest first
results = db.session.query(Trend) \
                    .filter( and_( \
                            Trend.woeid == 2352824, \
                            Trend.updated_at >= q_start_date, \
                            Trend.updated_at < q_end_date, \
                           )) \
                    .order_by(Trend.updated_at.desc(), Trend.woeid).all()

# Number of matching rows
print( len(results) )

# One summary line per row.
# NOTE: the label text says 'locations' although these rows come from Trend.
for r in results:
    print(f"Database 'locations': {r.woeid}: {r.twitter_name} {r.twitter_tweet_name} => updated_at: {r.updated_at}")

a_date_range: '4/29/19:4/30/19' => q_start_date '2019-04-29', q_end_date '2019-04-30'
2019-04-29 to 2019-04-30
50
Database 'locations': 2352824: Albuquerque #GameofThrones => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque Arya => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque #BattleOfWinterfell => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque Endgame => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque #ThankYouAvengers => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque Night King => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque Bran => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque Cersei => updated_at: 2019-04-29 04:46:21.259684
Database 'locations': 2352824: Albuquerque #DemThrones => updated_at: 2019-04-29 04:46:21.259684
Database 'locations'

In [None]:
results[0].my_location.woeid

In [None]:
results[338].columns

In [None]:
# Update trends table
# n_location_trends = update_db_trends_table()
# print(n_location_trends)

In [None]:
# Read all locations
retval = get_all_locations()
print(len(retval))
pprint(retval)

In [None]:
# Read one location - e.g., 2352824 (Albuquerque)
retval = get_info_for_location(2352824)
print(len(retval))
pprint(retval)

In [None]:
# Read all trends
retval = get_all_trends()
print(len(retval))
pprint(retval)

In [None]:
# Read trends for one location - e.g., 2352824 (Albuquerque)
retval = get_trends_for_location(2352824)
print(len(retval))
pprint(retval)

In [None]:
# Read only the top trends for one location - e.g., 2352824 (Albuquerque)
retval = get_top_trends_for_location(2352824)
print(len(retval))
pprint(retval)

In [None]:
# Read all locations with specified tweet in its trends list - e.g., "#SriLanka"
retval = get_locations_with_tweet("#SriLanka")
print(len(retval))
pprint(retval)

In [None]:
# Check on the query of locations by tweet - e.g., "Carlos Vela" appeared for 2358820 (Baltimore)
retval = get_trends_for_location(2358820)
print(len(retval))
pprint(retval)

In [None]:
db.session.close()