In [2]:
import sys
import subprocess
import pkg_resources

# Third-party distributions this notebook imports below.
required = {'wikimapia_api',
            'tweepy',
            'requests',
            'pandas',
            'geopy',
            'folium',
            'matplotlib'}


def _canonical_name(name):
    """Fold a distribution name to a comparable form: lower-case, '_' and '.' -> '-'."""
    return name.lower().replace('_', '-').replace('.', '-')


installed = {pkg.key for pkg in pkg_resources.working_set}
# Installed keys may be reported in normalized form (e.g. 'wikimapia-api'), so a
# raw set difference against 'wikimapia_api' can flag the package as missing on
# every run. Compare canonical forms on both sides instead.
installed_canonical = {_canonical_name(key) for key in installed}
missing = {name for name in required if _canonical_name(name) not in installed_canonical}

if missing:
    python = sys.executable
    # Install into the interpreter running this kernel; pip's normal output is silenced.
    subprocess.check_call([python, '-m', 'pip', 'install', *sorted(missing)], stdout=subprocess.DEVNULL)

import requests as req
import pandas as pd
from itertools import chain
import json
import logging
import configparser
import os
from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import folium
from folium.plugins import MarkerCluster
import wikimapia_api
from wikimapia_api import API,category_api
from typing import List
import re
import time
import random
import matplotlib
import tweepy as tpy
# Notebook-wide pandas display settings: show up to 500 rows and up to 400
# characters per column so long tweet text is not truncated when displayed.
pd.set_option('display.max_rows', 500) 
pd.set_option('max_colwidth', 400)

In [4]:
#utility functions to set up keystore
def write_config(section:str,section_key:str,val:str,ini_file_name='keystore.ini',file_action='w') -> int:
    """
      Create or update a single key in an ini-style config file that
      holds api keys.

      Any existing file is read first, the key is set in memory, and the
      whole file is then rewritten, so sections already stored in the file
      are kept.

      Parameters:
      section - the section of the config file that you'd like to create
                or add to.
      section_key - the key holding the api key in a section. configparser
                stores option names lower-cased, so 'BEA' and 'bea' refer
                to the same key.
      val - the api key. It is stored through re.escape(); read_config()
                strips the added backslashes again when reading.
      ini_file_name - the config file name.  It should end with .ini
      file_action - accepted for backward compatibility only; it is not
                used. The file is always opened with 'w' because the full
                config (old and new content) is written back each time.

      Returns:
        0 once the file has been written.

      Notes:
        If the key already exists the user is asked (via input()) whether
        to overwrite it.

      Example:
        write_config('GOVDATA','BEA','<your API key>','econ_data_api_config.ini')

      docs: https://docs.python.org/3/library/configparser.html
    """
    config = configparser.RawConfigParser()
    # read() silently skips a missing file, which leaves an empty config to add to.
    config.read(ini_file_name)

    escaped_val = re.escape(val)
    if config.has_section(section):
        print(f'section: {section} already exists')
        # has_option() applies the same option-name normalisation (lower-casing)
        # used when keys are stored, so a mixed-case section_key is recognised as
        # existing instead of being overwritten without asking.
        if config.has_option(section, section_key):
            resp = input(f'section key: {section_key} already exists. Would you like to overwrite? (Y/N)').strip().lower()
            if resp == 'y' or resp == 'yes':
                print('Overwriting section key...')
                config.set(section, section_key, escaped_val)
                print('Overwriting complete....')
            else:
                print('Overwriting canceled....')
        else:
            config.set(section, section_key, escaped_val)
    else:
        config.add_section(section)
        config.set(section, section_key, escaped_val)

    with open(ini_file_name, 'w') as configfile:
        config.write(configfile)

    return 0

def show_config_file_sections(config_doc:str)->dict:
    """
    Utility function to show the section headings and keys
    of config file.

    Parameters:
      config_doc - path of the ini file to inspect.

    Returns:
      A dict mapping each section name to the list of key names in that
      section (values are not included, so api keys are not displayed).
      A missing or empty file gives an empty dict, because
      ConfigParser.read() skips files it cannot open.
    """
    config = configparser.ConfigParser()
    config.read(config_doc)
    return {sect: list(config[sect].keys()) for sect in config.sections()}


def read_config(config_file_name:str,
                config_section:str,
                config_key:str,
               file_name=False) -> str:
    """
    Look up one value in an ini-style config file.

    Parameters:
      config_file_name - path of the ini file to read.
      config_section - section that holds the key.
      config_key - key whose value is wanted.
      file_name - when truthy, return the stored text untouched; otherwise
                  every backslash is removed from it first.

    Returns:
      The stored value as a string.

    Raises:
      KeyError if the section or key is not present (this includes the case
      of a missing file, since read() skips files it cannot open).
    """
    parser = configparser.RawConfigParser()
    parser.read(config_file_name)
    stored_value = parser[config_section][config_key]
    # write_config() saves values through re.escape(), which adds backslashes;
    # removing them restores the original key text.
    return stored_value if file_name else stored_value.replace('\\', '')
# --------------------------------------------------------------------------------------------------------------------
def get_wikimapia_category_map():
    """
    Return a fresh dict mapping place-type names to the numeric category
    ids passed to the Wikimapia API as `category`.
    """
    # NOTE(review): 'university' and 'park' both map to 84 - one of these is
    # probably a copy/paste slip; confirm the ids against Wikimapia's category list.
    category_ids = dict(
        school=203,
        military=516,
        university=84,
        park=84,
        marketplace=1507,
    )
    return category_ids

def get_address_to_coords(addr,default_value:str,delay_seconds=2):
    """
    Geocode an address with the module-level `geolocator`, falling back to
    a default address when the lookup finds nothing.

    Parameters:
      addr - the address / place name to look up.
      default_value - address used instead when `addr` cannot be geocoded.
      delay_seconds - pause (in seconds) after every geocoding request, used
                      as a simple rate limiter. Defaults to 2.

    Returns:
      {'title': <address that was actually used>,
       'loc_data': [latitude, longitude]}

    Raises:
      ValueError if neither `addr` nor `default_value` can be geocoded.
    """
    location_ = geolocator.geocode(addr)
    # Pause after every request, including the one that found nothing, so the
    # fallback lookup below is rate limited as well.
    time.sleep(delay_seconds)
    if location_ is not None:
        return {'title':addr,'loc_data':[location_.latitude,location_.longitude]}

    location_ = geolocator.geocode(default_value)
    time.sleep(delay_seconds)
    print(f'***Found bad value:{addr} replacing with default value {default_value}***')
    if location_ is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError(f'could not geocode {addr!r} or the default value {default_value!r}')
    return {'title':default_value,'loc_data':[location_.latitude,location_.longitude]}
    
def get_nearest_places_dict(place_type:str,coords_list_of_lists:list):
    """
    Query Wikimapia for places of one category near each coordinate pair.

    Parameters:
      place_type - a key of get_wikimapia_category_map() (e.g. 'military').
      coords_list_of_lists - list of two-element coordinate lists.

    Returns:
      A list with one entry per coordinate pair; each entry is a list of
      {'id': ..., 'title': ..., 'loc_data': [lat, lon]} dicts built from the
      places the API returned for that pair.
    """
    places_per_point = []
    for point in coords_list_of_lists:
        # NOTE(review): the coordinates are passed as (point[1], point[0]);
        # callers in this notebook supply [lat, lon], so this sends lon first -
        # confirm that matches the argument order of API.places.nearest.
        nearby_places = list(
            API.places.nearest(point[1], point[0], category=get_wikimapia_category_map()[place_type])
        )
        summaries = []
        for place in nearby_places:
            place_location = place['location']
            summaries.append({
                'id': place.get('id'),
                'title': place.get('title'),
                'loc_data': [place_location.get('lat'), place_location.get('lon')],
            })
        places_per_point.append(summaries)
    return places_per_point

def make_folium_map(point_of_interest_coords_list_of_dicts,#from get_address_to_coords
                    nearest_coords_list_of_dicts,# from get_nearest_places_dict
                    tweets_coords_list_of_dicts, #list of coords from tweets for clustering
                    map_type="OpenStreetMap",zoom_level=15,
                    point_of_interest_icon_color='green',
                    nearest_coords_icon_color = 'blue',
                    tweet_icon_color = 'red'
                   ):
    """
    Build a folium map with markers for the points of interest, the nearby
    places and the tweets.

    Parameters:
      point_of_interest_coords_list_of_dicts - dicts with 'title' and
          'loc_data' ([lat, lon]); the first entry is the map centre.
      nearest_coords_list_of_dicts - dicts with 'title' and 'loc_data'.
      tweets_coords_list_of_dicts - dicts with 'loc_data' and 'tweet_text'.
      map_type - tile set name passed to folium.Map as `tiles`.
      zoom_level - initial zoom of the map.
      point_of_interest_icon_color / nearest_coords_icon_color /
      tweet_icon_color - marker colours for the three groups.

    Returns:
      The folium.Map with all markers added.

    Raises:
      IndexError if point_of_interest_coords_list_of_dicts is empty.
    """
    #main point of interest will be the first item in the point_of_interest_coords_list_of_dicts
    # The zoom is a property of the map, so it is applied here via zoom_start
    # rather than handed to each marker.
    map_ = folium.Map(
        location=point_of_interest_coords_list_of_dicts[0]['loc_data'],
        tiles=map_type,
        zoom_start=zoom_level,
    )

    for point_of_interest_coords_dict in point_of_interest_coords_list_of_dicts:
        folium.Marker(
            location=point_of_interest_coords_dict['loc_data'],
            tooltip=point_of_interest_coords_dict['title'],
            icon=folium.Icon(color=point_of_interest_icon_color),
        ).add_to(map_)

    for data_dict in nearest_coords_list_of_dicts:
        folium.Marker(
            location=data_dict['loc_data'],
            tooltip=data_dict['title'],
            icon=folium.Icon(color=nearest_coords_icon_color),
        ).add_to(map_)

    for data_dict in tweets_coords_list_of_dicts:
        # Shift each tweet marker by a small random positive offset
        # (0.0001 to 0.0099 degrees per axis) so tweets that share the same
        # coordinates do not sit exactly on top of each other.
        loc_info = [data_dict['loc_data'][0]+random.randrange(1,100)*.0001,
                    data_dict['loc_data'][1]+random.randrange(1,100)*.0001]
        folium.Marker(
            location=loc_info,
            tooltip=data_dict['tweet_text'],
            icon=folium.Icon(color=tweet_icon_color),
        ).add_to(map_)
    return map_

### Start-up Directions
1. Read through the code so you understand what it is doing.  
2. Copy this notebook into your google drive or onto your computer.
3. Get API keys for Twitter (consumer key, consumer secret, access token and access secret)
4. Get API keys for Wikimapia (there's just one)
5. Store your keys securely on your device (not in gdrive)
6. Use the write_config function to write your keys to a temp ini file so you don't store them in your copied version of this notebook. You can use the default file name of keystore.ini or you can make your own, but I recommend just using the default and deleting the file if you're using gdrive. 
7. Run the notebook and have fun! Please provide feedback if you have any. 



In [5]:
#use this function to write your api keys to the config file. The default file name is keystore.ini
#Call it once per key. Later cells read these section/key names from keystore.ini:
#  section 'WIKIMAPIA': 'wikimapia_api_key'
#  section 'TWITTER':   'consumer_key', 'consumer_secret', 'access_token', 'access_secret'
#Replace the placeholders below before running, and clear the key from this cell afterwards.
write_config('<your section of the config>','<name of the api key>','<your api key>')

0

In [7]:
#if you make a mistake and you need to delete the keystore.ini file run this cell
#NOTE: skip this cell during a normal top-to-bottom run - it deletes the keystore
#that the cells below read their api keys from.
#`!rm` is a shell command, so it needs an environment where `rm` exists.
!rm keystore.ini

In [6]:
#you'll need to use the write_config() function to set up your keystore file once you have api keys
#this lists each section with its key names only (the api key values are not shown)
show_config_file_sections('keystore.ini')

{'<your section of the config>': ['<name of the api key>']}

In [None]:
#instantiating the geolocator API
#get_address_to_coords() reads this module-level `geolocator`, so run this cell before calling it
geolocator = Nominatim(user_agent="GeoTweet")

In [None]:
#accessing WIKIMAPIA's api 
#expects keystore.ini to contain a [WIKIMAPIA] section with a 'wikimapia_api_key' entry
#(read_config raises KeyError if the section or key is missing)
API.config.key = read_config('keystore.ini','WIKIMAPIA','wikimapia_api_key')

In [None]:
#accessing Twitter's api
#all four credentials come from the [TWITTER] section of keystore.ini
auth = tpy.OAuthHandler(
    read_config('keystore.ini', 'TWITTER', 'consumer_key'),
    read_config('keystore.ini', 'TWITTER', 'consumer_secret'),
)
auth.set_access_token(
    read_config('keystore.ini', 'TWITTER', 'access_token'),
    read_config('keystore.ini', 'TWITTER', 'access_secret'),
)
#the client is built once the handler is fully configured
api = tpy.API(auth)

def limit_handled(cursor):
    """
    Generator that yields items from a tweepy cursor and waits out rate limits.

    Each item comes from cursor.next(). When tweepy raises RateLimitError the
    generator sleeps for 15 minutes and then asks the cursor again; when the
    cursor raises StopIteration the generator finishes.
    """
    rate_limit_pause_seconds = 15 * 60
    exhausted = False
    while not exhausted:
        try:
            yield cursor.next()
        except tpy.RateLimitError:
            time.sleep(rate_limit_pause_seconds)
        except StopIteration:
            exhausted = True


# <center>Part 1: Getting Geo-Coords</center>


## Set your place of interest and get coords

In [None]:
################################################################################
#Add your locations to the list below, you can add as many as you like.        #
#Feel free to remove NYC if you're not interested in that location             #
################################################################################
#The first entry matters: later cells use addrs_list[0] as the fallback address
#when a lookup fails, and the first point of interest is the centre of the map.
addrs_list = ["NYC"]

In [None]:
# Geocode every address; each result is {'title': ..., 'loc_data': [lat, lon]}.
# An address that cannot be geocoded is replaced by the first address in the list.
poi_data_list_of_dicts =  [get_address_to_coords(addr,default_value=addrs_list[0]) for addr in addrs_list]
poi_data_list_of_dicts

[{'title': 'NYC', 'loc_data': [40.7127281, -74.0060152]}]

In [None]:
################################################################################
#Point of Interest Geocodes for Twitter                             
## You can change the radius to search for tweets here but be aware that it will
## increase the amount of data you will get
################################################################################
radius = '15'
# One "<lat>,<lon>,<radius>km" string per point of interest.
# NOTE(review): poi_gcode_list is not referenced by the later cells shown here;
# only `radius` is reused - confirm whether this list is still needed.
poi_gcode_list = [
    f"{','.join(str(coord) for coord in poi_data_dict['loc_data'])},{radius}km"
    for poi_data_dict in poi_data_list_of_dicts
]

## Get places of interest near your place of interest

In [1]:
################################################################################
#Don't run this cell too many times or you'll get rate limited                 #
################################################################################
nearest_places_list_of_dicts = []

for poi_data in poi_data_list_of_dicts:
    print(poi_data['loc_data'])
    # get_nearest_places_dict returns one list of place dicts per coordinate pair
    # (a list of lists), so flatten it while collecting. Appending the nested
    # result and flattening only once would leave lists, not dicts, in the final
    # collection, and the later `.get('loc_data')` / ['loc_data'] lookups would fail.
    places_per_point = get_nearest_places_dict(place_type='military',coords_list_of_lists=[poi_data['loc_data']])
    nearest_places_list_of_dicts.extend(chain.from_iterable(places_per_point))
    time.sleep(2)  # pause between API requests

In [None]:
# Pull the [lat, lon] pair out of every nearby place ...
nearest_places_list_of_lists = [place.get('loc_data') for place in nearest_places_list_of_dicts]
# ... and turn each pair into a "<lat>,<lon>,<radius>km" geocode string for the Twitter search.
nearest_places_gcodes_list = [
    f"{','.join(str(coord) for coord in nearest_place_list)},{radius}km"
    for nearest_place_list in nearest_places_list_of_lists
]

## Get Tweets near the nearest places of interest

In [None]:
################################################################################
#You can change the number of Tweets per location you'd like to retrieve below #
#You can also add your search query.  To do a general Tweet search enter "*".  #
################################################################################
number_of_tweets_to_get = 200
search_query = "<add your search query>"

#------------------------------------------------------------------------------
# Collect one record per tweet across all geocodes, then build the DataFrame in a
# single step. Unlike concatenating one frame per geocode, this also works when
# there are no geocodes or no tweets: the result is an empty frame that still has
# every column the later cells expect.
tweet_columns = ['creation_date', 'text', 'hashtags', 'user_name', 'screen_name',
                 'user_id', 'geo', 'coords', 'place', 'entities', 'location',
                 'protected_status', 'geo_enabled']
tweet_records = [
    {'creation_date': tweet.created_at,
     'text': tweet.text,
     'hashtags': tweet.entities.get('hashtags'),
     'user_name': tweet.user.name,
     'screen_name': tweet.user.screen_name,
     'user_id': tweet.user.id,
     'geo': tweet.geo,
     'coords': tweet.coordinates,
     'place': tweet.place,
     'entities': tweet.user.entities,
     'location': tweet.user.location,
     'protected_status': tweet.user.protected,
     'geo_enabled': tweet.user.geo_enabled}
    for gcode in nearest_places_gcodes_list
    # limit_handled sleeps and retries when Twitter reports a rate limit
    for tweet in limit_handled(tpy.Cursor(
        api.search, q=search_query, count=number_of_tweets_to_get, geocode=gcode
    ).items(number_of_tweets_to_get))
]
tweet_df = pd.DataFrame(tweet_records, columns=tweet_columns)
tweet_df.head()

In [None]:
#getting coordinates of general tweet locations
# The profile location is passed as the string itself, not wrapped in a list, so
# the geocoder receives the text and 'title' in the result is a plain string.
# Blank or non-string locations go straight to the default address.
# This makes at least one geocoding request (with its pause) per tweet, so it is slow.
default_addr = addrs_list[0]
tweet_df['location_coords'] = tweet_df['location'].apply(
    lambda loc: get_address_to_coords(
        loc if isinstance(loc, str) and loc.strip() else default_addr,
        default_addr,
    )
)

In [None]:
# Tweet texts and their geocoded location dicts, in the same row order.
# NOTE(review): tolist() on an object column is expected to hand back the same dict
# objects the DataFrame holds, so the in-place update in the next cell would also be
# visible through tweet_df.location_coords (the map cell relies on that) - confirm.
list_of_tweets = tweet_df.text.tolist()
tweet_location_coords_list_of_dicts = tweet_df.location_coords.tolist()

In [None]:
# Attach each tweet's text to the location dict at the same position;
# make_folium_map shows it as the marker tooltip.
for t, tweet_text in enumerate(list_of_tweets):
    tweet_location_coords_list_of_dicts[t].update({'tweet_text': tweet_text})

## Make map

In [None]:
################################################################################
# Now you can see your results!                                                #
# You can hover over each marker to see the Tweet text                         #
################################################################################
# The tweet dicts must already carry 'tweet_text' (added by the previous cell),
# because make_folium_map reads it for each tweet marker's tooltip.
make_folium_map(
    point_of_interest_coords_list_of_dicts=poi_data_list_of_dicts,
    nearest_coords_list_of_dicts=  nearest_places_list_of_dicts,
    tweets_coords_list_of_dicts=tweet_df.location_coords.tolist(),
)