# Update location fields


In [6]:
import pymongo
import pandas as pd
import numpy as np
import random
import datetime

In [7]:
def db_connection(collection_name): 
    # connect to mLab DB
    try:
        with open("../credentials/mlab_credentials.txt", 'r', encoding='utf-8') as f:
            [name,password,url,dbname]=f.read().splitlines()
            db_conn = pymongo.MongoClient("mongodb://{}:{}@{}/{}".format(name,password,url,dbname))
            print ("DB connected successfully!!!")
    except pymongo.errors.ConnectionFailure as e:
        print ("Could not connect to DB: %s" % e) 
        
    db = db_conn[dbname]
    collection = db[collection_name]
    
    return collection

In [8]:
db_tweets = db_connection("tweets")
db_tweets_bckp = db_connection("tweets_bckp_181205")

DB connected successfully!!!
DB connected successfully!!!


In [4]:
print("Tweets in DB:", db_tweets.count())

  """Entry point for launching an IPython kernel.


Tweets in DB: 67636


In [19]:
print("Tweets in Backup DB:", db_tweets_bckp.count())

  """Entry point for launching an IPython kernel.


Tweets in Backup DB: 9665


In [9]:
# uncomment to delete
result = db_tweets_bckp.delete_many({})
print(result.deleted_count, " documents deleted")

6153  documents deleted


In [15]:
print(db_tweets_bckp.count())

9665


  """Entry point for launching an IPython kernel.


In [12]:
tweets_list = [tweet for tweet in db_tweets.find()[:100]]
len(tweets_list)

100

In [12]:
db_tweets_bckp.insert_many(tweets_list)
print(db_tweets_bckp.count())

9665


  


In [27]:
for tweet in tweets_list:
    
    # Format the datetime field
    date_str = tweet['created_at']
    date_obj = datetime.datetime.strptime(date_str, '%a %b %d %H:%M:%S %z %Y')
    tweet['datetime'] = datetime.datetime.combine(date_obj.date(), date_obj.time())
        
    if tweet["coordinates"] is not None and tweet["coordinates"]["type"] == "Point":
        tweet['lat'] = tweet["coordinates"]["coordinates"][1]
        tweet['lon'] = tweet["coordinates"]["coordinates"][0]
    else:
        tweet['lat'] = None
        tweet['lon'] = None
    
    db_tweets.replace_one({"id":tweet["id"]}, tweet, upsert = True)
    #print(tweet)

In [10]:
location_query = {
    "lat": {
        #"$ne": None
        "$exists": True
    }
}
datetime_query = {
    "datetime": {
        "$exists": True
    }
}

In [11]:
for tweet in db_tweets.find(location_query)[:10]:
    print(tweet["coordinates"],tweet['lat'],tweet['lon'])

None None None
None None None
None None None
None None None
None None None
None None None
None None None
None None None
None None None
None None None


In [36]:
print("Tweets in DB:", db_tweets.count())
print("Tweets in DB (location):", db_tweets.count(location_query))
print("Tweets in DB (datetime):", db_tweets.count(datetime_query))

  """Entry point for launching an IPython kernel.
  
  This is separate from the ipykernel package so we can avoid doing imports until


Tweets in DB: 10544
Tweets in DB (location): 10544
Tweets in DB (datetime): 10544


In [27]:
tweets_list[14]

{'_id': ObjectId('5c004831523ec489f07f4e34'),
 'created_at': 'Thu Nov 29 20:09:58 +0000 2018',
 'id': 1068235643652722688,
 'id_str': '1068235643652722688',
 'text': 'Report: MMA Pioneer Cal Worsham Dead At 55 https://t.co/2qaw6OEIkO #CalWorsham #MMA',
 'truncated': False,
 'entities': {'hashtags': [{'text': 'CalWorsham', 'indices': [67, 78]},
   {'text': 'MMA', 'indices': [79, 83]}],
  'symbols': [],
  'user_mentions': [],
  'urls': [{'url': 'https://t.co/2qaw6OEIkO',
    'expanded_url': 'http://po.st/LhS8wh',
    'display_url': 'po.st/LhS8wh',
    'indices': [43, 66]}]},
 'metadata': {'iso_language_code': 'en', 'result_type': 'recent'},
 'source': '<a href="http://twitter.com" rel="nofollow">Twitter Web Client</a>',
 'in_reply_to_status_id': None,
 'in_reply_to_status_id_str': None,
 'in_reply_to_user_id': None,
 'in_reply_to_user_id_str': None,
 'in_reply_to_screen_name': None,
 'user': {'id': 244274941,
  'id_str': '244274941',
  'name': '♛Fit4aKingMMA♛',
  'screen_name': 'Fit4aKin

In [29]:
hashtag_query = { 
    "entities": { 
        "hastags": {
            "$elemMatch": {
                "text": {
                    "$eq": "MMA"
                }
            }
        } 
    } 
}
for tweet in db_tweets.find(hashtag_query):
    print(tweet)