**Documentation**


https://indico.io/docs#sentiment



In [1]:
import indicoio

# Pandas
import pandas as pd
# Show full cell contents (tweet text) without truncation. `None` is the
# supported "no limit" value; the old -1 sentinel was deprecated in pandas 1.0
# and is rejected by current releases.
pd.set_option('display.max_colwidth', None)

# JSON
import json
import simplejson
import ijson

# MISC
import re
import urllib
import urllib.request
from pprint import pprint

import wget
from tqdm import tqdm

from geopy.geocoders import Nominatim

## Using Indico API

In [2]:
# Reading in the data (Testing)
# Download the tweet dump and cache it on disk as pretty-printed JSON.
filename = 'tweets.json'

# The context manager closes the HTTP connection even if parsing fails.
with urllib.request.urlopen("http://sfhomeless.herokuapp.com/tweets") as response:
    json_data = simplejson.load(response, encoding='UTF-8')

# ensure_ascii=False writes non-ASCII characters (e.g. emoji in tweet text)
# verbatim, so the file encoding is pinned to UTF-8 instead of being left to
# the platform default.
with open(filename, 'w', encoding='utf-8') as outfile:
    json.dump(json_data, outfile, sort_keys=True, indent=4, ensure_ascii=False)

In [3]:
# Pull out just the elements of the top-level "docs" array from the cached
# JSON file. ijson parses bytes, so the file is opened in binary mode
# (text-mode file objects are deprecated by ijson).
with open(filename, 'rb') as f:
    objects = ijson.items(f, 'docs.item')
    # Materialise while the file is still open: `objects` is a lazy iterator.
    tweets = list(objects)

# Preview a few records rather than dumping the entire list into the output.
pprint(tweets[:5])

[{'_id': '758312666800742401',
  'created_at': 'Wed Jul 27 14:46:43 +0000 2016',
  'geo': None,
  'text': 'Two homeless men on the bus stop in San Francisco '
          'https://t.co/pb47bQtHDH #aged #alcohol #alcoholic'},
 {'_id': '758312732596768768',
  'created_at': 'Wed Jul 27 14:46:58 +0000 2016',
  'geo': None,
  'text': "Should San Francisco's tech firms be taxed to help the homeless? "
          'https://t.co/kkiovTCtVM'},
 {'_id': '758313898583793664',
  'created_at': 'Wed Jul 27 14:51:36 +0000 2016',
  'geo': None,
  'text': 'Jane Kim\'s office "pushing hard" to get fence built around park to '
          'keep homeless out. #sf #missiondistrict  https://t.co/XPSkYNQuLq'},
 {'_id': '758313970176372736',
  'created_at': 'Wed Jul 27 14:51:53 +0000 2016',
  'geo': None,
  'text': 'RT @jrivanob: Jane Kim\'s office "pushing hard" to get fence built '
          'around park to keep homeless out. #sf #missiondistrict  '
          'https://t.co/XPSk…'},
 {'_id': '758315129607950337',


In [4]:
# Finding Columns
# Column names are the union of the keys seen across all tweets, in
# first-seen order.
row_names = []
for t in tweets:
    for key in t:
        if key not in row_names:
            row_names.append(key)

# Build each row by looking values up by column name, so rows stay aligned
# with `row_names` even if a tweet has its keys in a different order or is
# missing a key (missing values become None).
tweet_data = [[t.get(key) for key in row_names] for t in tweets]

In [5]:
# Assemble the row lists into a DataFrame, labelling columns with the tweet keys.
tweet_df = pd.DataFrame(tweet_data, columns=row_names)

In [6]:
# Score every tweet's text with both indico sentiment models, one call per row.
# The API functions are passed to apply() directly; no wrapper lambda is needed.
tweet_df['SentimentScore'] = tweet_df['text'].apply(indicoio.sentiment)
tweet_df['SentimentScore_HQ'] = tweet_df['text'].apply(indicoio.sentiment_hq)

In [7]:
# Display the tweets together with the SentimentScore and SentimentScore_HQ
# columns (a bare last expression renders as a rich table).
tweet_df

Unnamed: 0,geo,_id,text,created_at,SentimentScore,SentimentScore_HQ
0,,758312666800742401,Two homeless men on the bus stop in San Francisco https://t.co/pb47bQtHDH #aged #alcohol #alcoholic,Wed Jul 27 14:46:43 +0000 2016,0.558840,0.711591
1,,758312732596768768,Should San Francisco's tech firms be taxed to help the homeless? https://t.co/kkiovTCtVM,Wed Jul 27 14:46:58 +0000 2016,0.084194,0.354989
2,,758313898583793664,"Jane Kim's office ""pushing hard"" to get fence built around park to keep homeless out. #sf #missiondistrict https://t.co/XPSkYNQuLq",Wed Jul 27 14:51:36 +0000 2016,0.442301,0.620854
3,,758313970176372736,"RT @jrivanob: Jane Kim's office ""pushing hard"" to get fence built around park to keep homeless out. #sf #missiondistrict https://t.co/XPSk…",Wed Jul 27 14:51:53 +0000 2016,0.340497,0.662840
4,,758315129607950337,RT @ProPublica: Should tech be doing more to help San Francisco's homeless population? https://t.co/B9nCDGjdAZ,Wed Jul 27 14:56:30 +0000 2016,0.872479,0.575983
5,,758316964548845568,"Homeless crisis, in San Francisco ~... https://t.co/U9G6pZSoNp",Wed Jul 27 15:03:47 +0000 2016,0.500457,0.678693
6,,758317418632450050,RT @sfchronicle: Open Forum: Farrell asks voters to OK plan to remove #SanFrancisco #homeless encampments. https://t.co/faphAVHsOb https://…,Wed Jul 27 15:05:36 +0000 2016,0.632849,0.667005
7,,758317909185703936,FREE 🍕 @ King St 'til 10 am! Stop by and buy a raffle ticket or make a donation to the SF Homeless Prenatal Program! https://t.co/wcLCVlMCoq,Wed Jul 27 15:07:33 +0000 2016,0.467153,0.280226
8,,758333451380649985,RT @AmicisBestPizza: FREE 🍕 @ King St 'til 10 am! Stop by and buy a raffle ticket or make a donation to the SF Homeless Prenatal Program! h…,Wed Jul 27 16:09:18 +0000 2016,0.476221,0.408733
9,,758334986126696448,We can explain it -- plus some solutions: https://t.co/nZNNS5lLXV\n#sfhomelessproject https://t.co/hFvqVzn3ab,Wed Jul 27 16:15:24 +0000 2016,0.852342,0.607982


## Reverse Geocoder

In [15]:
# Keep only the tweets that actually carry a `geo` payload.
tweet_df_new = tweet_df['geo'].dropna()

# Build the geocoder once instead of once per tweet. Nominatim's usage policy
# requires an identifying user agent, and recent geopy releases refuse to run
# without one. Replace the string with something that identifies your app.
geolocator = Nominatim(user_agent='sf-homeless-tweet-sentiment')

for geo in tweet_df_new:
    # presumably geo['coordinates'] is a (latitude, longitude) pair, which is
    # the order reverse() expects; verify against the tweet source.
    location = geolocator.reverse(geo['coordinates'])
    # reverse() returns None when no address is found for the point.
    if location is None:
        print('No address found for', geo['coordinates'])
    else:
        print(location.address)

Bill Graham Civic Auditorium, Hayes Street, West SoMa, SF, California, 94117, United States of America
