In [37]:
#These three cells process the GeoTweetReturn results.
#Point csv_path at your CSV and change the map title in the third cell. The rest should work automatically.

import plotly.plotly
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()

# Load a CSV of geotagged tweets into a DataFrame and score every tweet with VADER.
#
# The path is relative to the notebook's working directory (the repository
# root), so the notebook runs from any checkout. Point it at your own CSV here.
csv_path = 'output/holdallthetweets.csv'

# The file has no header row, so the column names are assigned explicitly.
df = pd.read_csv(csv_path, header=None)
df.columns = ["Longitude", "Latitude", "Tweet Text", "City&State", "Lat&Long Together"]

# Missing tweets come back from read_csv as NaN (a float) and purely numeric
# tweets as numbers; coerce everything to str so the analyzer always receives
# text. An empty string simply scores as neutral.
tweet_text = df["Tweet Text"].fillna("").astype(str)

# Keep only VADER's "compound" score for each tweet and store it as a new
# last column, aligned row-for-row with the tweets.
df["Compound Sentiment"] = [
    analyzer.polarity_scores(text)["compound"] for text in tweet_text
]

df.head()

Unnamed: 0,Longitude,Latitude,Tweet Text,City&State,Lat&Long Together,Compound Sentiment
0,-122.436232,47.495315,@jacobc hey hey!! Are you in Seattle? Probably...,"Seattle, WA","[-122.436232, 47.4953154]",0.6988
1,-77.158594,38.940225,#صباح_الخير من أمام مبنى الكونقرس من #واشنطن 💚...,"Bethesda, MD","[-77.158594, 38.940225]",0.0
2,-111.083219,32.057802,"Arian, Emilie and anahi are the nostalgic frie...","Tucson, AZ","[-111.083219, 32.057802]",0.4767
3,-75.280284,39.871811,Telling me I have to cut coffee out of my diet...,"Philadelphia, PA","[-75.280284, 39.871811]",0.1027
4,-79.76259,40.477383,#oliviagarden #giveawaygoodies #ecohairbrush M...,"New York, USA","[-79.76259, 40.477383]",0.7096


In [38]:
# Build `formap`: one row per city with the mean compound sentiment, the
# number of tweets, and a latitude/longitude to place the city on the map.

# Mean sentiment and tweet count for each city, computed in one pass.
# The result is indexed by 'City&State' in sorted order.
sentiment_stats = df.groupby('City&State')['Compound Sentiment'].agg(['mean', 'count'])

# Coordinates of the first tweet seen for each city. Taking both values from
# the same row keeps latitude and longitude paired.
first_coords = (
    df.drop_duplicates(subset='City&State')
      .set_index('City&State')[['Latitude', 'Longitude']]
)

# Left-join on the city index so only cities that were aggregated are kept,
# then flatten to plain columns with the names the map cell expects.
formap = sentiment_stats.join(first_coords).reset_index()
formap.columns = ['city', 'mean sentiment', 'count', 'lat', 'long']

# Rescale the mean sentiment from [-1, 1] to [-100, 100] for display.
formap['mean sentiment'] = formap['mean sentiment'] * 100

formap.head()


Unnamed: 0,city,mean sentiment,count,lat,long
0,"Aberdeen, NC",0.0,1,35.114539,-79.476683
1,"Absecon, NJ",-45.88,1,39.385879,-74.558351
2,"Ada, OK",0.0,1,34.729522,-96.717492
3,"Addison, IL",0.0,1,41.903958,-88.063656
4,"Agawam, MA",0.0,1,42.03108,-72.710598


In [39]:
# Draw a bubble map of the per-city results in `formap` and write it to an
# HTML file. Everything should be good to go here; just change the title.
map_title = 'Facebook Sentiment'             #change the title here.

# Hover label for each city ('<br>' is a line break in plotly hover text).
formap['text'] = (
    formap['city']
    + '<br>Mean Sentiment: ' + formap['mean sentiment'].astype(str)
    + '<br>Tweet Count: ' + formap['count'].astype(str)
)

# A single scattergeo trace holding every city: bubble area tracks the tweet
# count and bubble color tracks the mean sentiment.
cities = [
    dict(
        type='scattergeo',
        locationmode='USA-states',
        lon=formap['long'],
        lat=formap['lat'],
        text=formap['text'],
        mode='markers',
        marker=dict(
            size=formap['count'] * 10,
            opacity=.7,
            color=formap['mean sentiment'],
            colorscale='Blackbody',
            showscale=True,
            reversescale=False,
            colorbar=dict(
                title='Sentiment Score <br>(100 = Positive) <br>(-100 = Negative)',
                # Spelled correctly so the title is actually placed above the
                # colorbar; a misspelled key is silently ignored because the
                # figure is plotted with validate=False.
                titleside='top',
            ),
            line=dict(width=0.5, color='rgb(40,40,40)'),
            sizemode='area',
        ),
    )
]

layout = dict(
    title=map_title,
    showlegend=False,
    geo=dict(
        scope='usa',
        projection=dict(type='albers usa'),
        showland=True,
        landcolor='rgb(217, 217, 217)',
        subunitwidth=1,
        countrywidth=1,
        subunitcolor="rgb(255, 255, 255)",
        countrycolor="rgb(255, 255, 255)",
    ),
)

fig = dict(data=cities, layout=layout)

# Writes the HTML file into the current working directory; the returned file
# URL is shown as the cell output.
plotly.offline.plot(fig, validate=False, filename='d3-bubble-map-populations.html')

'file:///Users/justin/Documents/2018/ucsd/GroupProject1/Sentiment-Search-Engine/d3-bubble-map-populations.html'