In [None]:
# Import Libraries
from textblob import TextBlob
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
import nltk
import re
from wordcloud import WordCloud, STOPWORDS
from PIL import Image
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from langdetect import detect
# from nltk.stem import SnowballStemmer
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# from sklearn.feature_extraction.text import CountVectorizer
import nltk
nltk.download('vader_lexicon')
from IPython.display import display
import geopy
from pathlib import Path  
from bs4 import BeautifulSoup
import urllib
import json
import ast
import geopandas as gpd
import snscrape.modules.twitter as sntwitter
import datetime 
from matplotlib import colors
# import matplotlib.cm as cm
from pylab import text


## Master functions
*Functions that I would use everywhere*

### State Data Collection Functions 


#### County Data
**Problem: No comprehensive county dataset had existed that I could access, so I wrote a generator**

*NOTE*- Useful source linked in code
- Main source used was usa.com
- Beautiful soup was used for this. 
- Essentially, I downloaded the HTML for a given state's county data and read through it until I got to the beginning of an HTML table
- I isolated the table, and then went through its rows and cells, each time constructing a new corresponding array

In [None]:

# The below function takes a given state, and returns a dataframe with every county and its corresponding square milage
def county_sqFootage(state):
    """Scrape usa.com for the land area of every county in ``state``.

    Main table-parsing logic adapted from GeeksforGeeks:
    https://www.geeksforgeeks.org/convert-html-table-into-csv-file-in-python/

    Args:
        state: Full state name, e.g. ``'New Mexico'``. Spaces are replaced
            with dashes and the name is lower-cased to build the page URL.

    Returns:
        A DataFrame with three columns:
        ``Area``   - the land area as text with thousands separators removed,
        ``County`` - the upper-cased text before the first comma of the
                     'County / Population' cell,
        ``Radius`` - radius of the circle whose area equals ``Area``
                     (this is what the Twitter geocode search ends up using).
    """
    # ``import urllib`` alone does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly here.
    import urllib.request

    def _cell_texts(row):
        """Return the text of every child of ``row`` that exposes get_text()."""
        texts = []
        for child in row:
            try:
                texts.append(child.get_text())
            except AttributeError:
                # Child node without get_text(); nothing to collect from it.
                continue
        return texts

    slug = re.sub(' ', '-', state).lower()
    page_url = 'http://www.usa.com/rank/' + slug + '-state--land-area--county-rank.htm'
    # The context manager closes the HTTP response once the page has been read.
    with urllib.request.urlopen(page_url) as response:
        html = response.read()

    soup = BeautifulSoup(html, 'html.parser')
    # The second <table> on the page holds the ranking; its first row is the header.
    table_rows = soup.find_all("table")[1].find_all("tr")
    list_header = _cell_texts(table_rows[0])
    data = [_cell_texts(row) for row in table_rows[1:]]

    # Storing the data into a Pandas DataFrame
    df = pd.DataFrame(data=data, columns=list_header)
    # The first column is not needed (presumably the rank number - TODO confirm).
    df.drop(df.columns[[0]], axis=1, inplace=True)
    df.rename(columns={'County / Population': 'County', 'Land Area ▼': 'Area'}, inplace=True)

    # Keep only the county name: everything after the first comma is discarded.
    county_names = df['County'].str.split(',', expand=True)[0]
    # Keep only the number in front of ' sq' and strip thousands separators.
    areas = df['Area'].str.split(' sq', expand=True).replace({',': ''}, regex=True)[0]

    result = pd.concat([areas, county_names], axis=1, join='inner')
    result.columns = ['Area', 'County']
    result['County'] = result['County'].str.upper()
    # Radius of the circle with the same area as the county: r = sqrt(A / pi).
    result['Radius'] = np.sqrt(result['Area'].astype(float) / np.pi)
    return result



#### County Location
*While radius and area information is all well and good, I can't do anything if I don't know where the county is*
- If the state info exists, then pull it up!
- If it doesn't..
    - First I pull my county data from a GitHub county location dataset. While it is incomplete for the data I need, it has longitude and latitude coordinates in one place, which I need
    - I open the webpage, convert it to a dataframe, and drop the unnecessary columns. Then I isolate only the data relevant to the state I'm searching
    - From here I call the previous square footage locator, and merge both dataframes by their County name. 
    - I save this file in the "State Information" folder for future use

In [None]:

def getCountyInfo(state):
    """Return the county table for ``state``, building and caching it if needed.

    The table is cached as ``State information/<state>.csv`` under the current
    working directory. When the cache exists it is simply read back. Otherwise
    the county list is downloaded, every county is geocoded with Photon, the
    result is merged with the areas from ``county_sqFootage`` and written to
    the cache.

    Args:
        state: Full state name as it appears in the 'State full' column of the
            source dataset, e.g. ``'Texas'``.

    Returns:
        The DataFrame read back from the CSV cache. Reading it back even right
        after building it means ``Coordinates`` always has the same form
        (the text ``"[lat, lon]"``) whether or not the cache already existed.
        Counties that could not be geocoded have an empty ``Coordinates`` cell.
    """
    csv_folder = 'State information'
    # Resolve the cache location locally instead of relying on a notebook-level
    # ``file_dir`` variable that may not be defined yet when this is first called.
    folder_path = Path.cwd() / csv_folder
    file_path = folder_path / (state + '.csv')
    cache_exists = file_path.exists()
    print(cache_exists)
    if cache_exists:
        return pd.read_csv(file_path)

    # Source dataset with one row per city; only the county names are needed here.
    data = pd.read_csv('https://raw.githubusercontent.com/grammakov/USA-cities-and-states/master/us_cities_states_counties.csv', sep='|')
    del data['State short'], data['City'], data['City alias']
    data.columns = data.columns.str.replace('State full', 'State')
    data = data[data.State == state].drop_duplicates(subset='County', keep='first')
    del data['State']

    # One geocoder client is enough for all counties.
    locator = geopy.Photon(user_agent="myGeocoder")
    coordinates = []
    for county in data['County']:
        print(county)
        location = locator.geocode(county.lower() + ',' + state + ', United States')
        if location is None:
            # Keep going so the rest of the state is still cached.
            print('No geocoding result for ' + county + ', ' + state)
            coordinates.append(None)
        else:
            coordinates.append([location.latitude, location.longitude])
    data['Coordinates'] = coordinates

    area = county_sqFootage(state)
    data = pd.merge(data, area, on='County', how='outer')
    data.rename(columns={'Area': 'Area (sq/mi)', 'Radius': 'Radius (mi)'}, inplace=True)

    folder_path.mkdir(parents=True, exist_ok=True)
    data.to_csv(file_path, index=False)
    return pd.read_csv(file_path)



### Wordcloud generator
*All this function does is initialize the wordcloud data in one simple place so it's cleaner later*

In [None]:
def create_wordcloud(text,keyword):
    """Render a word cloud of ``text``, save it as a PNG and display it.

    The image is shaped by the mask in ``cloud.png`` (read from the current
    working directory) and written to
    ``<file_dir>/Keyword State/<state> on the term <keyword>.png``.
    ``file_dir`` and ``state`` are read from the notebook's global variables.

    Args:
        text: Either one string, or an iterable of strings (for example the
            ``.values`` array of a text column).
        keyword: Search term, used only to build the output file name.

    Returns:
        The path of the saved PNG.
    """
    file_path = os.path.join(file_dir, 'Keyword State')
    mask = np.array(Image.open('cloud.png'))
    stopwords = set(STOPWORDS)

    # str() on a large NumPy array gives an abbreviated repr ("... "), which
    # would silently drop most of the tweets, so join the items explicitly.
    if isinstance(text, str):
        corpus = text
    else:
        try:
            corpus = ' '.join(str(item) for item in text)
        except TypeError:
            # Not iterable: fall back to its plain string form.
            corpus = str(text)

    wc = WordCloud(
        background_color='white',
        mask=mask,
        max_words=3000,
        stopwords=stopwords,
        repeat=True,
    )
    wc.generate(corpus)
    name = (state) + " on the term " + keyword+ ".png"
    path = os.path.join(file_path, name)
    wc.to_file(path)
    print("Word Cloud Saved Successfully")
    display(Image.open(path))
    return path

In [None]:
def pullTweets(keyword,geocode,noOfTweet,time):
    """Scrape tweets matching ``keyword`` within ``geocode`` and ``time``.

    The three search parts are joined with single spaces into one query.
    Scraping stops as soon as ``noOfTweet`` tweets have been collected, or
    when the scraper runs out of results, whichever comes first.

    Returns:
        A list of ``[date, id, content, username]`` lists, one per tweet.
    """
    query = ' '.join([keyword, geocode, time])
    scraper = sntwitter.TwitterSearchScraper(query)
    collected = []
    for tweet in scraper.get_items():
        collected.append([tweet.date, tweet.id, tweet.content, tweet.user.username])
        # Stop right after the requested number of tweets has been stored.
        if len(collected) == noOfTweet:
            break
    return collected
def percentage(part,whole):
    """Return ``part`` as a percentage of ``whole`` (both are converted to float)."""
    numerator = 100 * float(part)
    denominator = float(whole)
    return numerator / denominator
def average(arr):
    """Return the mean of ``arr`` rounded to 4 decimals, or None when ``arr`` is empty."""
    if not len(arr) > 0:
        return None
    mean = sum(arr) / len(arr)
    return round(mean, 4)

In [None]:

# ---- Run configuration ----
noOfTweet = 50000  # upper bound on the tweets pulled for one county/year query
state = 'Texas'
keyword = 'mexican'

# file_dir has to exist before any helper is called: functions in this
# notebook read it as a global variable.
file_dir = Path.cwd()
countyDF =  (getCountyInfo(state))
currentYear = datetime.datetime.now().year

# ---- Running totals over every tweet analysed in this run ----
positive = 0
negative = 0
neutral = 0
polarity = 0
tweet_list = []
neutral_list = []
negative_list = []
positive_list = []

# ---- Cache of per-county results, saved as JSON ----
cityDict = {}
json_folder = 'Keyword State'
file_path = os.path.join(file_dir, json_folder, (((state) + " on the term " + keyword +'.json')))

# extract the county column from the countyDF and convert its values to a list
countyList = countyDF['County'].values.tolist()
usefulTweets = 0
users = []
county_needed = []
tweets_list2 = []
total_data = []


## Getting Started!
### Failsafe Code
 - One of the biggest issues that ended up plaguing this project was not that of failed logic, but a failed computer
 - As such, I had to make sure that at every instance that took any amount of time, the work done could be recovered

### What below does
- The code below checks whether a cache file already exists for the given state and keyword, and if it does, works out which counties have already been scraped so that only the remaining ones are fetched

In [None]:
#Sentiment Analysis

# Resume support: if a cache file for this state/keyword already exists, load it
# and only scrape the counties that are not yet among its keys.
if os.path.exists(file_path):
    # The context manager makes sure the cache file is closed after loading.
    with open(file_path, 'r') as f:
        cityDict = json.load(f)
    checked_counties = cityDict.keys()
    # Counties still to do, kept in their original order
    # (duplicates removed, as the previous set difference did).
    county_needed = list(dict.fromkeys(
        county for county in countyList if county not in checked_counties
    ))
    print(county_needed)

    print(len(tweet_list))
else:
    county_needed = countyList

## Fetching the data
NOTE: Due to the way the logic was constructed, it was simply easier to have this large block of code for the loop as opposed to functions. Having the code in one place was more effective for me
### Steps
#### The Loops: ***County***->Time-> Tweets
_Go through every possible tweet in the category. This order was decided due to efficiency in scraping._
- Accessing the API was probably the most confusing part. Twitter's search has built-in search keycodes, so all I needed to do when I requested a batch of tweets was send a search command with the keycodes for time and location
- Location was determined through the radius of a circle equivalent in area to the county. This proved to be effective in most cases, and was a good enough compromise while avoiding too much overlap
- At the end of each county's iteration, the master dictionary was updated,the cache file was opened, and the cache file would resave. This trick was how I was also able to pause runs. 
   - Pausing runs proved especially helpful when the searched data could take 30+ hours.

### County -> ***Time*** -> Tweets
- The county specific dictionary was initialized, saved to cityDict. Additionally, the pandas dataframe that would later be iterated was constructed
 ### County -> Time -> ***Tweets***
 - The code loops through every saved tweet from the given batch, and passes it to Vader (and to TextBlob for the polarity). Together they return 5 useful datapoints. 
    - Polarity (from TextBlob): How strongly did the text lean one way or the other?
    - Negative: How negative was the sentiment for the tweet?
    - Positive: How positive was the sentiment for the tweet?
    - Neutral: How neutral was the sentiment for the tweet?
    - Composite: What was the net sentiment?
 - Due to some issues with the construction of the composite score, it was actually calculated here using the same averaging algorithm
 - For later word analysis depending on the composite score, the tweets were then sorted into lists of their respective net sentiment


In [None]:

# One analyzer for the whole run, instead of building a new one for every tweet.
sentiment_analyzer = SentimentIntensityAnalyzer()

for county in county_needed:
    # ---- County: build the geocode search term ----
    county_row = countyDF.loc[countyDF['County'] == county]
    if county_row.empty:
        print("Skipping " + str(county) + ": no matching row in the county table")
        continue
    coordinates = county_row['Coordinates'].values[0]
    # When read from the CSV cache the coordinates are the text "[lat, lon]".
    if isinstance(coordinates, str):
        coordinates = ast.literal_eval(coordinates)
    if not isinstance(coordinates, (list, tuple)) or len(coordinates) < 2:
        print("Skipping " + str(county) + ": no coordinates available")
        continue
    lat = coordinates[0]
    longi = coordinates[1]
    miles = county_row['Radius (mi)'].values[0]
    geocode = 'geocode:' + str(lat) + ',' + str(longi) + ',' + str(miles) + 'mi'
    compScore = []

    # cityDict[county] = [tweet count, mean yearly compound score, {year: yearly stats}]
    location = county
    if location not in cityDict:
        cityDict[location] = [0,[],{}]

    # ---- Time: one search per calendar year ----
    for year in range(2000,currentYear+1):
        timeSearchParams = 'until:' + str(year) + '-12-31' + ' since:' + str(year) + '-01-01'
        # pullTweets stops by itself once noOfTweet tweets have been collected.
        tweets_list2 = pullTweets(keyword,geocode,noOfTweet,timeSearchParams)
        tweets_df = pd.DataFrame(tweets_list2, columns=['Datetime', 'Tweet Id', 'Text', 'Username'])
        posList = []
        negList = []
        neuList = []
        compList = []
        posCount,negCount,neuCount = 0,0,0

        # ---- Tweets: score every tweet of this county/year ----
        # NOTE: filtering out repeated users/texts is currently disabled,
        # so every returned tweet is counted.
        for index,tweet in tweets_df.iterrows():
            username = tweet['Username']
            tweetText = tweet['Text']
            analysis = TextBlob(tweetText)
            score = sentiment_analyzer.polarity_scores(tweetText)
            neg = score['neg']
            neu = score['neu']
            pos = score['pos']
            comp = score['compound']

            # append each score to its respective list
            posList.append(pos)
            negList.append(neg)
            neuList.append(neu)
            compList.append(comp)
            cityDict[location][0]+=1
            usefulTweets+=1
            polarity += analysis.sentiment.polarity
            # Sort the tweet by whichever of the pos/neg scores is larger.
            if neg > pos:
                negative_list.append(tweetText)
                negative += 1
                negCount+=1
            elif pos > neg:
                positive_list.append(tweetText)
                positive += 1
                posCount+=1
            else:
                neutral_list.append(tweetText)
                neutral += 1
                neuCount+=1
            users.append(username)
            tweet_list.append(tweetText)

        # Yearly averages (average() returns None for a year without tweets).
        pos = average(posList)
        neg = average(negList)
        neu = average(neuList)
        comp = average(compList)
        count = posCount+negCount+neuCount
        if(len(posList) > 0):
            compScore.append(comp)
            cityDict[county][2][year] = {'pos':pos,'posCount':posCount,'neg':neg,'negCount':negCount,'neu':neu,'neuCount':neuCount,'count':count,'comp':comp}

    print("For the county of " + str(county) + ", there were " + str(cityDict[location][0]) + " unique tweets")
    cityDict[county][1] = average(compScore)
    # Save the cache after every county so an interrupted run can be resumed.
    with open(file_path, 'w') as fp:
        json.dump(cityDict, fp)
    
    # delete the second index of cityDIct




# make a new dictionary identical to countyDict but without the second index



## Personal Graphs
- The below code essentially mapped out a percentage of sentiment for the tweets. This code gave me a good general idea of the usefulness of my data based on what I may anticipate
- I also did another re-save, just in case


In [None]:
# Share of each sentiment among the tweets analysed in this run.
# noOfTweet is only the cap for a single county/year query, so the number of
# tweets actually analysed (usefulTweets) is used as the denominator.
if usefulTweets > 0:
    positive = percentage(positive, usefulTweets)
    negative = percentage(negative, usefulTweets)
    neutral = percentage(neutral, usefulTweets)
    polarity = percentage(polarity, usefulTweets)
else:
    # Nothing was analysed (for example every county came from the cache).
    positive = negative = neutral = polarity = 0.0
positive = format(positive, '.1f')
negative = format(negative, '.1f')
neutral = format(neutral, '.1f')
# drop repeated tweet texts
tweet_list = list(set(tweet_list))

# Re-save the cache, just in case.
with open(file_path, 'w') as fp:
    json.dump(cityDict, fp)

### More purely observational data
- Reading lengths of dataframes to check for obvious repetition or trends

In [None]:
#Number of Tweets (Total, Positive, Negative, Neutral)
# Wrap each tweet collection in a DataFrame, then report how many rows each holds.
tweet_list, neutral_list, negative_list, positive_list = (
    pd.DataFrame(rows)
    for rows in (tweet_list, neutral_list, negative_list, positive_list)
)
for label, frame in (
    ('total number: ', tweet_list),
    ('positive number: ', positive_list),
    ('negative number: ', negative_list),
    ('neutral number: ', neutral_list),
):
    print(label, len(frame))

### Cleanup Tweets
For the word analysis that followed, it was necessary to filter out special characters or any artifacts sent by Twitter

*NOTE*: This code was copied from the original source, see the main journal entry for my code sources

In [None]:
#Cleaning Text (RT, Punctuation etc)

# The patterns are raw strings so that \w, \/ and \S reach the regex engine
# unchanged instead of being treated as (invalid) string escapes.
_RT_PREFIX = re.compile(r'RT @\w+: ')
_MENTION_SYMBOL_OR_URL = re.compile(r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)")


def remove_rt(x):
    """Replace every 'RT @user: ' retweet prefix in ``x`` with a space."""
    return _RT_PREFIX.sub(" ", x)


def rt(x):
    """Replace @mentions, non-alphanumeric characters and URLs in ``x`` with spaces."""
    return _MENTION_SYMBOL_OR_URL.sub(" ", x)


#Creating new dataframe and new features
tw_list = pd.DataFrame(tweet_list)
tw_list["text"] = tw_list[0]

#Removing RT, Punctuation etc
tw_list["text"] = tw_list.text.map(remove_rt).map(rt)
tw_list["text"] = tw_list.text.str.lower()
tw_list.head(tweet_list.shape[0])

### Creating new convenience dataframes for the pie chart later
- Going through each tweet and attaching the term of its sentiment for easier sorting and dataframe dumping

In [None]:
#Calculating Negative, Positive, Neutral and Compound values
tw_list[['polarity', 'subjectivity']] = tw_list['text'].apply(lambda Text: pd.Series(TextBlob(Text).sentiment))

# One analyzer for all rows, instead of building a new one for each tweet.
vader = SentimentIntensityAnalyzer()
# Series.items() replaces iteritems(), which no longer exists in pandas 2.
for index, row in tw_list['text'].items():
    score = vader.polarity_scores(row)
    neg = score['neg']
    neu = score['neu']
    pos = score['pos']
    comp = score['compound']
    if neg > pos:
        label = "negative"
    elif pos > neg:
        label = "positive"
    else:
        label = "neutral"
    tw_list.loc[index, 'sentiment'] = label
    # Store the scores for every tweet, not only for the neutral ones.
    tw_list.loc[index, 'neg'] = neg
    tw_list.loc[index, 'neu'] = neu
    tw_list.loc[index, 'pos'] = pos
    tw_list.loc[index, 'compound'] = comp
tw_list.head(10)
#Creating new data frames for all sentiments (positive, negative and neutral)
tw_list_negative = tw_list[tw_list["sentiment"]=="negative"]
tw_list_positive = tw_list[tw_list["sentiment"]=="positive"]
tw_list_neutral = tw_list[tw_list["sentiment"]=="neutral"]

In [None]:
def count_values_in_column(data,feature):
    """Tabulate how often each value of column ``feature`` occurs in ``data``.

    Missing values are counted as their own category.

    Returns:
        A DataFrame indexed by the distinct values, with a 'Total' column
        (absolute count) and a 'Percentage' column (share in percent,
        rounded to 2 decimals).
    """
    column = data.loc[:, feature]
    counts = column.value_counts(dropna=False)
    shares = round(column.value_counts(dropna=False, normalize=True) * 100, 2)
    return pd.concat([counts, shares], axis=1, keys=['Total', 'Percentage'])
# Count how many cleaned tweets carry each sentiment label, with percentages.
# As the last expression of the cell, the resulting table is shown by the notebook.
count_values_in_column(tw_list,"sentiment")

### Constructing the Appeal Chart
- After all the dataframe construction had been done, viewing the pie chart was one of the best ways to gauge data reasonability in case I had to stop here   

In [None]:
# create data for Pie Chart
pc = count_values_in_column(tw_list,'sentiment')
names = pc.index
size = pc['Percentage']

# Pick each slice's colour from its label. The slices are ordered by how often
# a label occurs, so a fixed colour list would not stay tied to one sentiment.
sentiment_colors = {'positive': 'green', 'neutral': 'blue', 'negative': 'red'}
slice_colors = [sentiment_colors.get(label, 'grey') for label in names]

# Create a circle for the center of the plot (turns the pie into a donut)
my_circle = plt.Circle((0, 0), 0.7, color='white')
plt.pie(size, labels=names, colors=slice_colors)
p = plt.gcf()
p.gca().add_artist(my_circle)
plt.show()

### Wordcloud construction
- More so than the pie chart, the wordcloud was VERY helpful in determining how relevant a keyword was. For this project I needed person-centered keywords, so if a synonym for person wasn't high up on the wordcloud, I knew the keyword was bunk

In [None]:
# Build the word cloud from every cleaned tweet text.
# create_wordcloud itself writes the PNG to disk and displays it.
cloud = create_wordcloud(tw_list['text'].values,keyword)
# save the wordcloud image

In [None]:

def mapStateData(state,keyword):
    """Draw and save the two-panel county map for ``state`` and ``keyword``.

    Reads the cached results from
    ``Keyword State/<state> on the term <keyword>.json`` and the county shapes
    from ``Shapes/United States/USA_Counties.shx`` (both under the current
    working directory). The left panel colours each county by its sentiment
    score, the right panel by its number of tweets. The figure is saved to
    ``Maps/<state>/<keyword>.png``.

    Args:
        state: State name as used in the shapefile's ``STATE_NAME`` column.
        keyword: Search term the cache file was built for.
    """
    import math

    #-------------------------------------
    # DATA FRAME SETUP
    json_folder = 'Keyword State'
    file_dir = Path.cwd()
    file_path = os.path.join(file_dir, json_folder, (((state) + " on the term " + keyword +'.json')))
    fig, (ax1,ax2) = plt.subplots(ncols=2)
    shape_dir = os.path.join(file_dir, 'Shapes', 'United States','USA_Counties.shx')
    unitedStates = gpd.read_file(shape_dir)
    unitedStates = unitedStates[['STATE_NAME','NAME','geometry']].copy()
    unitedStates = unitedStates[unitedStates.STATE_NAME == state].copy()
    # County names are upper-cased so they can be merged on the cache's 'County' keys.
    unitedStates['NAME'] = unitedStates['NAME'].str.upper()
    unitedStates.rename(columns={'NAME':'County'}, inplace=True)

    # Turn the cache dictionary into a dataframe: one row per county, with the
    # list stored under each key spread over the columns 0, 1, 2.
    with open(file_path, 'r') as f:
        state_dict = json.load(f)
    stateDF = pd.DataFrame.from_dict(state_dict, orient='index')
    stateDF.reset_index(inplace=True)
    print(stateDF)
    stateDF.rename(columns={'index': 'County', 0 :'# of Tweets',1:'score'}, inplace=True)

    # Keep only the counties that have a real numeric score. A boolean mask is
    # used because dropping rows by position while looping shifts the positions.
    has_score = stateDF['score'].apply(
        lambda value: isinstance(value, float) and not math.isnan(value)
    ).astype(bool)
    stateDF = stateDF[has_score].copy()
    stateDF['score'] = stateDF['score'].astype(float)
    unitedStates = pd.merge(unitedStates,stateDF, on= 'County', how = 'outer')
    cm1 = colors.LinearSegmentedColormap.from_list("MyCmapName",["r","b"])

    #-------------------------------------
    # AVERAGE SENT. AND TOTAL POP.
    # State mean = county scores weighted by each county's tweet count.
    tweet_counts = stateDF['# of Tweets']
    totalTweets = tweet_counts.sum()
    if totalTweets > 0:
        averageSentiment = (stateDF['score'] * tweet_counts).sum() / totalTweets
    else:
        # No tweets at all: there is no meaningful mean to show.
        averageSentiment = float('nan')
    averageSentiment=format(averageSentiment, '.2f')
    totalTweets = "{:,}".format(totalTweets)

    #-------------------------------------
    # GRAPHING STUFF
    norm = colors.TwoSlopeNorm(vmin=-.4, vcenter=0, vmax=.4)
    fig.set_size_inches(9, 3)
    title = (state + " on the term \"" + keyword +"\"" )
    fig.suptitle(title, fontsize=15)
    fig.tight_layout()
    ax1 = unitedStates.plot(ax = ax1,cmap = cm1, norm= norm,edgecolor='black',column = 'score',legend = True, legend_kwds={'label':'Sentiment Score','orientation':'vertical'})
    title1 = ('Sentiment')
    # Position of the summary text, in axes coordinates (bottom left of each map).
    textX = .1
    textY = 0

    ax1.text(textX, textY,"Mean State Sentiment: " + str(averageSentiment), ha='center', va='center', transform=ax1.transAxes,weight='bold')
    ax1.set_title(title1)
    ax1.set_axis_off()

    # Upper end of the tweet-count colour scale (at least 1 so the scale is never empty).
    max_count = max(int(tweet_counts.max()), 1) if len(stateDF) > 0 else 1
    print(max_count)

    norm2 = colors.PowerNorm(gamma = .4,vmin=0,vmax=max_count)
    ax2 = unitedStates.plot(ax=ax2,cmap = 'OrRd',norm = norm2,edgecolor='black',column = '# of Tweets',legend = True, legend_kwds={'label':'# of Tweets','orientation':'vertical'})
    title2 = ('# Of Tweets from Unique Users')
    ax2.text(textX, textY,"Total Tweets: " + str(totalTweets), ha='center', va='center', transform=ax2.transAxes,weight='bold')
    ax2.set_title(title2)
    ax2.set_axis_off()

    # Save the figure to Maps/<state>/<keyword>.png, creating the folders if needed.
    save_dir=os.path.join(file_dir, 'Maps', state)
    os.makedirs(save_dir, exist_ok=True)
    save_name = os.path.join(save_dir, keyword +'.png')
    plt.savefig(save_name,dpi=300,facecolor='white', transparent=False)

# Bottom left of each map have total average, and the total number
# THE MEAN STATE STUFF ISNT WEIGHTED


### Refresh Maps
*In case I updated how the maps looked, I wanted to be able to retroactively refresh old maps*

## Plotting the data!



### Creating the dataframe
*Getting all the data in one easy to access place*
- I initialize all the needed path variables, along with the needed cache files.
- This code basically just formats the dataframe in a nice way for me to visually understand in case of a bug

In [None]:
json_folder = 'Keyword State'
file_dir = Path.cwd()
file_path = os.path.join(file_dir, json_folder, (((state) + " on the term " + keyword +'.json')))
shape_dir = os.path.join(file_dir, 'Shapes', 'United States','USA_Counties.shx')
# Turn the cache dictionary into a dataframe: one row per county, with the
# list stored under each key spread over the columns 0, 1, 2.
with open(file_path, 'r') as f:
    state_dict = json.load(f)
stateDF = pd.DataFrame.from_dict(state_dict, orient='index')
stateDF.reset_index(inplace=True)
stateDF.rename(columns={'index': 'County', 0 :'# of Tweets',1:'score',2:'Time'}, inplace=True)
# Flatten the per-year dictionaries into columns named '<year>.<statistic>'.
timeDF = pd.json_normalize(stateDF['Time'])
stateDF.drop('Time',axis = 1,inplace= True)
# Largest yearly tweet count of any county; used as the shared top of the
# colour scale so that the maps of different years are comparable.
maxCount = timeDF[list(timeDF.filter(regex=('count')))].max().max()
# sorting the dataframe out for the key information of the given year
# What we need to do now is first test to make sure this data can actually be graphed
# Then go through all the count columns and identify what the largest value is


### The Plotting
*Making the data actually useful to the common eye*

***Time->Map***
#### Dataframe work
- Additionally, you need to sort the master dataframe for the active year data
- Currently unused data is filtered out by fully expanding the cache dictionary, filtering by keywords compliant with the default auto-naming scheme
- To get geopandas to properly store values to a county, you need to attach the data to the geopandas shape array

#### Key Data Info
- For average state sentiment, originally the simple average of all the counties' sentiment was taken. However, I later realized this unfairly weighted counties with nearly no information. Thus a weighted average was taken based on the corresponding mention frequency.
    - nan data points were dropped here 
- The sum of the given year's column was used for a county's mention frequency
#### Mapping
- The biggest visual issue with the mapping ended up simply being generating a meaningful colormap for the data. 
- After asking around, it seemed that the color red signified negative sentiment, blue positive, and purple was intuitively neutral. Unfortunately matplotlib didn't have a colormap that met this criteria, so I wrote a very simple colormap object.

In [None]:
import math

# ---- Set-up that is the same for every year ----
json_folder = 'Keyword State'
file_dir = Path.cwd()
file_path = os.path.join(file_dir, json_folder, (((state) + " on the term " + keyword +'.json')))
shape_dir = os.path.join(file_dir, 'Shapes', 'United States','USA_Counties.shx')
# The county shapes do not change between years, so read them only once.
stateShapes = gpd.read_file(shape_dir)
stateShapes = stateShapes[['STATE_NAME','NAME','geometry']].copy()
stateShapes = stateShapes[stateShapes.STATE_NAME == state].copy()
stateShapes['NAME'] = stateShapes['NAME'].str.upper()
stateShapes.rename(columns={'NAME':'County'}, inplace=True)
cm1 = colors.LinearSegmentedColormap.from_list("MyCmapName",["r","b"])
save_dir = os.path.join(file_dir, 'Maps', state, keyword)
os.makedirs(save_dir, exist_ok=True)

for year in range(2006,currentYear+1):
    year = str(year)
    count_col = year + '.count'
    comp_col = year + '.comp'
    # Skip years for which the cache holds no data.
    if count_col not in timeDF.columns or comp_col not in timeDF.columns:
        continue

    # Select this year's columns by name, so the result does not depend on
    # the order in which the columns happen to be stored.
    stateDF = pd.concat([stateDF['County'], timeDF[[count_col, comp_col]]], axis=1)
    stateDF.rename(columns={count_col: '# of Tweets', comp_col: 'score'}, inplace=True)
    fig, (ax1,ax2) = plt.subplots(ncols=2)
    unitedStates = pd.merge(stateShapes,stateDF, on= 'County', how = 'outer')

    #-------------------------------------
    # AVERAGE SENT. AND TOTAL POP.
    # State mean = county scores weighted by each county's tweet count;
    # counties without data for this year (NaN) are left out of both sums.
    totalTweets = stateDF['# of Tweets'].sum()
    averageSentiment = (stateDF['score'] * stateDF['# of Tweets']).sum()
    print(averageSentiment)
    if totalTweets > 0:
        averageSentiment = averageSentiment/totalTweets
    else:
        averageSentiment = float('nan')
    averageSentiment=format(averageSentiment, '.2f')
    # The column contains NaN and is therefore float; show the total as a whole number.
    totalTweets = "{:,}".format(int(totalTweets))

    #-------------------------------------
    # GRAPHING STUFF
    norm = colors.TwoSlopeNorm(vmin=-.4, vcenter=0, vmax=.4)
    fig.set_size_inches(9, 3)
    title = (state + " on the term \"" + keyword +"\"" + " in " + year)
    fig.suptitle(title, fontsize=15)
    fig.tight_layout()
    ax1 = unitedStates.plot(ax = ax1,cmap = cm1, norm= norm,edgecolor='black',column = 'score',legend = True, legend_kwds={'label':'Sentiment Score','orientation':'vertical'})
    title1 = ('Sentiment')
    # Position of the summary text, in axes coordinates (bottom left of each map).
    textX = .1
    textY = 0

    ax1.text(textX, textY,"Mean State Sentiment: " + str(averageSentiment), ha='center', va='center', transform=ax1.transAxes,weight='bold')
    ax1.set_title(title1)
    ax1.set_axis_off()
    # maxCount is shared by all years so the colour scales are comparable.
    norm2 = colors.PowerNorm(gamma = .4,vmin=0,vmax=maxCount)
    ax2 = unitedStates.plot(ax=ax2,cmap = 'OrRd',norm = norm2,edgecolor='black',column = '# of Tweets',legend = True, legend_kwds={'label':'# of Tweets','orientation':'vertical'})
    title2 = ('# Of Tweets')
    ax2.text(textX, textY,"Total Tweets: " + str(totalTweets), ha='center', va='center', transform=ax2.transAxes,weight='bold')
    ax2.set_title(title2)
    ax2.set_axis_off()

    # Save the figure as Maps/<state>/<keyword>/<year>.png
    save_name = os.path.join(save_dir, year +'.png')
    plt.savefig(save_name,dpi=300,facecolor='white', transparent=False)

### Saving the gif!
*Going through all the saved time maps in the keyword directory, and merging them into a gif*

In [None]:
import imageio
images = []
path = os.path.join(file_dir, 'Maps', state,keyword)
# Only the yearly PNG maps become frames (a gif saved by an earlier run is
# skipped), and sorting the '<year>.png' names puts them in chronological order.
files = sorted(name for name in os.listdir(path) if name.lower().endswith('.png'))
print(files)
for filename in files:
    images.append(imageio.imread(os.path.join(path,filename)))
save = os.path.join(path,keyword+'.gif')
imageio.mimsave(save, images,duration=1)

In [None]:
def refreshMaps():
    """Redraw every saved state map so it picks up the current map styling.

    Walks ``<file_dir>/Maps/<state>/`` and calls ``mapStateData(state, keyword)``
    for every ``<keyword>.png`` found there. ``file_dir`` is read from the
    notebook's global variables.
    """
    maps_root = os.path.join(file_dir, 'Maps')
    subfolders = os.listdir(maps_root)
    print(subfolders)
    for state in subfolders:
        print(state)
        path = os.path.join(maps_root, state)
        if not os.path.isdir(path):
            continue
        for entry in os.listdir(path):
            stem, extension = os.path.splitext(entry)
            # Only PNG files are keyword maps; the per-keyword sub-folders with
            # the yearly maps (and any other file) are skipped.
            if extension.lower() != '.png' or not os.path.isfile(os.path.join(path, entry)):
                continue
            mapStateData(state, stem)
