Created by: [SmirkyGraphs](http://smirkygraphs.github.io/). Code: [Github](https://github.com/SmirkyGraphs/Python-Notebooks). Source: [PVD-311](http://www.providenceri.gov/pvd-311/).
<hr>

# Providence 311 Requests Word Clouds

This notebook contains the code used to create word clouds from the descriptions given in 311 requests for the 5 most commonly reported request types. Each word cloud shows the 200 most common words, with standard stopwords and the category's own common words removed. All words are converted to lowercase, and punctuation and special characters are removed.

In [1]:
import re
import numpy as np
import pandas as pd

from PIL import Image
from wordcloud import WordCloud, STOPWORDS

In [2]:
def collect_words(df):
    """Build one lowercase, letters-only corpus from ``df['description']``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``description`` column; null entries are skipped.

    Returns
    -------
    str
        All non-empty descriptions, lowercased, joined with single spaces,
        with every character other than a-z and space removed. Returns an
        empty string when there is nothing to collect.
    """
    # drop missing descriptions; coerce to str so a stray non-text value
    # cannot break the string methods below
    descriptions = df['description'].dropna().astype(str).tolist()

    # lowercase, trim, and discard entries that were only whitespace
    cleaned = [entry.strip().lower() for entry in descriptions]
    cleaned = [entry for entry in cleaned if entry != '']

    # join everything into one corpus
    text = ' '.join(cleaned)

    # turn newlines/tabs (and runs of spaces) into a single space first;
    # otherwise the filter below deletes them and glues neighbouring words
    # together (e.g. "hello\nworld" -> "helloworld")
    text = re.sub(r'\s+', ' ', text)

    # remove everything that is not a letter or a space
    text = re.sub(r'[^A-Za-z ]+', '', text)

    return text

In [3]:
def generate_wordcloud(text, title, stop_words=None):
    """Render a word cloud from *text* and save it as a PNG.

    Parameters
    ----------
    text : str
        Corpus the word cloud is generated from.
    title : str
        Used to build the output path ``./output/{title}_output.png``.
    stop_words : iterable of str, optional
        Extra words to exclude, added to wordcloud's ``STOPWORDS`` set.
    """
    # start from the library's stopwords, then add the caller's extras
    stopwords = set(STOPWORDS)
    if stop_words is not None:
        stopwords.update(stop_words)

    # load the image mask; the context manager closes the file handle once
    # the pixel data has been copied into the array
    with Image.open('./files/mask/providence-hd.png') as mask_image:
        mask = np.array(mask_image)

    # setting wordcloud params
    wc = WordCloud(
        font_path='./files/font/trebuc.ttf',
        background_color='#1b1b1b',
        max_font_size=1000,
        mask=mask,
        max_words=200,
        colormap='Spectral',
        stopwords=stopwords,
        normalize_plurals=True,
        random_state=1,
        collocations=False
    )

    # generating wordcloud
    wc = wc.generate(text)

    # saving image
    wc.to_file(f"./output/{title}_output.png")

In [4]:
# read the cleaned 311 request data; the cells below filter it by title
df = pd.read_csv(
    './data/clean/pvd_311_clean.csv',
    low_memory=False,
)

In [5]:
# word cloud for 311 requests titled 'Potholes'
potholes = collect_words(df.loc[df['title'].eq('Potholes')])

# extra words to exclude for this category
stop_words = ['st', 'ave', 'street', 'pothole', 'pot', 'hole', 'potholes']
generate_wordcloud(potholes, title='potholes', stop_words=stop_words)

In [6]:
# word cloud for 311 requests titled 'Graffiti'
graffiti = collect_words(df.loc[df['title'].eq('Graffiti')])

# extra words to exclude for this category
stop_words = ['graffiti', 'st', 'ave', 'street']
generate_wordcloud(graffiti, title='graffiti', stop_words=stop_words)

In [7]:
# generate wordcloud for trash 311 Requests
# (substring match on the title, unlike the exact matches in the other cells)
# na=False: a missing title yields False instead of NaN, which would
# otherwise make the boolean mask invalid and raise on indexing
trash = df[df['title'].str.contains('Trash', na=False)]
trash = collect_words(trash)

stop_words = ['trash', 'st', 'ave', 'street', 'nobb', 'bb', 'ticketbb']
generate_wordcloud(trash, 'trash', stop_words)

In [8]:
# word cloud for 311 requests titled 'Tree Related Issues'
trees = collect_words(df.loc[df['title'].eq('Tree Related Issues')])

# extra words to exclude for this category
stop_words = ['st', 'ave', 'street', 'tree', 'trees']
generate_wordcloud(trees, title='trees', stop_words=stop_words)

In [9]:
# word cloud for 311 requests titled 'Snow Plowing, Salting or Sanding'
snow = collect_words(df.loc[df['title'].eq('Snow Plowing, Salting or Sanding')])

# extra words to exclude for this category
stop_words = ['st', 'ave', 'street', 'snow', 'plow', 'plowed', 'plowing']
generate_wordcloud(snow, title='snow_plow', stop_words=stop_words)