Created by: [SmirkyGraphs](http://smirkygraphs.github.io/). Code: [Github](https://github.com/SmirkyGraphs/Python-Notebooks). Source: [PVD-311](http://www.providenceri.gov/pvd-311/).
<hr>

# Providence 311 Requests Data Collection

This notebook contains code used to scrape information from Providence's 311 platform using the api.  
The data is then cleaned using pandas to remove blank columns and convert the timestamp.

In [1]:
from pandas.io.json import json_normalize
import pandas as pd
import requests
import random
import json
import time

In [2]:
# data scraping
url = 'https://vc0.publicstuff.com/api/2.0/requests_list?client_id=1000017&device=iframe&limit=250&'
page_count = 1

frames = []
data = 'placeholder'

while data:
    # send request for data
    page = str(page_count)
    r = requests.get(url + f'page={page}') 

    # get the json response
    data = json.loads(r.content)['response']['requests']

    # get data into a dataframe
    df = pd.DataFrame.from_dict(data)
    df = df.to_json(orient='records')
    df = json_normalize(json.loads(df), meta=['key'], errors='ignore')
    frames.append(df)

    # next page
    page_count += 1

    # random sleep time
    delay = random.uniform(3,7)
    time.sleep(delay)

print(f'[status] page_count: {page_count}')
df = pd.concat(frames, sort=False)
df.to_csv('./data/raw/pvd_311_raw.csv', index=False)

[status] page_count: 172


In [3]:
# data cleaning
df = pd.read_csv('./data/raw/pvd_311_raw.csv', low_memory=False)

# remove request. prefix for columns
df.columns = [x[8:] for x in df.columns]

# remove unwanted columns
cols = [
    'user_follows',
    'user_comments',
    'user_request',
    'rank',
    'primary_attachment.id',
    'primary_attachment.extension',
    'primary_attachment.content_type',
    'primary_attachment.versions.small',
    'primary_attachment.versions.medium',
    'foreign_id'   
]

df = df.drop(columns=cols)

df['date_created'] = df['date_created'].apply(lambda x: pd.Timestamp(x, unit='s', tz='US/Eastern'))
df.to_csv('./data/clean/pvd_311_clean.csv', index=False)