# Acquire

- https://www.latimes.com/california/story/2020-03-16/los-angeles-parking-ticket-street-sweeping-coronavirus-covid19
- https://www.latimes.com/california/story/2020-10-15/street-sweeping-parking-enforcement-resumes-today
- https://abc7.com/society/las-resumed-parking-enforcement-prompts-outcry/7079278/

- https://www.theeastsiderla.com/site/about_the_eastsider/
- https://xtown.la/2020/10/15/parking-ticket-los-angeles/

In [1]:
import numpy as np
import pandas as pd
import os
import spacy

from pdfminer.high_level import extract_text
from transformers import pipeline



In [2]:
# Allocate a pipeline for question-answering
question_answerer = pipeline('question-answering')
question_answerer({
    'question': 'What is the name of the repository ?',
    'context': 'Pipeline have been included in the huggingface/transformers repository'
})

{'score': 0.5135613305454356,
 'start': 35,
 'end': 59,
 'answer': 'huggingface/transformers'}

In [3]:
# Load in the text from a pdf using pdfminer
extract_text('city-documents/city-council/LADOT-transition-plan.pdf')[:500]

'CITY OF LOS ANGELES\nINTER-DEPARTMENTAL MEMORANDUM\n\nDate:\n\nSeptember 17, 2020\n\nTo:\n\nHonorable City Council\nc/o City Clerk, Room 395, City Hall\nAttention: Honorable Mike Bonin, Chair, Transportation Committee\n\nFrom:\n\nSeleta J. Reynolds, General Manager ^ \nDepartment of Transportation\n\nSubject:\n\nTRANSITION PLAN TO RESUME PARKING ENFORCEMENT FOR PREVIOUSLY SUSPENDED \n\nPARKING INFRACTIONS AND PROPOSED ECONOMIC RELIEF MEASURES FOR PARKING \n\nFINES\n\nSUMMARY\n\nThe Los Angeles Department of Transportation '

In [4]:
# Create an empty dictionary to scrape text from all the pdfs
# stored in the city-documents folder.
documents = []

for root, dirs, files in os.walk("city-documents/"):
    for file in files:
        # If the ends with .pdf, display the path
        if file.endswith(".pdf"):
            print(os.path.join(root, file))
            # Add the filename and relative path as a dictionary to documents
            documents.append({'pdf_name': file,
                              'path': os.path.join(root, file)})

city-documents/city-council/LADOT-transition-plan.pdf
city-documents/city-council/public-outreach-period.pdf
city-documents/city-council/relief-report-motion.pdf
city-documents/city-council/relief-program-report-121720.pdf
city-documents/public-comments/public-comments-parking-enforcement.pdf
city-documents/LADOT/enforcement.pdf
city-documents/LADOT/citation-pay-program.pdf


In [5]:
# Convert the list of dictionaries into a dataframe
pdfs = pd.DataFrame(documents)
pdfs

Unnamed: 0,pdf_name,path
0,LADOT-transition-plan.pdf,city-documents/city-council/LADOT-transition-p...
1,public-outreach-period.pdf,city-documents/city-council/public-outreach-pe...
2,relief-report-motion.pdf,city-documents/city-council/relief-report-moti...
3,relief-program-report-121720.pdf,city-documents/city-council/relief-program-rep...
4,public-comments-parking-enforcement.pdf,city-documents/public-comments/public-comments...
5,enforcement.pdf,city-documents/LADOT/enforcement.pdf
6,citation-pay-program.pdf,city-documents/LADOT/citation-pay-program.pdf


In [6]:
# Collect the relative paths for each pdfs
pdfs_to_scrape = []

for index, file in pdfs.iterrows():
    print(file['path'])
    pdfs_to_scrape.append(file['path'])

city-documents/city-council/LADOT-transition-plan.pdf
city-documents/city-council/public-outreach-period.pdf
city-documents/city-council/relief-report-motion.pdf
city-documents/city-council/relief-program-report-121720.pdf
city-documents/public-comments/public-comments-parking-enforcement.pdf
city-documents/LADOT/enforcement.pdf
city-documents/LADOT/citation-pay-program.pdf


In [7]:
# Display all file paths
pdfs_to_scrape

['city-documents/city-council/LADOT-transition-plan.pdf',
 'city-documents/city-council/public-outreach-period.pdf',
 'city-documents/city-council/relief-report-motion.pdf',
 'city-documents/city-council/relief-program-report-121720.pdf',
 'city-documents/public-comments/public-comments-parking-enforcement.pdf',
 'city-documents/LADOT/enforcement.pdf',
 'city-documents/LADOT/citation-pay-program.pdf']

In [8]:
# Create an empty list to store text extracted from each pdf.
text = []

# Scrape the text from each pdf and store the result in text
for file in pdfs_to_scrape:
    text.append(extract_text(file))

In [9]:
# Display the number of documents stored in the variable `text`.
len(text)

7

In [10]:
# Display text from the first document
print(text[0][:200])

CITY OF LOS ANGELES
INTER-DEPARTMENTAL MEMORANDUM

Date:

September 17, 2020

To:

Honorable City Council
c/o City Clerk, Room 395, City Hall
Attention: Honorable Mike Bonin, Chair, Transportation Com


In [11]:
# Display the first 200 characters of each docuemnt in the variable `text`.
for i in range(0, len(text)):
    print(f"\nDOCUMENT #{i+1}")
    print("-----------------")
    print(text[i][:20].strip())


DOCUMENT #1
-----------------
CITY OF LOS ANGELES

DOCUMENT #2
-----------------
MOTION

3 0 A

I MOV

DOCUMENT #3
-----------------
TRANSPORTATION

MOTI

DOCUMENT #4
-----------------
File No. 20-1365

TR

DOCUMENT #5
-----------------
Communication from P

DOCUMENT #6
-----------------
FOR IMMEDI

DOCUMENT #7
-----------------
FOR IMMEDI


In [12]:
# Load the large English NLP model
nlp = spacy.load('en_core_web_lg')

# The text we want to examine

# Parse the text with spaCy. This runs the entire pipeline.
doc = nlp(text[0])

# 'doc' now contains a parsed version of text. We can use it to do anything we want!
# For example, this will print out all the named entities that were detected:
for entity in doc.ents:
    print(f"{entity.text:<25} ({entity.label_})")

CITY OF                   (ORG)
LOS ANGELES               (GPE)
September 17, 2020        (DATE)
City Council              (ORG)
395                       (CARDINAL)
City Hall
                (FAC)
Mike Bonin                (PERSON)
Transportation Committee

 (ORG)
Seleta J. Reynolds        (PERSON)
Department of Transportation

 (ORG)
The Los Angeles Department of Transportation (ORG)
LADOT                     (ORG)
CF 20-0147-S7             (ORG)
the City Council          (ORG)
1                         (CARDINAL)
LADOT                     (ORG)
October 1, 2020           (DATE)
overnight                 (TIME)
2                         (CARDINAL)
LADOT                     (ORG)
October 15, 2020          (DATE)
3                         (CARDINAL)
LADOT                     (ORG)
October 22, 2020          (DATE)
4                         (CARDINAL)
LADOT                     (ORG)
January 1, 2021           (DATE)
March 4, 2020             (DATE)
the City Council          (ORG)
the Decla

In [13]:
public_comments = text[4].replace("\xa0", ' ')
public_comments = public_comments.replace("\n", ' ')

In [14]:
doc = nlp(public_comments)

filtered_tokens = [token for token in doc if not token.is_stop]

In [20]:
for i in doc.sents:
    print(i)

Communication from Public      Name: Date Submitted: Council File
No: Comments for Public Posting:
 
My comments are in reference to item #3 of today's  Armen Makasjian 08/17/2020 01:30 PM 20-0147-S7   Transportation Committee Meeting.
It is important that street sweeping continue through neighborhoods.
There are people sleeping in cars that have been parked for several weeks.
On North Berendo St. just north of Hollywood Blvd., there are several vehicles, including a large RV that have been parked for several weeks without moving.
This has resulted in an infestation of rats and vermin, including roaches.
It is important that the city continue street sweeping to prevent blight and unsanitary conditions.
Please reconsider the motion to CONTINUE street sweeping.
Thank you!
  
Communication from Public  Scott P. 09/28/2020 09:01 PM
20-0147-S7       Name: Date Submitted: Council File
No: Comments for Public Posting:  
My name is name is Scott and I live in El Sereno and I want to encourage

In [24]:
# Allocate a pipeline for sentiment-analysis
classifier = pipeline('sentiment-analysis')

for i in doc.sents:
    print(classifier(str(i)), i)

[{'label': 'NEGATIVE', 'score': 0.9835943579673767}] Communication from Public      Name: Date Submitted: Council File
[{'label': 'NEGATIVE', 'score': 0.9898175597190857}] No: Comments for Public Posting:
[{'label': 'POSITIVE', 'score': 0.7481210827827454}]  
[{'label': 'NEGATIVE', 'score': 0.7790317535400391}] My comments are in reference to item #3 of today's  Armen Makasjian 08/17/2020 01:30 PM 20-0147-S7   Transportation Committee Meeting.
[{'label': 'POSITIVE', 'score': 0.999167799949646}] It is important that street sweeping continue through neighborhoods.
[{'label': 'NEGATIVE', 'score': 0.9912372827529907}] There are people sleeping in cars that have been parked for several weeks.
[{'label': 'NEGATIVE', 'score': 0.9960814118385315}] On North Berendo St. just north of Hollywood Blvd., there are several vehicles, including a large RV that have been parked for several weeks without moving.
[{'label': 'NEGATIVE', 'score': 0.9971234202384949}] This has resulted in an infestation of r

[{'label': 'NEGATIVE', 'score': 0.9985065460205078}] For example, maybe the city can reduce costs by reducing the number and frequency of deployment of street sweeping vehicles and temporarily shift the roles of fleet operators to a different city service.
[{'label': 'NEGATIVE', 'score': 0.9981938004493713}] Plainly, the loss of revenue is a challenge for the city, but recovering that revenue stream should not come on the backs of LA residents who are still dealing with the economic and social impacts of COVID-19.
[{'label': 'NEGATIVE', 'score': 0.9540457129478455}] I urge City Council to do what's best for its constituents and extend the temporary suspension of parking restrictions and enforcement.
[{'label': 'NEGATIVE', 'score': 0.952495276927948}]   Communication from Public  Daniel Gaines 09/30/2020 09:37 AM 20-0147-S7       Name: Date Submitted: Council File
[{'label': 'NEGATIVE', 'score': 0.9898175597190857}] No: Comments for Public Posting:  
[{'label': 'POSITIVE', 'score': 0.9

[{'label': 'NEGATIVE', 'score': 0.894432544708252}] I find it comical that I don't see the same homeless encampments and abandoned cars just across the city border on Huntington Drive in Alhambra.
[{'label': 'NEGATIVE', 'score': 0.9992703795433044}] Could it be that their Police Department just sends them our way since they know that the City of Los Angeles will do nothing about it?
[{'label': 'POSITIVE', 'score': 0.904024064540863}] Please I beg you council, do something about it.
[{'label': 'POSITIVE', 'score': 0.9998352527618408}] Thank You   


In [16]:
# from spacy import displacy

# sentence = text[0]
# sentence_nlp = nlp(sentence)

# # visualize named entities
# displacy.render(sentence_nlp, style='ent', jupyter=True)