# Massachusetts Eviction Data and Housing Court Statistics

In [2]:
# import code block
from selenium import webdriver
from bs4 import BeautifulSoup
import zipfile
import pandas as pd
from io import StringIO
import requests
import csv
import time
import random
import matplotlib.pyplot as plt
from urllib.request import urlopen
import re
from pandas import ExcelWriter

## Get all the links for dates and store them in a list

In [3]:
# Download the MassLandlords eviction-data index page. The `with` block closes the
# HTTP response as soon as BeautifulSoup has finished parsing it.
with urlopen('https://masslandlords.net/policy/eviction-data/?ct=t%28Event+Webinar+2021-01-22+T-21+v1%29') as html:
    bs = BeautifulSoup(html, 'html.parser')

# collect all of the weekly "filings-week-ending-<date>" links into one list.
# The pattern is a plain prefix: dots are escaped so they only match a literal '.',
# and there is no trailing '*' (in a regex that would mean "zero or more hyphens",
# not "anything").
weeklyLink = re.compile(r'https://masslandlords\.net/policy/eviction-data/filings-week-ending-')

# only the href of each matching anchor is needed
urls = [anchor['href'] for anchor in bs.find_all('a', {'href': weeklyLink})]

# we want the urls for the first 6 weeks of data, let's verify we have them by printing them out
# Should be weeks 10/24 to 11/28 in 2020
print(urls[0:6])

['https://masslandlords.net/policy/eviction-data/filings-week-ending-2020-10-24/', 'https://masslandlords.net/policy/eviction-data/filings-week-ending-2020-10-31/', 'https://masslandlords.net/policy/eviction-data/filings-week-ending-2020-11-07/', 'https://masslandlords.net/policy/eviction-data/filings-week-ending-2020-11-14/', 'https://masslandlords.net/policy/eviction-data/filings-week-ending-2020-11-21/', 'https://masslandlords.net/policy/eviction-data/filings-week-ending-2020-11-28/']


## Define some functions so we don't have to rewrite code to scrape each week data

In [41]:
def getSoup(url):
    """Download a weekly filings page and return the text of its monospace report.

    param: url of the page to download.
    Returns the text of the first <p class="monospace"> found inside
    <section id="main-content"> (a plain string, despite the name "soup").
    Raises AttributeError if the page has no such section or paragraph,
    because the lookup then returns None.
    """
    # the context manager closes the HTTP response once the page has been parsed
    with urlopen(url) as response:
        bs = BeautifulSoup(response, 'html.parser')
    soup = bs.find('section', id='main-content').find('p', class_='monospace').get_text()
    return soup

def getSections(soup):
    """Cut one week's report text into the pieces that are later turned into tables.

    param: soup - the report text for one week.
    Returns a 16-tuple (a, b, c1, ..., c11, d, e, f): the six '--'-separated parts
    of the text, with the third part replaced by its eleven individual tables.
    Raises ValueError if the text does not split into exactly 6 parts, or the
    third part into exactly 11 tables.
    """
    tableBreak = "\r\n\r\n\r\n"

    # '--' separates the page into 6 parts; the third one holds the tables
    a, b, tablesText, d, e, f = soup.split('--')

    # drop the newline that opens the third part and the blank lines that close it,
    # so that splitting on the table break does not yield empty leading/trailing pieces
    tablesText = re.sub('^\r\n', '', tablesText, count=1)
    tablesText = re.sub(tableBreak + '$', '', tablesText, count=1)

    # inside the third part, tables are separated by blank lines rather than by '--'
    c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11 = tablesText.split(tableBreak)

    return (a, b, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, d, e, f)

def convertToDf(htmlTable,cNumSection):
    """Turn one text table (one of the sections produced above) into a pandas dataframe.

    param: htmlTable - the text of a single table; lines are separated by '\\r\\n' and
           cells by runs of two or more spaces.
    param: cNumSection - int telling which table of section 'c' (if any) is being
           converted. It selects which line holds the column names and which line
           holds the first data row.
    Returns the table as a pandas dataframe whose cells are all strings.
    NOTE: The line offsets below work most of the time, but small differences between
    the data for different weeks may cause certain tables to come out a column short.
    Feel free to change the offsets as you see fit; if it turns out to be easier to
    pass them in as parameters, this function can be changed easily.
    """
    # (line with the column names, line with the first data row) for each table layout
    if cNumSection in (1, 2, 4, 10, 11):
        headerLine, firstDataLine = 1, 2
    elif cNumSection == 6:
        headerLine, firstDataLine = 3, 4
    else:
        # tables 3, 5, 7, 8, 9 and any other section number
        headerLine, firstDataLine = 0, 1

    # remove the newline at the beginning of the table "block"
    tableText = re.sub('^\r\n', '', htmlTable, count=1)

    # one list of cells per line of the table
    rows = [re.split('  +', line) for line in tableText.split('\r\n')]

    # load the data into a dataframe
    return pd.DataFrame(rows[firstDataLine:], columns=rows[headerLine])

def createCsv(dataFrames, sheetNames, currUrlIndex,
              outputDir=r'/Users/gonzo/Documents/AP/AP-Evictions-Tracker/'):
    """Write one week's tables to an Excel workbook (.xlsx) named after that week.

    param: dataFrames - list of data frames to write.
    param: sheetNames - list of strings, the sheet name for each data frame (same order).
    param: currUrlIndex - index of the current URL in the global `urls` list created in
           the code block above; the workbook is named after that URL.
    param: outputDir - local directory the workbook is created in.
           NOTE: Update the default to a path on your computer, or pass your own.
    Raises IndexError if sheetNames is shorter than dataFrames.
    """
    import os  # local import: only needed to build the output path

    # name file: drop every '/' from the URL, then skip the 50-character prefix
    # 'https:masslandlords.netpolicyeviction-datafilings-' so that only
    # 'week-ending-YYYY-MM-DD' remains
    name = urls[currUrlIndex].replace('/', '')[50:]
    print(name)

    # The context manager saves and closes the workbook, even if a write fails.
    # (ExcelWriter.save() no longer exists in pandas 2.x.)
    with pd.ExcelWriter(os.path.join(outputDir, name + ".xlsx")) as writer:
        for i, frame in enumerate(dataFrames):
            # In this notebook the tables at positions 1, 3 and 5 share a sheet name with
            # the table that follows them, so they start at row 6 to leave rows 0-5 free.
            startRow = 6 if i in (1, 3, 5) else 0
            frame.to_excel(writer, sheet_name=sheetNames[i], startrow=startRow, startcol=0, index=False)

# week-ending-2020-10-24


In [5]:
# download the report text for the first weekly URL in the list
soup = getSoup(urls[0])

In [6]:
# split the report text into its 16 pieces: a, b, the 11 tables of section c, then d, e, f
a,b,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,d,e,f = getSections(soup)

In [7]:
# NOTE: section 'd' is formatted differently from section 'c', so it is parsed by hand here
# trim the newline that opens the section and the blank lines that close it
d = re.sub('^\r\n', '', d, count=1)
d = re.sub('\r\n\r\n\r\n$', '', d, count=1)

# the county table is the second blank-line-separated block of 'd';
# remove the newline at the beginning of that block
countyBlock = re.sub('^\r\n', '', d.split("\r\n\r\n")[1], count=1)

# one list of cells per line; cells are separated by runs of two or more spaces
d1 = [re.split('  +', line) for line in countyBlock.split('\r\n')]

# load the data into a dataframe, skipping the first line and naming the columns explicitly
countyRentersPercent = pd.DataFrame(d1[1:], columns=['County', 'Households', '% Renters'])

In [8]:
# Create dataframes for the tables in 'c': table number k is converted with section number k
(courts, partyTypeNum, partyTypePercent, plaintiffRepNum, plaintiffRepPercent,
 defendantRepNum, defendantRepPercent, numAdultsHouseholds, initiatingAction,
 ratePer100k, ratePer100kRenters) = [
    convertToDf(tableText, sectionNumber)
    for sectionNumber, tableText in enumerate(
        (c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11), start=1)
]

# display data frames, in the order they appear in the report
for frame in (courts, partyTypeNum, partyTypePercent, plaintiffRepNum, plaintiffRepPercent,
              defendantRepNum, defendantRepPercent, numAdultsHouseholds, initiatingAction,
              ratePer100k, ratePer100kRenters, countyRentersPercent):
    display(frame)


Unnamed: 0,(n),Count,Percent
0,northeast,16,32.7%
1,southeast,8,16.3%
2,eastern,6,12.2%
3,central,6,12.2%
4,metro_south,6,12.2%
5,western,5,10.2%
6,somerville district,1,2.0%
7,taunton district,1,2.0%


Unnamed: 0,(n),Plaintiffs,Defendants
0,Corporate Entity,17,0
1,Natural Person,32,49
2,Total,49,49


Unnamed: 0,(%),Plaintiffs,Defendants
0,Corporate Entity,34.7%,0.0%
1,Natural Person,65.3%,100.0%
2,Total,100.0%,100.0%


Unnamed: 0,(n),Has Attorney,Pro Se,Total
0,Required,16,1,17
1,Optional,12,20,32
2,Total,28,21,49


Unnamed: 0,(%),Has Attorney,Pro Se,Total
0,Required,32.7%,2.0%,34.7%
1,Optional,24.5%,40.8%,65.3%
2,Total,57.1%,42.9%,100.0%


Unnamed: 0,(n),Has Attorney,Pro Se,Total
0,Required,0,0,0
1,Optional,5,44,49
2,Total,5,44,49


Unnamed: 0,(%),Has Attorney,Pro Se,Total
0,Required,0.0%,0.0%,0.0%
1,Optional,10.2%,89.8%,100.0%
2,Total,10.2%,89.8%,100.0%


Unnamed: 0,Number of Adults in Households,Count,Percent
0,1,36,73.5%
1,2,10,20.4%
2,3,3,6.1%
3,Total,49,100.0%


Unnamed: 0,Initiating Action,Count,Percent
0,Cause,21,42.9%
1,Non-payment,18,36.7%
2,No Fault,5,10.2%
3,Foreclosure,3,6.1%
4,Other,2,4.1%


Unnamed: 0,Municipality,Residents,Count
0,Vineyard Haven,52.0,1.0
1,Osterville,28.0,1.0
2,East Falmouth,17.0,1.0
3,Topsfield,16.0,1.0
4,Shirley,13.0,1.0
5,Southbridge,11.0,2.0
6,Amesbury,6.0,1.0
7,Brockton,5.0,5.0
8,Newburyport,5.0,1.0
9,Pembroke,5.0,1.0


Unnamed: 0,County,Renter Households,Count
0,Dukes,24.0,1.0
1,Plymouth,20.0,9.0
2,Berkshire,12.0,2.0
3,Essex,11.0,13.0
4,Barnstable,10.0,2.0
5,Hampden,4.0,3.0
6,Middlesex,3.0,7.0
7,Worcester,3.0,4.0
8,Bristol,2.0,2.0
9,Suffolk,2.0,4.0


Unnamed: 0,County,Households,% Renters
0,Barnstable,96509,20.0%
1,Berkshire,53792,30.0%
2,Bristol,220528,37.3%
3,Dukes,18146,22.6%
4,Essex,297898,36.9%
5,Franklin,30927,34.1%
6,Hampden,179970,39.5%
7,Hampshire,60002,30.7%
8,Middlesex,612366,38.1%
9,Nantucket,11399,30.8%


## Write to Excel workbook (.xlsx)

In [9]:
# Pair every table with the worksheet it is written to; a sheet name appears twice
# where two tables share one worksheet. createCsv decides where the file is saved.
tablesBySheet = [
    ('Courts', courts),
    ('Party Type', partyTypeNum),
    ('Party Type', partyTypePercent),
    ('Plaintiff Representation', plaintiffRepNum),
    ('Plaintiff Representation', plaintiffRepPercent),
    ('Defendant Representation', defendantRepNum),
    ('Defendant Representation', defendantRepPercent),
    ('Number of Adults in Households', numAdultsHouseholds),
    ('Initiating Action', initiatingAction),
    ('Municipality rate per 100k', ratePer100k),
    ('County rate per 100k', ratePer100kRenters),
    ('County Households % Renters', countyRentersPercent),
]
sheetNames = [sheet for sheet, _ in tablesBySheet]
dataFrames = [table for _, table in tablesBySheet]

createCsv(dataFrames, sheetNames, 0)

week-ending-2020-10-24


# week-ending-2020-10-31

In [30]:
# download the report for the second weekly URL and cut it into its sections
soup = getSoup(urls[1])
a, b, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, d, e, f = getSections(soup)

# NOTE: section 'd' is formatted differently from section 'c', so it is parsed by hand here
# trim the newline that opens the section and the blank lines that close it
d = re.sub('^\r\n', '', d, count=1)
d = re.sub('\r\n\r\n\r\n$', '', d, count=1)

# the county table is the second blank-line-separated block of 'd';
# remove the newline at the beginning of that block
countyBlock = re.sub('^\r\n', '', d.split("\r\n\r\n")[1], count=1)

# one list of cells per line; cells are separated by runs of two or more spaces
d1 = [re.split('  +', line) for line in countyBlock.split('\r\n')]

# load the data into a dataframe, skipping the first line and naming the columns explicitly
countyRentersPercent = pd.DataFrame(d1[1:], columns=['County', 'Households', '% Renters'])

# Create dataframes for the tables in 'c': table number k is converted with section number k
(courts, partyTypeNum, partyTypePercent, plaintiffRepNum, plaintiffRepPercent,
 defendantRepNum, defendantRepPercent, numAdultsHouseholds, initiatingAction,
 ratePer100k, ratePer100kRenters) = [
    convertToDf(tableText, sectionNumber)
    for sectionNumber, tableText in enumerate(
        (c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11), start=1)
]

# display data frames, in the order they appear in the report
for frame in (courts, partyTypeNum, partyTypePercent, plaintiffRepNum, plaintiffRepPercent,
              defendantRepNum, defendantRepPercent, numAdultsHouseholds, initiatingAction,
              ratePer100k, ratePer100kRenters, countyRentersPercent):
    display(frame)

Unnamed: 0,(n),Count,Percent
0,southeast,20,21.1%
1,northeast,19,20.0%
2,central,17,17.9%
3,metro_south,11,11.6%
4,eastern,9,9.5%
5,western,5,5.3%
6,malden district,3,3.2%
7,pittsfield district,3,3.2%
8,natick district,2,2.1%
9,bmc east boston,2,2.1%


Unnamed: 0,(n),Plaintiffs,Defendants
0,Corporate Entity,33,0
1,Natural Person,62,95
2,Total,95,95


Unnamed: 0,(%),Plaintiffs,Defendants
0,Corporate Entity,34.7%,0.0%
1,Natural Person,65.3%,100.0%
2,Total,100.0%,100.0%


Unnamed: 0,(n),Has Attorney,Pro Se,Total
0,Required,32,1,33
1,Optional,26,36,62
2,Total,58,37,95


Unnamed: 0,(%),Has Attorney,Pro Se,Total
0,Required,33.7%,1.1%,34.7%
1,Optional,27.4%,37.9%,65.3%
2,Total,61.1%,38.9%,100.0%


Unnamed: 0,(n),Has Attorney,Pro Se,Total
0,Required,0,0,0
1,Optional,9,86,95
2,Total,9,86,95


Unnamed: 0,(%),Has Attorney,Pro Se,Total
0,Required,0.0%,0.0%,0.0%
1,Optional,9.5%,90.5%,100.0%
2,Total,9.5%,90.5%,100.0%


Unnamed: 0,Number of Adults in Households,Count,Percent
0,0,7.0,7.4%
1,1,52.0,54.7%
2,2,27.0,28.4%
3,3,8.0,8.4%
4,5,1.0,1.1%
5,Total,95.0,100.0%
6,Note: Households may appear with zero adults e...,,


Unnamed: 0,Initiating Action,Count,Percent
0,Non-payment,34,35.8%
1,Cause,30,31.6%
2,Other,14,14.7%
3,No Fault,13,13.7%
4,Foreclosure,4,4.2%


Unnamed: 0,Municipality,Residents,Count
0,Baldwinville,52.0,1.0
1,Sherborn,48.0,2.0
2,Provincetown,33.0,1.0
3,Eastham,20.0,1.0
4,Yarmouth Port,19.0,1.0
5,Dover,17.0,1.0
6,East Falmouth,17.0,1.0
7,Whitinsville,14.0,1.0
8,Upton,13.0,1.0
9,Orange,12.0,1.0


Unnamed: 0,County,Renter Households,Count
0,Barnstable,36.0,7.0
1,Plymouth,23.0,10.0
2,Worcester,15.0,17.0
3,Berkshire,12.0,2.0
4,Bristol,12.0,10.0
5,Essex,9.0,10.0
6,Franklin,9.0,1.0
7,Middlesex,6.0,16.0
8,Hampshire,5.0,1.0
9,Norfolk,5.0,5.0


Unnamed: 0,County,Households,% Renters
0,Barnstable,96509,20.0%
1,Berkshire,53792,30.0%
2,Bristol,220528,37.3%
3,Dukes,18146,22.6%
4,Essex,297898,36.9%
5,Franklin,30927,34.1%
6,Hampden,179970,39.5%
7,Hampshire,60002,30.7%
8,Middlesex,612366,38.1%
9,Nantucket,11399,30.8%


In [31]:
# Pair every table with the worksheet it is written to; a sheet name appears twice
# where two tables share one worksheet. createCsv decides where the file is saved.
tablesBySheet = [
    ('Courts', courts),
    ('Party Type', partyTypeNum),
    ('Party Type', partyTypePercent),
    ('Plaintiff Representation', plaintiffRepNum),
    ('Plaintiff Representation', plaintiffRepPercent),
    ('Defendant Representation', defendantRepNum),
    ('Defendant Representation', defendantRepPercent),
    ('Number of Adults in Households', numAdultsHouseholds),
    ('Initiating Action', initiatingAction),
    ('Municipality rate per 100k', ratePer100k),
    ('County rate per 100k', ratePer100kRenters),
    ('County Households % Renters', countyRentersPercent),
]
sheetNames = [sheet for sheet, _ in tablesBySheet]
dataFrames = [table for _, table in tablesBySheet]

createCsv(dataFrames, sheetNames, 1)

week-ending-2020-10-31


# week-ending-2020-11-07


In [42]:
# download the report for the third weekly URL and cut it into its sections
soup = getSoup(urls[2])
a, b, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, d, e, f = getSections(soup)

# NOTE: section 'd' is formatted differently from section 'c', so it is parsed by hand here
# trim the newline that opens the section and the blank lines that close it
d = re.sub('^\r\n', '', d, count=1)
d = re.sub('\r\n\r\n\r\n$', '', d, count=1)

# the county table is the second blank-line-separated block of 'd';
# remove the newline at the beginning of that block
countyBlock = re.sub('^\r\n', '', d.split("\r\n\r\n")[1], count=1)

# one list of cells per line; cells are separated by runs of two or more spaces
d1 = [re.split('  +', line) for line in countyBlock.split('\r\n')]

# load the data into a dataframe, skipping the first line and naming the columns explicitly
countyRentersPercent = pd.DataFrame(d1[1:], columns=['County', 'Households', '% Renters'])

# Create dataframes for the tables in 'c': table number k is converted with section number k
(courts, partyTypeNum, partyTypePercent, plaintiffRepNum, plaintiffRepPercent,
 defendantRepNum, defendantRepPercent, numAdultsHouseholds, initiatingAction,
 ratePer100k, ratePer100kRenters) = [
    convertToDf(tableText, sectionNumber)
    for sectionNumber, tableText in enumerate(
        (c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11), start=1)
]

# display data frames, in the order they appear in the report
for frame in (courts, partyTypeNum, partyTypePercent, plaintiffRepNum, plaintiffRepPercent,
              defendantRepNum, defendantRepPercent, numAdultsHouseholds, initiatingAction,
              ratePer100k, ratePer100kRenters, countyRentersPercent):
    display(frame)

Unnamed: 0,(n),Count,Percent
0,northeast,42,26.1%
1,southeast,40,24.8%
2,central,19,11.8%
3,eastern,16,9.9%
4,western,14,8.7%
5,metro_south,11,6.8%
6,haverhill district,9,5.6%
7,malden district,5,3.1%
8,fall river district,3,1.9%
9,barnstable district,1,0.6%


Unnamed: 0,(n),Plaintiffs,Defendants
0,Corporate Entity,63,1
1,Natural Person,98,160
2,Total,161,161


Unnamed: 0,(%),Plaintiffs,Defendants
0,Corporate Entity,39.1%,0.6%
1,Natural Person,60.9%,99.4%
2,Total,100.0%,100.0%


Unnamed: 0,(n),Has Attorney,Pro Se,Total
0,Required,57,6,63
1,Optional,39,59,98
2,Total,96,65,161


Unnamed: 0,(%),Has Attorney,Pro Se,Total
0,Required,35.4%,3.7%,39.1%
1,Optional,24.2%,36.6%,60.9%
2,Total,59.6%,40.4%,100.0%


Unnamed: 0,(n),Has Attorney,Pro Se,Total
0,Required,1,0,1
1,Optional,6,154,160
2,Total,7,154,161


Unnamed: 0,(%),Has Attorney,Pro Se,Total
0,Required,0.6%,0.0%,0.6%
1,Optional,3.7%,95.7%,99.4%
2,Total,4.3%,95.7%,100.0%


Unnamed: 0,Number of Adults in Households,Count,Percent
0,0,14.0,8.7%
1,1,94.0,58.4%
2,2,44.0,27.3%
3,3,6.0,3.7%
4,4,1.0,0.6%
5,5,1.0,0.6%
6,6,1.0,0.6%
7,Total,161.0,100.0%
8,Note: Households may appear with zero adults e...,,


Unnamed: 0,Initiating Action,Count,Percent
0,Non-payment,92,57.1%
1,Cause,29,18.0%
2,Other,19,11.8%
3,No Fault,15,9.3%
4,Foreclosure,6,3.7%


Unnamed: 0,Municipality,Residents,Count
0,Rowe,254,1
1,Shelburne Falls,56,1
2,West Yarmouth,37,2
3,Provincetown,33,1
4,Dunstable,31,1
...,...,...,...
64,Taunton,1,1
65,(not given),0,1
66,Boston,0,2
67,Cambridge,0,1


Unnamed: 0,County,Renter Households,Count
0,Bristol,43.0,36.0
1,Barnstable,41.0,8.0
2,Essex,31.0,35.0
3,Franklin,28.0,3.0
4,Dukes,24.0,1.0
5,Hampshire,16.0,3.0
6,Plymouth,16.0,7.0
7,Middlesex,12.0,29.0
8,Hampden,11.0,8.0
9,Worcester,11.0,13.0


Unnamed: 0,County,Households,% Renters
0,Barnstable,96509,20.0%
1,Berkshire,53792,30.0%
2,Bristol,220528,37.3%
3,Dukes,18146,22.6%
4,Essex,297898,36.9%
5,Franklin,30927,34.1%
6,Hampden,179970,39.5%
7,Hampshire,60002,30.7%
8,Middlesex,612366,38.1%
9,Nantucket,11399,30.8%


In [43]:
# Pair every table with the worksheet it is written to; a sheet name appears twice
# where two tables share one worksheet. createCsv decides where the file is saved.
tablesBySheet = [
    ('Courts', courts),
    ('Party Type', partyTypeNum),
    ('Party Type', partyTypePercent),
    ('Plaintiff Representation', plaintiffRepNum),
    ('Plaintiff Representation', plaintiffRepPercent),
    ('Defendant Representation', defendantRepNum),
    ('Defendant Representation', defendantRepPercent),
    ('Number of Adults in Households', numAdultsHouseholds),
    ('Initiating Action', initiatingAction),
    ('Municipality rate per 100k', ratePer100k),
    ('County rate per 100k', ratePer100kRenters),
    ('County Households % Renters', countyRentersPercent),
]
sheetNames = [sheet for sheet, _ in tablesBySheet]
dataFrames = [table for _, table in tablesBySheet]

createCsv(dataFrames, sheetNames, 2)

week-ending-2020-11-07


# week-ending-2020-11-14


In [44]:
# download the report for the fourth weekly URL and cut it into its sections
soup = getSoup(urls[3])
a, b, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, d, e, f = getSections(soup)

# NOTE: section 'd' is formatted differently from section 'c', so it is parsed by hand here
# trim the newline that opens the section and the blank lines that close it
d = re.sub('^\r\n', '', d, count=1)
d = re.sub('\r\n\r\n\r\n$', '', d, count=1)

# the county table is the second blank-line-separated block of 'd';
# remove the newline at the beginning of that block
countyBlock = re.sub('^\r\n', '', d.split("\r\n\r\n")[1], count=1)

# one list of cells per line; cells are separated by runs of two or more spaces
d1 = [re.split('  +', line) for line in countyBlock.split('\r\n')]

# load the data into a dataframe, skipping the first line and naming the columns explicitly
countyRentersPercent = pd.DataFrame(d1[1:], columns=['County', 'Households', '% Renters'])

# Create dataframes for the tables in 'c': table number k is converted with section number k
(courts, partyTypeNum, partyTypePercent, plaintiffRepNum, plaintiffRepPercent,
 defendantRepNum, defendantRepPercent, numAdultsHouseholds, initiatingAction,
 ratePer100k, ratePer100kRenters) = [
    convertToDf(tableText, sectionNumber)
    for sectionNumber, tableText in enumerate(
        (c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11), start=1)
]

# display data frames, in the order they appear in the report
for frame in (courts, partyTypeNum, partyTypePercent, plaintiffRepNum, plaintiffRepPercent,
              defendantRepNum, defendantRepPercent, numAdultsHouseholds, initiatingAction,
              ratePer100k, ratePer100kRenters, countyRentersPercent):
    display(frame)

Unnamed: 0,(n),Count,Percent
0,northeast,103,20.8%
1,central,96,19.4%
2,southeast,80,16.1%
3,eastern,70,14.1%
4,western,45,9.1%
5,metro_south,24,4.8%
6,lowell district,11,2.2%
7,pittsfield district,11,2.2%
8,fall river district,9,1.8%
9,ayer district,4,0.8%


Unnamed: 0,(n),Plaintiffs,Defendants
0,Corporate Entity,232,0
1,Natural Person,264,496
2,Total,496,496


Unnamed: 0,(%),Plaintiffs,Defendants
0,Corporate Entity,46.8%,0.0%
1,Natural Person,53.2%,100.0%
2,Total,100.0%,100.0%


Unnamed: 0,(n),Has Attorney,Pro Se,Total
0,Required,226,6,232
1,Optional,77,187,264
2,Total,303,193,496


Unnamed: 0,(%),Has Attorney,Pro Se,Total
0,Required,45.6%,1.2%,46.8%
1,Optional,15.5%,37.7%,53.2%
2,Total,61.1%,38.9%,100.0%


Unnamed: 0,(n),Has Attorney,Pro Se,Total
0,Required,0,0,0
1,Optional,7,489,496
2,Total,7,489,496


Unnamed: 0,(%),Has Attorney,Pro Se,Total
0,Required,0.0%,0.0%,0.0%
1,Optional,1.4%,98.6%,100.0%
2,Total,1.4%,98.6%,100.0%


Unnamed: 0,Number of Adults in Households,Count,Percent
0,0,34.0,6.9%
1,1,333.0,67.1%
2,2,109.0,22.0%
3,3,17.0,3.4%
4,4,2.0,0.4%
5,5,1.0,0.2%
6,Total,496.0,100.0%
7,Note: Households may appear with zero adults d...,,


Unnamed: 0,Initiating Action,Count,Percent
0,Non-payment,355,71.6%
1,Unknown/Other,78,15.7%
2,Cause,42,8.5%
3,No Fault,17,3.4%
4,Foreclosure,4,0.8%
5,Other,0,0.0%


Unnamed: 0,Municipality,Residents,Count
0,Dennis Port,63,2
1,Westborough,54,10
2,Fall River,48,43
3,West Wareham,46,1
4,Fitchburg,44,18
...,...,...,...
99,South Boston,2,1
100,Dorchester,1,1
101,Malden,1,1
102,(not given),0,23


Unnamed: 0,County,Renter Households,Count
0,Berkshire,105.0,17.0
1,Bristol,96.0,79.0
2,Worcester,81.0,91.0
3,Essex,69.0,76.0
4,Barnstable,67.0,13.0
5,Plymouth,53.0,23.0
6,Hampden,50.0,36.0
7,Hampshire,43.0,8.0
8,Middlesex,28.0,67.0
9,Nantucket,28.0,1.0


Unnamed: 0,County,Households,% Renters
0,Barnstable,96509,20.0%
1,Berkshire,53792,30.0%
2,Bristol,220528,37.3%
3,Dukes,18146,22.6%
4,Essex,297898,36.9%
5,Franklin,30927,34.1%
6,Hampden,179970,39.5%
7,Hampshire,60002,30.7%
8,Middlesex,612366,38.1%
9,Nantucket,11399,30.8%


In [45]:
# Pair every table with the worksheet it is written to; a sheet name appears twice
# where two tables share one worksheet. createCsv decides where the file is saved.
tablesBySheet = [
    ('Courts', courts),
    ('Party Type', partyTypeNum),
    ('Party Type', partyTypePercent),
    ('Plaintiff Representation', plaintiffRepNum),
    ('Plaintiff Representation', plaintiffRepPercent),
    ('Defendant Representation', defendantRepNum),
    ('Defendant Representation', defendantRepPercent),
    ('Number of Adults in Households', numAdultsHouseholds),
    ('Initiating Action', initiatingAction),
    ('Municipality rate per 100k', ratePer100k),
    ('County rate per 100k', ratePer100kRenters),
    ('County Households % Renters', countyRentersPercent),
]
sheetNames = [sheet for sheet, _ in tablesBySheet]
dataFrames = [table for _, table in tablesBySheet]

createCsv(dataFrames, sheetNames, 3)

week-ending-2020-11-14


# week-ending-2020-11-21

In [46]:
# download the report for the fifth weekly URL and cut it into its sections
soup = getSoup(urls[4])
a, b, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, d, e, f = getSections(soup)

# NOTE: section 'd' is formatted differently from section 'c', so it is parsed by hand here
# trim the newline that opens the section and the blank lines that close it
d = re.sub('^\r\n', '', d, count=1)
d = re.sub('\r\n\r\n\r\n$', '', d, count=1)

# the county table is the second blank-line-separated block of 'd';
# remove the newline at the beginning of that block
countyBlock = re.sub('^\r\n', '', d.split("\r\n\r\n")[1], count=1)

# one list of cells per line; cells are separated by runs of two or more spaces
d1 = [re.split('  +', line) for line in countyBlock.split('\r\n')]

# load the data into a dataframe, skipping the first line and naming the columns explicitly
countyRentersPercent = pd.DataFrame(d1[1:], columns=['County', 'Households', '% Renters'])

# Create dataframes for the tables in 'c': table number k is converted with section number k
(courts, partyTypeNum, partyTypePercent, plaintiffRepNum, plaintiffRepPercent,
 defendantRepNum, defendantRepPercent, numAdultsHouseholds, initiatingAction,
 ratePer100k, ratePer100kRenters) = [
    convertToDf(tableText, sectionNumber)
    for sectionNumber, tableText in enumerate(
        (c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11), start=1)
]

# display data frames, in the order they appear in the report
for frame in (courts, partyTypeNum, partyTypePercent, plaintiffRepNum, plaintiffRepPercent,
              defendantRepNum, defendantRepPercent, numAdultsHouseholds, initiatingAction,
              ratePer100k, ratePer100kRenters, countyRentersPercent):
    display(frame)

Unnamed: 0,(n),Count,Percent
0,northeast,172,21.2%
1,metro_south,126,15.6%
2,central,118,14.6%
3,southeast,106,13.1%
4,western,101,12.5%
5,eastern,69,8.5%
6,malden district,17,2.1%
7,barnstable district,14,1.7%
8,pittsfield district,12,1.5%
9,cambridge district,9,1.1%


Unnamed: 0,(n),Plaintiffs,Defendants
0,Corporate Entity,479,1
1,Natural Person,331,809
2,Total,810,810


Unnamed: 0,(%),Plaintiffs,Defendants
0,Corporate Entity,59.1%,0.1%
1,Natural Person,40.9%,99.9%
2,Total,100.0%,100.0%


Unnamed: 0,(n),Has Attorney,Pro Se,Total
0,Required,459,20,479
1,Optional,120,211,331
2,Total,579,231,810


Unnamed: 0,(%),Has Attorney,Pro Se,Total
0,Required,56.7%,2.5%,59.1%
1,Optional,14.8%,26.0%,40.9%
2,Total,71.5%,28.5%,100.0%


Unnamed: 0,(n),Has Attorney,Pro Se,Total
0,Required,0,1,1
1,Optional,14,795,809
2,Total,14,796,810


Unnamed: 0,(%),Has Attorney,Pro Se,Total
0,Required,0.0%,0.1%,0.1%
1,Optional,1.7%,98.1%,99.9%
2,Total,1.7%,98.3%,100.0%


Unnamed: 0,Number of Adults in Households,Count,Percent
0,0,42.0,5.2%
1,1,536.0,66.2%
2,2,197.0,24.3%
3,3,24.0,3.0%
4,4,9.0,1.1%
5,5,1.0,0.1%
6,6,1.0,0.1%
7,Total,810.0,100.0%
8,Note: Households may appear with zero adults d...,,


Unnamed: 0,Initiating Action,Count,Percent
0,Non-payment,600,74.1%
1,Unknown/Other,118,14.6%
2,Cause,74,9.1%
3,No Fault,14,1.7%
4,Foreclosure,4,0.5%


Unnamed: 0,Municipality,Residents,Count
0,Buzzards Bay,87,3
1,Norwood,87,25
2,West Chatham,78,1
3,Ashfield,57,1
4,Shutesbury,56,1
...,...,...,...
151,Peabody,1,1
152,Somerville,1,1
153,(not given),0,55
154,span,0,1


Unnamed: 0,County,Renter Households,Count
0,Berkshire,154.0,25.0
1,Plymouth,150.0,65.0
2,Barnstable,134.0,26.0
3,Worcester,96.0,107.0
4,Essex,95.0,105.0
5,Bristol,94.0,78.0
6,Norfolk,89.0,76.0
7,Hampden,80.0,57.0
8,Hampshire,70.0,13.0
9,Franklin,66.0,7.0


Unnamed: 0,County,Households,% Renters
0,Barnstable,96509,20.0%
1,Berkshire,53792,30.0%
2,Bristol,220528,37.3%
3,Dukes,18146,22.6%
4,Essex,297898,36.9%
5,Franklin,30927,34.1%
6,Hampden,179970,39.5%
7,Hampshire,60002,30.7%
8,Middlesex,612366,38.1%
9,Nantucket,11399,30.8%


In [47]:
# Pair every table with the worksheet it is written to; a sheet name appears twice
# where two tables share one worksheet. createCsv decides where the file is saved.
tablesBySheet = [
    ('Courts', courts),
    ('Party Type', partyTypeNum),
    ('Party Type', partyTypePercent),
    ('Plaintiff Representation', plaintiffRepNum),
    ('Plaintiff Representation', plaintiffRepPercent),
    ('Defendant Representation', defendantRepNum),
    ('Defendant Representation', defendantRepPercent),
    ('Number of Adults in Households', numAdultsHouseholds),
    ('Initiating Action', initiatingAction),
    ('Municipality rate per 100k', ratePer100k),
    ('County rate per 100k', ratePer100kRenters),
    ('County Households % Renters', countyRentersPercent),
]
sheetNames = [sheet for sheet, _ in tablesBySheet]
dataFrames = [table for _, table in tablesBySheet]

createCsv(dataFrames, sheetNames, 4)

week-ending-2020-11-21


# week-ending-2020-11-28

In [48]:
# download the report for the sixth weekly URL and cut it into its sections
soup = getSoup(urls[5])
a, b, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, d, e, f = getSections(soup)

# NOTE: section 'd' is formatted differently from section 'c', so it is parsed by hand here
# trim the newline that opens the section and the blank lines that close it
d = re.sub('^\r\n', '', d, count=1)
d = re.sub('\r\n\r\n\r\n$', '', d, count=1)

# the county table is the second blank-line-separated block of 'd';
# remove the newline at the beginning of that block
countyBlock = re.sub('^\r\n', '', d.split("\r\n\r\n")[1], count=1)

# one list of cells per line; cells are separated by runs of two or more spaces
d1 = [re.split('  +', line) for line in countyBlock.split('\r\n')]

# load the data into a dataframe, skipping the first line and naming the columns explicitly
countyRentersPercent = pd.DataFrame(d1[1:], columns=['County', 'Households', '% Renters'])

# Create dataframes for the tables in 'c': table number k is converted with section number k
(courts, partyTypeNum, partyTypePercent, plaintiffRepNum, plaintiffRepPercent,
 defendantRepNum, defendantRepPercent, numAdultsHouseholds, initiatingAction,
 ratePer100k, ratePer100kRenters) = [
    convertToDf(tableText, sectionNumber)
    for sectionNumber, tableText in enumerate(
        (c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11), start=1)
]

# display data frames, in the order they appear in the report
for frame in (courts, partyTypeNum, partyTypePercent, plaintiffRepNum, plaintiffRepPercent,
              defendantRepNum, defendantRepPercent, numAdultsHouseholds, initiatingAction,
              ratePer100k, ratePer100kRenters, countyRentersPercent):
    display(frame)

Unnamed: 0,(n),Count,Percent
0,western,131,20.8%
1,central,93,14.8%
2,northeast,85,13.5%
3,southeast,76,12.1%
4,metro_south,71,11.3%
5,eastern,70,11.1%
6,chelsea district,9,1.4%
7,lynn district,9,1.4%
8,cambridge district,8,1.3%
9,quincy district,8,1.3%


Unnamed: 0,(n),Plaintiffs,Defendants
0,Corporate Entity,425,16
1,Natural Person,204,613
2,Total,629,629


Unnamed: 0,(%),Plaintiffs,Defendants
0,Corporate Entity,67.6%,2.5%
1,Natural Person,32.4%,97.5%
2,Total,100.0%,100.0%


Unnamed: 0,(n),Has Attorney,Pro Se,Total
0,Required,404,21,425
1,Optional,94,110,204
2,Total,498,131,629


Unnamed: 0,(%),Has Attorney,Pro Se,Total
0,Required,64.2%,3.3%,67.6%
1,Optional,14.9%,17.5%,32.4%
2,Total,79.2%,20.8%,100.0%


Unnamed: 0,(n),Has Attorney,Pro Se,Total
0,Required,0,16,16
1,Optional,10,603,613
2,Total,10,619,629


Unnamed: 0,(%),Has Attorney,Pro Se,Total
0,Required,0.0%,2.5%,2.5%
1,Optional,1.6%,95.9%,97.5%
2,Total,1.6%,98.4%,100.0%


Unnamed: 0,Number of Adults in Households,Count,Percent
0,0,66.0,10.5%
1,1,457.0,72.7%
2,2,81.0,12.9%
3,3,19.0,3.0%
4,4,3.0,0.5%
5,5,3.0,0.5%
6,Total,629.0,100.0%
7,Note: Households may appear with zero adults d...,,


Unnamed: 0,Initiating Action,Count,Percent
0,Non-payment,469,74.6%
1,Unknown/Other,103,16.4%
2,Cause,39,6.2%
3,Foreclosure,10,1.6%
4,No Fault,8,1.3%


Unnamed: 0,Municipality,Residents,Count
0,East Falmouth,69,4
1,Dennis Port,63,2
2,Harwich Port,57,1
3,Mashpee,57,8
4,North Adams,43,6
...,...,...,...
93,South Weymouth,1,1
94,Springfield,1,2
95,(not given),0,319
96,Boston,0,2


Unnamed: 0,County,Renter Households,Count
0,Barnstable,119.0,23.0
1,Worcester,79.0,88.0
2,Bristol,75.0,62.0
3,Berkshire,43.0,7.0
4,Nantucket,28.0,1.0
5,Plymouth,25.0,11.0
6,Essex,24.0,27.0
7,Hampshire,21.0,4.0
8,Middlesex,20.0,47.0
9,Norfolk,19.0,17.0


Unnamed: 0,County,Households,% Renters
0,Barnstable,96509,20.0%
1,Berkshire,53792,30.0%
2,Bristol,220528,37.3%
3,Dukes,18146,22.6%
4,Essex,297898,36.9%
5,Franklin,30927,34.1%
6,Hampden,179970,39.5%
7,Hampshire,60002,30.7%
8,Middlesex,612366,38.1%
9,Nantucket,11399,30.8%


In [49]:
# Pair every table with the worksheet it is written to; a sheet name appears twice
# where two tables share one worksheet. createCsv decides where the file is saved.
tablesBySheet = [
    ('Courts', courts),
    ('Party Type', partyTypeNum),
    ('Party Type', partyTypePercent),
    ('Plaintiff Representation', plaintiffRepNum),
    ('Plaintiff Representation', plaintiffRepPercent),
    ('Defendant Representation', defendantRepNum),
    ('Defendant Representation', defendantRepPercent),
    ('Number of Adults in Households', numAdultsHouseholds),
    ('Initiating Action', initiatingAction),
    ('Municipality rate per 100k', ratePer100k),
    ('County rate per 100k', ratePer100kRenters),
    ('County Households % Renters', countyRentersPercent),
]
sheetNames = [sheet for sheet, _ in tablesBySheet]
dataFrames = [table for _, table in tablesBySheet]

createCsv(dataFrames, sheetNames, 5)

week-ending-2020-11-28
