In [78]:
# Set up imports
import pandas as pd
import numpy as np
import re
import sys

In [79]:
# Load dataset
init_df = pd.read_csv("Standardized Data/Final.csv")

In [80]:
# Drop the "Unnamed: 0" column (presumably an index saved by an earlier to_csv — confirm).
# Reassigning with errors="ignore" instead of inplace=True keeps this cell idempotent:
# re-running it no longer raises KeyError once the column is already gone.
init_df = init_df.drop(columns=["Unnamed: 0"], errors="ignore")

In [81]:
# Renumber the index as 0..n-1. reset_index returns a new frame, so the result has to
# be assigned for the call to have any effect (the bare call was a no-op).
# NOTE(review): no rows are dropped here, despite the earlier "drop rows with empty
# text" label — rows with missing Text stay in the frame.
init_df = init_df.reset_index(drop=True)
init_df.columns

Index(['Entity Type', 'Entity Name', 'City, State', 'Enforcement Action',
       'Date', 'Text', 'State Enforcing', 'Text Length'],
      dtype='object')

In [82]:
# Add a boolean flag column recording whether a row's Text has been processed;
# every row starts out as not processed.
init_df["Processed?"] = False

In [83]:
def find_length(text):
    """Return the length of `text`, or 0 when `text` is missing (NaN/None)."""
    return 0 if pd.isna(text) else len(text)

In [84]:
init_df

Unnamed: 0,Entity Type,Entity Name,"City, State",Enforcement Action,Date,Text,State Enforcing,Text Length,Processed?
0,RM,"UNITED MORTGAGE GROUP, INC.","PARMA, OH",REPORT AND RECOMMENDATION,01/27/2000,,OH,0,False
1,RM,"SUPERIOR HOME MORTGAGE, LLC","EUCLID, OH",NOTICE OF INTENT TO DENY,04/12/2000,,OH,0,False
2,RM,"UNITED MORTGAGE GROUP, INC.","PARMA, OH",DIVISION ORDER,04/12/2000,,OH,0,False
3,RM,"SUPERIOR HOME MORTGAGE, LLC","EUCLID, OH",REPORT AND RECOMMENDATION,11/02/2000,,OH,0,False
4,RM,"T. P. O. T. T., INC.","COLUMBUS, OH",NOTICE OF INTENT TO REVOKE,04/18/2001,STATE OF OHIO DEPARTMENT OF COMMERCE DIVISION ...,OH,8761,False
...,...,...,...,...,...,...,...,...,...
7236,MORTGAGE LOAN ORIGINATOR,JAMES P. DEORIO�,,CONSENT ORDER,11/26/2010,"COMMONWEALTH OF MASSACHUSETTS Suffolk, SS. ...",MA,6105,False
7237,MORTGAGE LOAN ORIGINATOR,MATTHEW J. VANCE,,CONSENT ORDER,11/30/2010,"COMMONWEALTH OF MASSACHUSETTS Suffolk, SS. ...",MA,6040,False
7238,MORTGAGE LOAN ORIGINATOR,RUSSELL TUNICK,,CONSENT ORDER,12/02/2010,"COMMONWEALTH OF MASSACHUSETTS Suffolk, SS. ...",MA,3990,False
7239,MORTGAGE COMPANY,"ADVANCED MORTGAGE SERVICES, LLC",,CONSENT ORDER,12/13/2010,"COMMONWEALTH OF MASSACHUSETTS Suffolk, SS. ...",MA,7271,False


In [85]:
# (Re)compute the character count of every Text entry; missing text counts as 0.
init_df["Text Length"] = init_df["Text"].apply(find_length)

In [86]:
init_df[init_df["Processed?"]==True].shape

(0, 9)

## Beginning of Filtering Process

In [87]:
# Function to extract text between two strings (CAPS SENSITIVE)
def extract_text(row, sub1, sub2):
    """Trim ``row["Text"]`` to the passage between two marker strings.

    Parameters
    ----------
    row : pandas.Series
        A record with at least the ``"Text"`` (str) and ``"Processed?"`` entries.
    sub1 : str
        Start marker (case sensitive); its first occurrence in the text is used.
    sub2 : str
        End marker (case sensitive); the first occurrence located *after the end
        of* ``sub1`` is used.

    Returns
    -------
    pandas.Series
        The input row itself when either marker is absent from the text.
        Otherwise a copy with ``"Found"`` set to True and, when the passage
        between the markers is non-empty, ``"Text"`` replaced by that passage
        and ``"Processed?"`` set to True.
    """
    text = row["Text"]
    if sub1 not in text or sub2 not in text:
        return row

    # DataFrame.apply does not support functions that mutate the object they are
    # handed, so all changes are made on a copy.
    row = row.copy()
    row["Found"] = True

    body_start = text.index(sub1) + len(sub1)
    # Search for the end marker only after the start marker has ended. Searching
    # from the start marker's first character could match an end marker that
    # overlaps it, which yields an empty slice and skips a later, valid end marker.
    body_end = text.find(sub2, body_start)
    if body_end > body_start:  # also rejects "not found" (-1) and an empty passage
        row["Text"] = text[body_start:body_end]
        row["Processed?"] = True
    return row
    

In [88]:
test = "Ted Strickland STATE OF OHIO Kimberly A. Zurz Governor Director DEPARTMENT OF COMMERCE Division of Financial Institutions Consumer Finance In the matter of: ) Case No. M2008-257 ) MARIO R. EVANS ) ORDER OF SUMMARY SUSPENSION, 6541 Equestrian Trail ) NOTICE OF INTENT TO REVOKE Medina, Ohio 44256 ) & ) NOTICE OF HEARING DIVISION ORDER WHEREAS, the Ohio Department of Commerce, by and through the Superintendent of the Division of Financial Institutions (Division), is charged with the responsibility of administering and enforcing the Ohio Mortgage Broker Act, codified in Ohio Revised Code (R.C.) Chapter 1322. In accordance therewith, the Division is accountable for the licensing of individuals as residential mortgage loan officers; and WHEREAS, Mario R. Evans (Respondent) held loan officer license number 2648 in calendar year 2007. His address of record is 6541 Equestrian Trail, Medina, Ohio 44256, and his date of birth is January 29, 1972. Respondent's employer of record is National Mortgage Banc, Inc., 3009 Smith Road, Suite 600, Akron, Ohio 44333; and WHEREAS, R.C. 1322.10(F)(2) requires the Division to suspend, without a prior hearing, the license of a licensee who failed to fulfill the continuing education requirements of R.C. 1322.052 until such time as the required continuing education is completed and a fine of five hundred dollars ($500.00) is paid to the treasurer of state to the credit of the consumer finance fund; pursuant to Ohio Administrative Code section 1301:8-7-08(C)(4), an individual with a suspended license is not permitted to originate residential mortgage loans; and WHEREAS, Respondent failed to fulfill the continuing education requirements of R.C. 1322.052 for calendar year 2007; and Pursuant to R.C. 1322.10(F)(2), loan officer license number 002648 issued to Respondent is hereby SUSPENDED. It is so ordered. NOTICE OF INTENT TO REVOKE LOAN OFFICER LICENSE In accordance with sections 1322.041 and 1322.10 of the R.C., and R.C. 
Chapter 119, the Division intends to REVOKE Respondent's loan officer license. Mario R. Evans Suspension Order, Notice of Intent to Revoke & Notice of Hearing Page 2 of 2 BASIS FOR PROPOSED ACTION The Division has conducted an investigation of Respondent, pursuant to R.C. 1322.031(B), and has found the following: A. The Division is authorized by R.C. 1322.10(A)(1)(a) to revoke a loan officer license if the Division finds that the licensee has violated or fail[ed] to comply with any provision of sections 1322.01 to 1322.12 of the Revised Code or the rules adopted under those sections or any other law applicable to the business conducted[.]' B. Respondent previously held a loan officer license during the 2007 calendar year. C. R.C. 1322.052 requires every licensed loan officer to complete at least six (6) hours of approved continuing education (CE) every calendar year (by December 31st). D. Respondent failed to complete the required 6 hours of CE credit for the 2007 calendar year as required by R.C. 1322.052. E. Because Respondent failed to comply with R.C. 1322.052, the Division is authorized under R.C. 1322.10(A)(1)(a) to revoke Respondent's loan officer license. NOTICE OF HEARING Therefore, pursuant to R.C. 1322.10(F)(4), R.C. Chapter 119 and Section 1301:8-7-27 of the Ohio Administrative Code, Respondent is hereby notified that a hearing shall be held on Thursday, May 22, 2008 at 9:30 a.m. on the continuation or termination of the suspension imposed herein and on the Notice of Intent to Revoke your Loan Officer License. Said hearing will be held in Room 1910 located on the 19th Floor of the Vern Riffe Center for Government and the Arts, 77 South High Street in Columbus. 
At the hearing, Respondent may appear in person, by Respondent's attorney, or by such other representative as is permitted to practice before the Agency, or Respondent may present its position, arguments, or contentions in writing, and, at the hearing, may present evidence and examine witnesses appearing for and against Respondent. Signed and sealed this 28th day of April 2008. LEIGH A. WILLIS Deputy Superintendent for Consumer Finance Division of Financial Institutions Ohio Department of Commerce "

In [89]:
# Locate the start marker, then the first end marker at or after it. The computed
# offset is reused instead of a hard-coded 2148 copied from an earlier run's output,
# so the check stays correct if the sample text changes.
basis_start = test.index("BASIS FOR PROPOSED ACTION")
print(basis_start)
print(test.index("NOTICE OF HEARING", basis_start))

2148
3159


In [90]:
init_df.loc[(init_df["State Enforcing"]=="MA")].shape

(323, 9)

In [91]:
# Count ALL Ohio rows in init_df (shape = rows, columns).
# NOTE(review): this does not filter on "Processed?" and it reads init_df rather than
# the working copy `df`, so it does not show how many rows are still unprocessed.
init_df.loc[(init_df["State Enforcing"]=="OH")].shape

(6918, 9)

In [92]:
# Function to Filter Text
def filter_text(df, start_term, end_term, state):
    """Run one start/end extraction pass over the still-unprocessed rows of a state.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with "State Enforcing", "Processed?" and "Text" columns.
    start_term, end_term : str
        Case-sensitive markers handed to ``extract_text``.
    state : str
        Only rows whose "State Enforcing" equals this value are considered.

    Returns
    -------
    pandas.DataFrame
        A new frame in which eligible rows have been passed through
        ``extract_text``; all other rows are returned unchanged.

    Prints the two markers, the literal "processed", and then the cumulative
    number of processed rows and of rows flagged "Found".
    """
    def _maybe_extract(row):
        # Rows already processed, from another state, or with missing text are skipped.
        eligible = (
            row["State Enforcing"] == state
            and row["Processed?"] == False
            and not pd.isna(row["Text"])
        )
        return extract_text(row, start_term, end_term) if eligible else row

    df = df.apply(_maybe_extract, axis=1)
    print(start_term)
    print(end_term)
    print("processed")
    print(len(df[df["Processed?"] == True]))
    # "Found" only exists once some row has matched; without this guard a pass
    # with no match on a fresh frame raised KeyError instead of reporting 0.
    print(len(df[df["Found"] == True]) if "Found" in df.columns else 0)
    return df


In [93]:
# Function to extract text between string to end(CAPS SENSITIVE)
def extract_text_end(row, sub):
    """Trim ``row["Text"]`` to everything after the LAST occurrence of a marker.

    Parameters
    ----------
    row : pandas.Series
        A record with at least the ``"Text"`` (str) and ``"Processed?"`` entries.
    sub : str
        Marker string (case sensitive).

    Returns
    -------
    pandas.Series
        The input row itself when the marker is absent. Otherwise a copy with
        ``"Found"`` set to True and, when any text follows the marker,
        ``"Text"`` replaced by that trailing text and ``"Processed?"`` set to True.
    """
    text = row["Text"]
    if sub not in text:
        return row

    # DataFrame.apply does not support functions that mutate the object they are
    # handed, so all changes are made on a copy.
    row = row.copy()
    row["Found"] = True

    tail = text[text.rindex(sub) + len(sub):]
    if tail:  # leave the row unprocessed when the marker is the very end of the text
        row["Text"] = tail
        row["Processed?"] = True
    return row

In [94]:
# Function to Filter Text to End
def filter_text_end(df, start_term, state):
    """Run one "marker to end of text" pass over the still-unprocessed rows of a state.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with "State Enforcing", "Processed?" and "Text" columns.
    start_term : str
        Case-sensitive marker handed to ``extract_text_end``.
    state : str
        Only rows whose "State Enforcing" equals this value are considered.

    Returns
    -------
    pandas.DataFrame
        A new frame in which eligible rows have been passed through
        ``extract_text_end``; all other rows are returned unchanged.

    Prints the marker, the literal "processed", and then the cumulative number
    of processed rows and of rows flagged "Found".
    """
    def _maybe_extract(row):
        # Rows already processed, from another state, or with missing text are skipped.
        eligible = (
            row["State Enforcing"] == state
            and row["Processed?"] == False
            and not pd.isna(row["Text"])
        )
        return extract_text_end(row, start_term) if eligible else row

    df = df.apply(_maybe_extract, axis=1)
    print(start_term)
    print("processed")
    print(len(df[df["Processed?"] == True]))
    # "Found" only exists once some row has matched; without this guard a pass
    # with no match on a fresh frame raised KeyError instead of reporting 0.
    print(len(df[df["Found"] == True]) if "Found" in df.columns else 0)
    return df

In [95]:
# Count the Ohio rows that are still unprocessed, broken down by enforcement action type.
# NOTE(review): `df` is only created in a later cell (df = init_df.copy()), so this cell
# fails on a fresh kernel with Restart & Run All; it belongs after the filtering cells.
df[(df["Processed?"]== False) & (df["State Enforcing"]=="OH")]["Enforcement Action"].value_counts()

REPORT AND RECOMMENDATION                        423
DIVISION ORDER                                    93
NOTICE OF INTENT TO DENY                          40
SETTLEMENT AGREEMENT                              26
NOTICE OF INTENT TO DENY RENEWAL                   7
NOTICE OF INTENT TO REVOKE                         6
CONSENT ORDER                                      5
NOTICE OF INTENT TO SUSPEND                        3
NOTICE OF INTENT TO FINE                           2
SUSPENSION ORDER                                   2
SUSPENSION ORDER & NOTICE OF INTENT TO REVOKE      2
NOTICE OF INTENT TO REVOKE AND FINE                1
NOTICE OF INTENT TO DENY (CE)                      1
Name: Enforcement Action, dtype: int64

In [98]:
# Work on a copy so that reassigning and trimming `df` leaves init_df untouched.
df = init_df.copy()

In [99]:
# Number of rows whose Text contains both marker phrases.
has_stipulations = df["Text"].str.contains("STIPULATIONS AND ADMISSIONS")
has_agreed_conditions = df["Text"].str.contains("AGREED CONDITIONS")
print(len(df[has_stipulations & has_agreed_conditions]))

1023


In [100]:
# Cleaning of Text (Ohio)
#
# Each call below is one extraction pass: filter_text keeps the text between a start
# and an end marker, filter_text_end keeps everything after a marker.
# ORDER MATTERS: a pass only looks at rows that are still unprocessed, so the first
# marker pair that matches a row decides what text is kept. To add a new pattern,
# copy the call syntax and append it below.
# The "hits" figures in the comments are the author's counts from an earlier exploration.

# DIVISION ORDER 2394 hits
df = filter_text(df, "DIVISION ORDER", "NOTICE OF RIGHT TO APPEAL", "OH")
# NOTICE OF APPELLATE RIGHTS 1652 hits
df = filter_text(df, "DIVISION ORDER", "NOTICE OF APPELLATE RIGHTS", "OH")

# BASIS FOR PROPOSED ACTION: 2207 hits
# NOTICE OF OPPORTUNITY: 2006 hits


df = filter_text(df, "BASIS FOR PROPOSED ACTION", "NOTICE OF OPPORTUNITY FOR A HEARING ON PROPOSED ACTION", "OH")
# NOTICE OF HEARING: 201 hits

df = filter_text(df, "BASIS FOR PROPOSED ACTION", "NOTICE OF HEARING", "OH")


# STIPULATIONS AND ADMISSIONS 1031 hits
# ACKNOWLEDGMENTS/LIABILITY RELEASE 696 hits
df = filter_text(df, "STIPULATIONS AND ADMISSIONS", "ACKNOWLEDGMENTS/LIABILITY RELEASE", "OH")
# AGREED CONDITIONS 334 hits
df = filter_text(df, "STIPULATIONS AND ADMISSIONS", "AGREED CONDITIONS", "OH")
# ACKNOWLEDGMENTS/LLABILITY RELEASE 1 hit (misspelled variant of the end marker, matched verbatim)
df = filter_text(df, "STIPULATIONS AND ADMISSIONS", "ACKNOWLEDGMENTS/LLABILITY RELEASE", "OH")
# Another variant of the same end marker, with a stray "]"
df = filter_text(df, "STIPULATIONS AND ADMISSIONS", "ACKNOWLEDGMENTS/LIABILITY] RELEASE", "OH")

# NOTICE OF RIGHT TO APPEAL 560 hits (disabled: identical to the first pass above)
# df = filter_text(df, "DIVISION ORDER", "NOTICE OF RIGHT TO APPEAL", "OH")
# NOTICE OF HEARING 119 hits
df = filter_text(df, "DIVISION ORDER", "NOTICE OF HEARING", "OH")
# CONCLUSIONS OF LAW 34 hits
df = filter_text(df, "DIVISION ORDER", "CONCLUSIONS OF LAW", "OH")
# Case No 34 hits (disabled)
# df = filter_text(df, "DIVISION ORDER", "Case No", "OH")
# NOW THEREFORE 3 hits
df = filter_text(df, "DIVISION ORDER", "NOW THEREFORE", "OH")
# ORDERED AND DECREED 6 hits
df = filter_text(df, "DIVISION ORDER", "ORDERED AND DECREED", "OH")
# NOTICE OF APPEAL RIGHTS 4 hits
df = filter_text(df, "DIVISION ORDER", "NOTICE OF APPEAL RIGHTS", "OH")

# Fallback for the remaining "DIVISION ORDER" rows: keep everything after the marker.
df = filter_text_end(df, "DIVISION ORDER","OH")

# FINDINGS OF FACT 394 hits
# CONCLUSIONS OF LAW 393 hits
# The variants below differ only in leading punctuation / spacing / letter case of the markers.
df = filter_text(df, ". FINDINGS OF FACT", ". CONCLUSIONS OF LAW", "OH")
df = filter_text(df, ". FINDINGS OF FACT", " CONCLUSIONS OF LAW", "OH")
# G.R.C. 1322.06 1 hit
df = filter_text(df, "The Division conducted an investigation and found", "CONCLUSIONS OF LAW", "OH")
df = filter_text(df, ".  FINDINGS OF FACT", " CONCLUSIONS OF LAW", "OH")
df = filter_text(df, " FINDINGS OF FACT", " CONCLUSIONS OF LAW", "OH")
df = filter_text(df, " FINDINGS OF FACT", " Conclusions of Law", "OH")

# "Notice contained allegations and findings that" 79 hits
# compromise 53 hits
df = filter_text(df, "Notice contained allegations and findings that", "compromise", "OH")
# Settlement and Consent Order 22 hits
df = filter_text(df, "Notice contained allegations and findings that", "Settlement and Consent Order", "OH")
# request a hearing 4 hits
df = filter_text(df, "Notice contained allegations and findings that", "request a hearing", "OH")

df = filter_text(df, "ALLEGATIONS", "DETERMINATIONS", "OH")












# STIPULATIONS AND ADMISSIONS: 1031 hits
# ACKNOWLEDGEMENTS/LIABILITY RELEASE:
# df.loc[(df["Text"].str.contains(". FINDINGS OF FACT")) & (df["State Enforcing"]=="OH") & (df["Text"].str.contains("NOTICE OF OPPORTUNITY FOR A HEARING ON PROPOSED ACTION"))& (df["Processed?"]==False)].describe()
# df.loc[(df["Text"].str.contains("NOTICE OF OPPORTUNITY FOR A HEARING ON PROPOSED ACTION")) & (df["State Enforcing"]=="OH") & (df["Processed?"]==False)].describe()

DIVISION ORDER
NOTICE OF RIGHT TO APPEAL
processed
1280
1280
DIVISION ORDER
NOTICE OF APPELLATE RIGHTS
processed
2186
2186
BASIS FOR PROPOSED ACTION
NOTICE OF OPPORTUNITY FOR A HEARING ON PROPOSED ACTION
processed
4191
4191
BASIS FOR PROPOSED ACTION
NOTICE OF HEARING
processed
4392
4392
STIPULATIONS AND ADMISSIONS
ACKNOWLEDGMENTS/LIABILITY RELEASE
processed
5068
5068
STIPULATIONS AND ADMISSIONS
AGREED CONDITIONS
processed
5420
5420
STIPULATIONS AND ADMISSIONS
ACKNOWLEDGMENTS/LLABILITY RELEASE
processed
5421
5421
STIPULATIONS AND ADMISSIONS
ACKNOWLEDGMENTS/LIABILITY] RELEASE
processed
5423
5423
DIVISION ORDER
NOTICE OF HEARING
processed
5541
5541
DIVISION ORDER
CONCLUSIONS OF LAW
processed
5541
5541
DIVISION ORDER
NOW THEREFORE
processed
5541
5541
DIVISION ORDER
ORDERED AND DECREED
processed
5541
5541
DIVISION ORDER
NOTICE OF APPEAL RIGHTS
processed
5572
5572
DIVISION ORDER
processed
5581
5581
. FINDINGS OF FACT
. CONCLUSIONS OF LAW
processed
5913
5913
. FINDINGS OF FACT
 CONCLUSIONS OF

In [101]:
# Ohio, continued: further marker pairs for rows the previous cell left unprocessed.
# As before, the passes run in order and each one only touches rows that are still
# unprocessed, so reordering these lines can change which text is kept.
# Some markers are ordinary words or names/salutations and appear to target
# individual documents (presumably found by manual inspection — confirm).
df = filter_text(df, "ALLEGATIONS", "PROPOSED ACTION", "OH")
df = filter_text(df, "ALLEGATIONS", "ORDER OF TEMPORARY SUSPENSION", "OH")
df = filter_text(df, "ALLEGATIONS", "AGREED FINAL ENTRY AND ORDER", "OH")
df = filter_text(df, "ALLEGATIONS", "ACKNOWLEDGMENTS/LIABILITY RELEASE", "OH")
df = filter_text(df, "BASIS FOR ACTION", "AGREED CONDITIONS", "OH")
df = filter_text(df, "stipulations, admissions and understandings", "ACKNOWLEDGMENTS/LIABILITY RELEASE", "OH")
df = filter_text(df, "allegations", "purposes of settlement", "OH")
df = filter_text(df, "alleged", "NOTICE OF RIGHT TO APPEAL", "OH")
df = filter_text(df, "STATEMENT OF FACTS", "PRAYER FOR RELIEF", "OH")
df = filter_text(df, "BASIS FOR ACTION", "ORDER OF TEMPORARY SUSPENSION", "OH")
df = filter_text(df, "allegations", "agree to the following", "OH")
df = filter_text(df, "AGREEMENTS AND UNDERSTANDINGS", "ACKNOWLEDGMENTS/LIABILITY RELEASE", "OH")
df = filter_text(df, "STATEMENT OF FACTS", "WHEREFORE, PLAINTIFFS REQUEST THIS COURT", "OH")
df = filter_text(df, "TERMINATION OF INTENT TO DENY MORTGAGE BROKER CERTIFICATE OF REGISTRATION APPLICATION", "IT IS SO ORDERED", "OH")
df = filter_text(df, "BACKGROUND", "AGREEMENT NOW COME", "OH")
df = filter_text(df, "AGREED CONDITIONS", "ACKNOWLEDGMENTS/LIABILITY RELEASE", "OH")
df = filter_text(df, "AGREED CONDITIONS", "Respondent understands that it must comply", "OH")
df = filter_text(df, " Findings of Fact", " Conclusions of Law", "OH")
df = filter_text(df,"Notice of Opportunity for a Hearing","IT IS SO ORDERED","OH")
df = filter_text(df,"Notice of Appellate Rights","ROBERT M. GRIESER","OH")
df = filter_text(df,"Howard E. Geiss Case No. 05-0231-LOD","Respectfully, Mar","OH")
df = filter_text(df,"BACKGROUND","CORRECTIVE ACTION","OH")
df = filter_text(df, "STATEMENT OF FACTS", "NOTICE OF PROPOSED ACTION", "OH")
df = filter_text(df, "stipulations, admissions and understandings", "AGREED CONDITIONS", "OH")
df = filter_text(df, "Enclosed", "Respectfully", "OH")
df = filter_text(df, "All of the following must be completed and RETURNED to the Division in the envelope provided no later than", "EFFECTIVE DATE", "OH")
df = filter_text(df, "All of the following must be completed and RETURNED to the Division in the envelope provided no later than", "ACKNOWLEDGMENTS/LIABILITY RELEASE", "OH")
df = filter_text(df,"WHEREAS, the Ohio Department of Commerce, by and through the Superintendent of the Division of Financial Institutions (the Division),","IT IS SO ORDERED","OH")
df = filter_text(df,"mutual promises set forth herein, the parties agree to the following:","ORDERED and DECREED","OH")
df = filter_text(df, ". FINDINGS", "NOTICE OF OPPORTUNITY FOR A HEARING ON PROPOSED ACTION", "OH")
df = filter_text(df, "Dear Ms. Cracium:", "If there is an error in our records,", "OH")
df = filter_text(df, "alleged", "Respondent makes no admission", "OH")
df = filter_text(df, ". CONCLUSIONS OF LAW", "RECOMMENDATION", "OH")
df = filter_text(df, "Dear Mr. McDonald", "If you wish to request a hearing,", "OH")
df = filter_text(df, "IT IS THEREFORE, agreed as follows", "If you wish to request a hearing,", "OH")
# Same start marker as the earlier WHEREAS pass, but without the trailing comma.
df = filter_text(df,"WHEREAS, the Ohio Department of Commerce, by and through the Superintendent of the Division of Financial Institutions (the Division)","IT IS SO ORDERED","OH")
df = filter_text(df, "allegations", "Respondent does not admit to the allegations", "OH")
df = filter_text(df, "NOTICE OF PROPOSED ACTION", "NOTICE OF OPPORTUNITY FOR A HEARING ON PROPOSED ACTION", "OH")

ALLEGATIONS
PROPOSED ACTION
processed
6131
6131
ALLEGATIONS
ORDER OF TEMPORARY SUSPENSION
processed
6151
6151
ALLEGATIONS
AGREED FINAL ENTRY AND ORDER
processed
6153
6153
ALLEGATIONS
ACKNOWLEDGMENTS/LIABILITY RELEASE
processed
6154
6154
BASIS FOR ACTION
AGREED CONDITIONS
processed
6161
6161
stipulations, admissions and understandings
ACKNOWLEDGMENTS/LIABILITY RELEASE
processed
6167
6167
allegations
purposes of settlement
processed
6184
6184
alleged
NOTICE OF RIGHT TO APPEAL
processed
6185
6185
STATEMENT OF FACTS
PRAYER FOR RELIEF
processed
6189
6189
BASIS FOR ACTION
ORDER OF TEMPORARY SUSPENSION
processed
6198
6198
allegations
agree to the following
processed
6205
6205
AGREEMENTS AND UNDERSTANDINGS
ACKNOWLEDGMENTS/LIABILITY RELEASE
processed
6211
6211
STATEMENT OF FACTS
WHEREFORE, PLAINTIFFS REQUEST THIS COURT
processed
6212
6212
TERMINATION OF INTENT TO DENY MORTGAGE BROKER CERTIFICATE OF REGISTRATION APPLICATION
IT IS SO ORDERED
processed
6213
6213
BACKGROUND
AGREEMENT NOW COME
proce

In [102]:
# One more Ohio pass: the "IT IS THEREFORE, agreed as follows" start marker paired with
# the ACKNOWLEDGMENTS/LIABILITY RELEASE end marker.
df = filter_text(df, "IT IS THEREFORE, agreed as follows", "ACKNOWLEDGMENTS/LIABILITY RELEASE", "OH")

IT IS THEREFORE, agreed as follows
ACKNOWLEDGMENTS/LIABILITY RELEASE
processed
6307
6307


In [103]:
df[(df["Processed?"]==False) & (df["State Enforcing"]=="MA")].loc[7234,"Text"]

" COMMONWEALTH OF MASSACHUSETTS  Suffolk, SS.  IN A MATTER  BEFORE THE COMMISSIONER OF BANKS  FINAL ORDER AND DECISION  DOCKET NUMBER: 2009-088  MASSACHUSETTS DIVISION OF BANKS  Boston, Massachusetts  Petitioner  vs.  AMERICAN LENDING GROUP, INC.  Saint Peters, Missouri  Respondent  Mortgage Company License No(s): MC5051 and MC5053  Appearance for Respondent:  Naren Chaganti, Esq.  713 The Hamptons Lane  Town & Country, MO 63017  Appearing for the Division of Banks:  Valerie Carbone, Esq.  Massachusetts Division of Banks  1000 Washington Street, 10\xa0 th \xa0Floor  Boston, MA 02118  Designated Administrative Hearing Officer of the Commissioner of Banks:  Aimee Desai, Esq.  BACKGROUND  THIS MATTER was scheduled for hearing before the Office of the Commissioner of Banks (Commissioner) on September 20, 2010, pursuant to Massachusetts General Laws chapter 30A, sections 10 and 11 and the Standard Adjudicatory Rules of Practice and Procedure 801 CMR 1.01\xa0 et seq ., upon a Notice of Heari

In [104]:
# Massachusetts (MA) processing
#
# Same approach as for Ohio: each call is one extraction pass that keeps the text
# between a start and an end marker, and only rows that are still unprocessed are
# considered, so the order of the calls matters.
# The first group uses generic section headings; the long markers further down quote
# whole sentences and appear to target individual documents (confirm before reuse).
df = filter_text(df, "FINDINGS OF FACT  The Division of Banks (Division)", "CONCLUSIONS OF LAW  Based upon the information contained", "MA")
df = filter_text(df, "FINDINGS OF FACT AND REGULATORY BACKGROUND  The Division of Banks (Division)", "ORDER TO CEASE AND DESIST  After taking into consideration", "MA")
df = filter_text(df, ". Findings of Fact", "Conclusions of Law", "MA")
df = filter_text(df, ". FINDINGS OF FACT", "ORDER AND DECISION", "MA")
df = filter_text(df, ". FINDINGS OF FACT", "CEASE DIRECTIVE", "MA")
df = filter_text(df, ". FINDINGS OF FACT", "ORDER NOW COME", "MA")
df = filter_text(df, ". FINDINGS OF FACT", "ORDER TO CEASE AND DESIST", "MA")
df = filter_text(df, "BACKGROUND AND FINDINGS OF FACT", "ORDER TO CEASE AND DESIST", "MA")
df = filter_text(df, ". FINDINGS OF FACT", "Order to Cease and Desist are the complete", "MA")
df = filter_text(df, "ORDER TO CEASE AND DESIST  IT IS HEREBY ORDERED that", "IT IS FURTHER ORDERED, that", "MA")
df = filter_text(df, "STIPULATION AND CONSENT TO THE ISSUANCE OF AN ORDER TO CEASE AND DESIST (CONSENT AGREEMENT) with counsel", "IT IS FURTHER ORDERED, that the Bank, its institution-affiliated parties, and its successors and assigns, take affirmative action as follows", "MA")
df = filter_text(df, "STIPULATION AND CONSENT TO THE ISSUANCE OF A CONSENT ORDER (CONSENT AGREEMENT) with representatives of the Division of Banks (Division)", "ORDER  NOW COME the parties in the above-captioned matter", "MA")
df = filter_text(df, "STIPULATION AND CONSENT TO THE ISSUANCE OF A CONSENT ORDER", "ORDER NOW COME", "MA")
df = filter_text(df, "STIPULATION AND CONSENT TO THE ISSUANCE OF A CONSENT ORDER", "NOTICE OF RIGHT TO A HEARING", "MA")
df = filter_text(df, "STIPULATION AND CONSENT TO THE ISSUANCE OF A CONSENT ORDER", "MODIFICATION ORDER", "MA")
df = filter_text(df, "REGULATORY BACKGROUND", "contained in this Order and Notice", "MA")
df = filter_text(df, "joint Stipulation and proposed Order", "On or before the 30th day", "MA")
df = filter_text(df, "BACKGROUND", "AGREEMENT NOW COME", "MA")
df = filter_text(df, "THE MULTI-STATE EXAMINATION ALLEGATIONS", "Provide for adequate training", "MA")
df = filter_text(df, "NOW COME", "By order and direction of the Commissioner of Banks", "MA")
df = filter_text(df, "NOW COME", "BY ORDER AND DIRECTION OF THE COMMISSIONER OF BANKS", "MA")
df = filter_text(df, "STIPULATION AND CONSENT TO THE ISSUANCE OF AN ORDER TO CEASE", "IT IS HEREBY ORDERED", "MA")
df = filter_text(df, "ORDER SUSPENDING MORTGAGE BROKER LICENSE", "NOW THEREFORE", "MA")
# Sentence-level markers from here on.
df = filter_text(df, "Nuhu G. Mohammed is the President, director, and sole shareholder of All States Mortgage.", "sentencing him to probation for a term of thirty-six (36) months with criminal monetary penalties", "MA")
df = filter_text(df, "FINDINGS OF FACT  The Division of Banks (Division), through the Commissioner, has jurisdiction over the licensing and regulation of persons and entities engaged", "CONCLUSIONS OF LAW  Based on information contained in Paragraphs 1-12, Middlesex Financial", "MA")
df = filter_text(df, "WHEREAS on or about September 8, 2006 the Division issued", "This Consent Order shall become effective immediately upon the date of its issuance", "MA")
df = filter_text(df, "WHEREAS , on January 19, 2007, the Commissioner, acting pursuant to Massachusetts General Laws chapter 255E, section 7(b), issued a Findings of Fact and Temporary", "WHEREAS , as part of the Bankruptcy Proceeding, MLN has been engaged in the orderly winding down of its business operations and has ceased the activities of a licensed mortgage lender in the Commonwealth of Massachusetts", "MA")
df = filter_text(df, "effective date of this Order and Notice and was mailed to the Company's licensed address of record at 714b Southbridge Street, Auburn, MA 01501.", "VIOLATIONS  NOW, THEREFORE, the Division hereby sets forth the following Charges", "MA")
df = filter_text(df, "The Division of Banks (Division), through the Commissioner, has jurisdiction over the licensing and regulation of persons and entities engaged in the business of a mortgage broker in Massachusetts pursuant to Massachusetts General Laws chapter 255E, section", "CONCLUSIONS OF LAW  60. Based upon the information contained in Paragraphs 1 through 59", "MA")
df = filter_text(df, "ORDER TO SHOW CAUSE AND NOTICE OF RIGHT TO A HEARING, Docket No. 2009-007 (the Order and Notice", " ORDER  After taking into consideration the record, it is hereby:  ORDERED that ", "MA")
df = filter_text(df, "FINDINGS OF FACT 1. The Division of Banks (Division), through the Commissioner, has jurisdiction over the licensing and regulation of persons and entities engaged in the business of a mortgage lender and mortgage broker in Massachusetts pursuant to Massachusetts General Laws chapter 255E, section 2. ", "The public interest will be irreparably harmed by delay in issuing an ORDER TO CEASE AND DESIST to Civic Mortgage", "MA")
df = filter_text(df, "FINDINGS OF FACT 1. The Division of Banks (Division), through the Commissioner, has jurisdiction over the licensing and regulation of persons and entities engaged in the business of a mortgage lender and mortgage broker in Massachusetts pursuant to Massachusetts General Laws chapter 255E, section 2. In the Matter of Nations Lending Corporation Findings of Fact and Temporary Order to Cease and Desist Docket No. 2009-107", "CONCLUSIONS OF LAW 19. Based upon the information contained in Paragraphs 1 through 18, by failing to obtain and maintain a surety bond", "MA")
df = filter_text(df, "to evaluate the Respondent's compliance with the laws, regulations, and regulatory bulletins applicable to the conduct of a mortgage broker business in Massachusetts. ", "After taking into consideration the record in this matter, it is hereby:  ORDERED that Farkhanda Shah's Massachusetts mortgage broker license no. MB4194 is revoked", "MA")
df = filter_text(df, "ORDER SUSPENDING MORTGAGE BROKER LICENSE MB3202 (Order).  On or about May 18, 2009,", "NOW, THEREFORE , Lehi Mortgage's mortgage broker license MB3202 is hereby suspended, effective immediately", "MA")
df = filter_text(df, "THIS SETTLEMENT AGREEMENT is made this 29th day of June, 2010 between MORTGAGE MASTER, INC., Walpole, Massachusetts (Mortgage Master or Corporation), a licensed mortgage lender and mortgage broker under Massachusetts General Laws chapter 255E, section 2, and the Commonwealth of Massachusetts Division of Banks (Division).  BACKGROUND  WHEREAS, an examination/inspection of Mortgage Master was conducted pursuant to Massachusetts General Laws chapter 255E, section 8, as of February 9, 2009, to assess the Corporation's level of compliance with applicable Massachusetts and federal statutes and the Division's regulations governing the conduct of those engaged in the business of a mortgage lender and mortgage broker in the Commonwealth.", "AGREEMENT  NOW COME the parties in the above-captioned matter, the Division and Mortgage Master", "MA")
df = filter_text(df, "The Division of Banks (Division), through the Commissioner, has jurisdiction over the licensing and regulation of persons and entities engaged in the business of a mortgage lender and mortgage broker in Massachusetts pursuant to Massachusetts General Laws chapter 255E, section 2 and its implementing regulation 209 CMR 42.00 et seq.", "Therefore, Kenneth Terrill is ordered to immediately cease taking, offering or negotiating terms of a residential mortgage loan from consumers for residential property located in Massachusetts.", "MA")
df = filter_text(df, "FINDINGS OF FACT  Massachusetts General Laws chapter 255E, section 2 relative to the licensing requirements for entities engaged in the business of a mortgage lender and/or mortgage broker states in part: \t No person shall act as a mortgage broker or mortgage lender with respect to residential property unless first obtaining a license from the commissioner.", "CEASE DIRECTIVE  Reverse Mortgage Directory and any and all officers, members, managers, employees, independent contractors, or agents, operating on behalf of Reverse Mortgage Directory, and their successors or assigns, shall immediately cease engaging in the activities of a mortgage broker", "MA")
df = filter_text(df, "FINDINGS OF FACT  Unlicensed Activity  The Division of Banks (Division), through the Commissioner, has jurisdiction over the licensing and regulation of persons and entities engaged in the business of a mortgage lender and mortgage broker in Massachusetts pursuant to Massachusetts General Laws chapter 255E, section 2 and its implementing regulation at 209", "ORDER TO CEASE AND DESIST  SKG and any and all officers, members, managers, employees, independent contractors, or agents, operating on its behalf, and their successors or assigns, shall immediately cease engaging in the activities of a mortgage lender and/or mortgage broker, as those activities are defined under Massachusetts General Laws chapter 255E", "MA")
df = filter_text(df, "which commenced on September 25, 2008 (the 2008 examination/inspection)", "To the extent that Lenox Financial or any of the individual members or owners of Lenox Financial, as described above, who wish to resume business as a mortgage lender and/or mortgage broker under General Laws chapter 255E at the expiration of the 48-month term", "MA")
df = filter_text(df, " For a final Agency decision in favor of the Division and against American Lending for each Charge set forth in the Order to Show Cause", "CONCLUSION  The Division is an agency of the Commonwealth of Massachusetts and was created pursuant to Massachusetts General Laws chapter 26, section 1.", "MA")







FINDINGS OF FACT  The Division of Banks (Division)
CONCLUSIONS OF LAW  Based upon the information contained
processed
6375
6375
FINDINGS OF FACT AND REGULATORY BACKGROUND  The Division of Banks (Division)
ORDER TO CEASE AND DESIST  After taking into consideration
processed
6376
6376
. Findings of Fact
Conclusions of Law
processed
6376
6376
. FINDINGS OF FACT
ORDER AND DECISION
processed
6376
6376
. FINDINGS OF FACT
CEASE DIRECTIVE
processed
6376
6376
. FINDINGS OF FACT
ORDER NOW COME
processed
6376
6376
. FINDINGS OF FACT
ORDER TO CEASE AND DESIST
processed
6500
6500
BACKGROUND AND FINDINGS OF FACT
ORDER TO CEASE AND DESIST
processed
6500
6500
. FINDINGS OF FACT
Order to Cease and Desist are the complete
processed
6500
6500
ORDER TO CEASE AND DESIST  IT IS HEREBY ORDERED that
IT IS FURTHER ORDERED, that
processed
6501
6501
STIPULATION AND CONSENT TO THE ISSUANCE OF AN ORDER TO CEASE AND DESIST (CONSENT AGREEMENT) with counsel
IT IS FURTHER ORDERED, that the Bank, its institution-affili

which commenced on September 25, 2008 (the 2008 examination/inspection)
To the extent that Lenox Financial or any of the individual members or owners of Lenox Financial, as described above, who wish to resume business as a mortgage lender and/or mortgage broker under General Laws chapter 255E at the expiration of the 48-month term
processed
6629
6629
 For a final Agency decision in favor of the Division and against American Lending for each Charge set forth in the Order to Show Cause
CONCLUSION  The Division is an agency of the Commonwealth of Massachusetts and was created pursuant to Massachusetts General Laws chapter 26, section 1.
processed
6630
6630


In [107]:
df.loc[(df["State Enforcing"]=="MA")&(df["Processed?"]==True)].shape

(323, 10)

In [108]:
# Length of the (possibly trimmed) Text, and its ratio to the length stored in "Text Length".
# Rows whose "Text Length" is 0 (missing text) give 0 / 0, i.e. NaN, in "Percentage".
df["New Length"] = df["Text"].apply(find_length)
df["Percentage"] = df["New Length"] / df["Text Length"]

In [109]:
# NOTE(review): numpy print options should not influence DataFrame.to_csv; this line
# looks like a leftover from debugging — confirm before removing.
np.set_printoptions(threshold=sys.maxsize)
# Write the Ohio rows (processed or not) to their own CSV; the index is written too.
df.loc[(df["State Enforcing"]=="OH")].to_csv("Standardized Data/Ohio_Final_cleaned.csv")


In [110]:
# Write the Massachusetts rows to their own CSV; the index is written too.
df.loc[(df["State Enforcing"]=="MA")].to_csv("Standardized Data/MA_Final_cleaned.csv")

In [111]:
# Write the full frame (all states) to CSV; the index is written as the first, unnamed column.
df.to_csv("Standardized Data/Final_cleaned.csv")

In [120]:
# Read the stop-word list (one word per line) and build a single string in which
# every word, including the first, is preceded by "|".
with open("stop_words.txt", 'r', encoding="ISO-8859-1") as file:
    word_list = file.readlines()

# Strip only the line terminator. The previous word[:-1] slice also removed the last
# character of a final line that has no trailing newline.
text = [word.rstrip("\n") for word in word_list]

# Join once instead of growing the string with += inside a loop.
final_string = "".join(f"|{word}" for word in text)

final_string

"|either|manager|everything|hereupon|part|lieu|robert|give|\x91re|formal|hereinafter|these|serve|nobody|beforehand|revised|beside|attest|formerly|proceeding|whenever|responsibility|out|march|off|agree|quite|p|n\x92t|how|often|whereupon|section|proscribe|superintendent|\x92m|first|thereof|enough|appoint|specifically|\\t|allegation|none|all|n\x91t|who|opportunity|can|constitute|yours|because|evidence|apply|rule|anything|below|re|be|whereafter|hearma|'ll|against|anyhow|name|onto|damel|jennifer|\x92ll|within|anyone|division|terminate|many|really|once|\x91ll|follow|so|\x91ve|heshe|institute|every|appellate|mailing|administrative|act|december|my|under|oh|other|date|moreover|findings|always|four|'ve|must|others|himself|thereupon|ourselves|andor|unless|what|among|else|while|she|motion|m|\x91m|file|why|rather|eight|some|whose|at|loan|per|their|us|previously|sixty|your|becoming|adjucative|front|without|ca|top|although|done|purpose|arizona|and|myself|investigation|continue|ever|less|mine|'re|thir