In [None]:
# Inject notebook-wide CSS so that rendered <h1>/<h2> headings get a coloured banner.
# The HTML object must stay the last expression of the cell for it to be displayed.
from IPython.core.display import HTML
HTML("""
<style>
    h1{background-color:black; color:white; padding: 10px 10px 10px 10px}
    h2{background-color:blue; color:white;  padding: 5px 5px 5px 5px}
</style>
""")

# Load libraries

In [2]:
import pandas as pd
import numpy as np
import re
import string


import spacy
from spacy.en.word_sets import STOP_WORDS



from collections import Counter


from gensim.models import Phrases
from gensim.models.word2vec import LineSentence
from gensim.corpora import Dictionary, MmCorpus
from gensim.models.ldamodel import LdaModel

import pyLDAvis
import pyLDAvis.gensim


from pprint import pprint
import warnings

# Load data

In [3]:
# Which dataset to analyse. Each dataset stores the same three logical fields
# (advice sought, case synopsis, case type) under different column names.
mode = 'lspbs'  # one of: 'cjc', 'lspbs'

if mode == 'cjc':  # CJC
    file_path = '../../data/original/cjc_cases_2017_sample_20180411_harmonized.csv'
    col_advice = 'LEGAL_ISSUES'
    col_synopsis = 'BACKGROUND_INFORMATION'
    col_casetype = 'CASE_TYPE_CJC'
elif mode == 'lspbs':  # LSPBS
    file_path = '../../data/original/lspbs_sample_2016_harmonized.csv'
    col_advice = 'ADVICE_SOUGHT'
    col_synopsis = 'CASE_SYNOPSIS'
    col_casetype = 'CASE_TYPE_LSPBS'
else:
    # Fail fast with a clear message; without this branch a typo in `mode`
    # only surfaces later as a NameError on `file_path`.
    raise ValueError("Unknown mode %r: expected 'cjc' or 'lspbs'" % (mode,))

# Missing values become empty strings so that the text columns can be joined
# and regex-processed without NaN handling.
df = pd.read_csv(file_path).fillna('')


# Load spacy model

In [4]:
# Point spaCy at the project-local data directory (must run before the model is loaded).
spacy.util.set_data_path("../../data/spacy")

In [5]:
# Load the English pipeline; `nlp` is the module-level parser used by the
# phrase-modelling helpers further down.
# NOTE(review): the path is presumably resolved against the spaCy data path set earlier - confirm.
nlp = spacy.load('en_core_web_sm/en_core_web_sm-1.2.0')

## Add to stopword list

# Text cleaning

## Extract advice

In [6]:
# Raw advice text, one entry per row of `df` (NumPy array taken from the advice column).
advice = df[col_advice].values

## All characters

In [7]:
# Every distinct character that occurs in the advice texts, printed in sorted
# order on a single line (useful for spotting odd / non-ASCII characters).
charset = set(''.join(advice))
for c in sorted(charset):
    print(c, end='')

 "#$%&'()*+,-./0123456789:;=?ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz ​–’•

## Regex patterns of common delimiters eg. bullet points / item numbers

In [8]:
## Split text by these regex patterns
#
# Two pitfalls to keep in mind when editing this list:
#   * the patterns are raw strings, so whitespace is written `\s`; a doubled
#     backslash (`\\s`) would match a literal backslash followed by 's';
#   * every entry needs its own trailing comma, otherwise Python silently
#     concatenates adjacent string literals into a single pattern.
regex_split_patterns = [
    r'(\s|^)-\s',      # ' - '
    r'\(?[12345]\)',   # '(1)', '1)'
    r':\s*-?',         # ': -'
    r':',              # ':'
    r';\s?',           # ';'
    r'-(?=[A-Z])',     # '-' immediately preceding a capital letter
]

# The alternation is wrapped in a capturing group, so re.split() also returns
# the matched delimiters (and None for the inner group when it did not
# participate); callers are expected to filter those entries out.
regex_split = r'(' + r'|'.join(regex_split_patterns) + r')'

## Modifying stopword list

In [9]:
# Work on a copy so that spaCy's own STOP_WORDS set is left untouched.
NEW_STOP_WORDS = STOP_WORDS.copy()

# Boilerplate words to drop in addition to the standard stop words.
custom_stopwords = [
    'adverse','party','applicant',
    'legal','advice','general','guidance','advise',
    'like','know','seeking',
]

# Question words that are removed from the stop list so they survive cleaning.
to_keep = {
    'who','what','when','where','why','how','whether'
}

NEW_STOP_WORDS.update(custom_stopwords)

# difference_update() instead of remove(): remove() raises KeyError as soon as
# one of the words is not present in the stop list that was loaded.
NEW_STOP_WORDS.difference_update(to_keep)

In [10]:
def clean_advice_text(text, stopwords, split_pattern):
    """
    Clean one raw advice string and return it as a single-spaced string.

    Steps, in order:
      1. replace every character with code point >= 127 by a space;
      2. expand the shorthand 'A/P' to 'Adverse Party' and drop 's / "s;
      3. drop whitespace-separated tokens whose lower-cased form is in `stopwords`
         (tokens with punctuation attached, e.g. 'Applicant,', are not matched);
      4. split on `split_pattern` and keep only segments containing a letter,
         which also discards the delimiters and None entries that re.split()
         returns for capturing groups;
      5. re-join the segments and collapse all runs of whitespace.

    Parameters
    ----------
    text : str
        Raw advice text.
    stopwords : container of str
        Lower-case words to remove.
    split_pattern : str
        Regex passed to re.split().

    Returns
    -------
    str
    """
    text = ''.join([ch if ord(ch)<127 else ' ' for ch in text]) # remove non-ASCII chars

    text = text.replace('A/P','Adverse Party') # standardize some short hand notation
    text = text.replace('\'s','') # remove 's if any
    text = text.replace('\"s','') # remove "s if any

    text = ' '.join([tok for tok in text.split() if tok.lower() not in stopwords]) # remove stopwords

    segments = re.split(split_pattern, text) # split by common delimiters into segments
    segments = [seg for seg in segments if seg is not None and re.search(r'[A-Za-z]', seg)]

    # split()/join also strips leading and trailing whitespace
    return ' '.join(' '.join(segments).split())


# Always start from the raw column rather than from `advice` itself, so that
# re-running this cell gives the same result instead of re-cleaning its own output.
advice = [clean_advice_text(raw, NEW_STOP_WORDS, regex_split) for raw in df[col_advice].values]


In [11]:
# sanity check: show the first 10 cases before and after cleaning
for i,(original,new) in enumerate(zip(df[col_advice],advice)):
    if i >= 10:
        continue
    print('CASE #', i+1, end='\n\n')
    print('ORIGINAL:', original, end='\n\n')
    print('CLEANED:', new, end='\n\n')

CASE # 1

ORIGINAL:  - What actions can be taken to ensure the issue stays out of court?   - What can be done to protect himself as a consumer of the vehicle since the damage was caused by wear and tear rather than negligence? 

CLEANED: What actions taken ensure issue stays court? What protect consumer vehicle damage caused wear tear negligence?

CASE # 2

ORIGINAL:  - Is it advisable to engage a lawyer on this matter?  - What are the proceedings and its costs to obtain a letter of administration?   

CLEANED: advisable engage lawyer matter? What proceedings costs obtain letter administration?

CASE # 3

ORIGINAL:  ​Questions for the lawyer?   

CLEANED: Questions lawyer?

CASE # 4

ORIGINAL:  ​What is the best course of actions to undertake?  

CLEANED: What best course actions undertake?

CASE # 5

ORIGINAL: ​Applicant is unsure what kind of further legal action would be taken against him and would also like to know what he should do in the meantime.

CLEANED: unsure what kind actio

## (Deprecated) Replacement of keywords / tokens with tags

In [12]:
## Deprecated
# Replaces selected keywords / patterns in `advice` with placeholder tags.
# Kept for reference only: nothing below runs while RUNCODE is False.

RUNCODE = False

if RUNCODE:
    regex_applicant = re.compile(r'Applicant(\'s)?',re.IGNORECASE) # "Applicant", "Applicant's"
    regex_ap = re.compile(r'A/P(\'s)?',re.IGNORECASE) # "A/P", "A/P's"
    regex_ap_no = re.compile(r'A/P\s*#(\d+)',re.IGNORECASE) # "A/P#1", "A/P#2" etc
    regex_date_to_date = re.compile(r'\d{1,2}/\d{1,2}\s*-+\s*\d{1,2}/\d{1,2}') # "1/2 - 10/2"
    regex_money = re.compile(r'\$[\d\,K]+',re.IGNORECASE) # "$1000"
    regex_sgd = re.compile(r'\d+\s*(SGD\b|Singapore dollars)',re.IGNORECASE) # "500 SGD"
    regex_contraction = re.compile(r'(\S+)\'(d|ve|ll|t|s)\b',re.IGNORECASE) # "he'd", "they've": apostrophe is dropped below
    
    advice = [re.sub(regex_applicant,'<applicant>',s) for s in advice]
    # numbered adverse parties are tagged before the generic A/P pattern, which would otherwise consume the "A/P" prefix
    advice = [re.sub(regex_ap_no,r'<adverse_party_no_\1>',s) for s in advice]
    advice = [re.sub(regex_ap,'<adverse_party>',s) for s in advice]
    advice = [re.sub(regex_date_to_date,'<date_to_date>',s) for s in advice]
    advice = [re.sub(regex_money,'<money>',s) for s in advice] 
    advice = [re.sub(regex_sgd,'<money>',s) for s in advice]
    advice = [re.sub(regex_contraction,r'\1\2',s) for s in advice] # rejoin the word and its suffix without the apostrophe

In [13]:
len(advice) # sanity check: cleaning must not change the number of data points (one entry per case)

945

# Phrase Modelling

* (Optional) Join noun chunks with the help of Spacy
* Join statistically significant word collocations eg. 'divorce proceedings'


## Utility function for joining words into chunks

In [14]:
def join_noun_phrases(text):
    '''
    Adapted from https://explosion.ai/blog/sense2vec-with-spacy

    Parse `text` with the module-level spaCy pipeline `nlp`, merge multi-token
    noun chunks and multi-token named entities into single tokens, and return
    the resulting tokens joined by single spaces. Spaces inside a merged token
    are replaced by '_' (e.g. "good ideas" -> "good_ideas").

    Parameters
    ----------
    text : str
        Input text.

    Returns
    -------
    str
        The text with joined noun chunks / entities.

    Notes
    -----
    Merging is best effort: if a merge raises, the message is printed and that
    chunk or entity is left unmerged.
    '''

    # Parse text
    doc = nlp(text)

    # Merge noun chunks
    for chunk in doc.noun_chunks:
        try:
            # Drop leading tokens until the chunk starts with an adjectival
            # modifier or a compound, e.g. "the good ideas" -> "good ideas"
            while len(chunk) > 1 and chunk[0].dep_ not in ('amod', 'compound'):
                chunk = chunk[1:]
            if len(chunk) > 1:
                # Merge the tokens, e.g. good_ideas
                chunk.merge(chunk.root.tag_, chunk.text, chunk.root.ent_type_)
        except Exception as e:
            print(e)

    # Merge named entities. This runs once, after the noun chunks: nesting it
    # inside the noun-chunk loop would repeat it for every chunk and skip it
    # altogether for texts without any noun chunk.
    for ent in doc.ents:
        try:
            if len(ent) > 1:
                # Merge them into single tokens
                ent.merge(ent.root.tag_, ent.text, ent.label_)
        except Exception as e:
            print(e)

    # Join back into a string
    return ' '.join(token.text.replace(' ', '_') for token in doc)

## Function for preparing sentence corpus for phrase modelling

In [15]:
def sentence_corpus(corpus,
                    stopwords,
                    punctuation_to_remove,
                    bool_join_noun_phrase=True): # Whether to concatenate noun chunks into single tokens
    '''
    Turn each text of `corpus` into a list of cleaned, lower-cased sentences.

    Every text is split into sentences with the module-level spaCy pipeline
    `nlp`. Each sentence optionally has its noun chunks / entities joined via
    join_noun_phrases(), then has the unwanted punctuation characters removed,
    its whitespace normalised and its tokens lower-cased.

    Parameters
    ----------
    corpus : iterable of str
        One text per case.
    stopwords : collection of str
        Currently unused (stop-word removal is disabled here); kept so that
        existing calls keep working.
    punctuation_to_remove : iterable of str
        Single characters to delete from every sentence.
    bool_join_noun_phrase : bool, default True
        If True, sentences go through join_noun_phrases() first; if False the
        sentence text is used as is.

    Returns
    -------
    list of list of str
        One list of sentence strings per input text.
    '''
    unwanted_chars = set(punctuation_to_remove)
    new_corpus = []

    for txt in corpus:
        sentences = []
        sents = [sent.text for sent in nlp(txt).sents]

        for sent in sents:
            # Fall back to the plain sentence when joining is switched off, so
            # that `new_sent` is always bound and never carried over from the
            # previous sentence.
            new_sent = join_noun_phrases(sent) if bool_join_noun_phrase else sent

            new_sent = ''.join(ch for ch in new_sent if ch not in unwanted_chars) # remove unwanted punctuation
            # split()/join drops leading / trailing white space and collapses runs of it
            new_sent = ' '.join(tok.lower() for tok in new_sent.split()) # lowercase

            sentences.append(new_sent) # list of sentences for each case

        new_corpus.append(sentences)
    return new_corpus

## Generate sentence corpus (1 case = 1 list of sentences)

In [16]:
stopwords = NEW_STOP_WORDS

# Remove every ASCII punctuation character except '_' (the underscore is kept)
punctuation_to_remove = [p for p in string.punctuation if p != '_']
print(punctuation_to_remove, end='\n\n')

sent_corpus_advice = sentence_corpus(advice, stopwords, punctuation_to_remove)

['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', '<', '=', '>', '?', '@', '[', '\\', ']', '^', '`', '{', '|', '}', '~']

Error calculating span: Can't find end
Error calculating span: Can't find end
Error calculating span: Can't find start
Error calculating span: Can't find start
Error calculating span: Can't find start
Error calculating span: Can't find start
Error calculating span: Can't find end
Error calculating span: Can't find end
Error calculating span: Can't find start
Error calculating span: Can't find end
Error calculating span: Can't find start
Error calculating span: Can't find start
Error calculating span: Can't find end
Error calculating span: Can't find start
Error calculating span: Can't find start
Error calculating span: Can't find start
Error calculating span: Can't find end
Error calculating span: Can't find start
Error calculating span: Can't find end
Error calculating span: Can't find start
Error calculating span: Can't find end

In [17]:
# sanity check: still one entry per case
print(len(sent_corpus_advice))

# preview the first 10 cases: index, cleaned text, then its list of "sentences"
# (noun chunks joined by _)
for i,(original,new) in enumerate(zip(advice,sent_corpus_advice)):
    if i >= 10:
        continue
    print(i, original, new, sep='\n')

945
0
What actions taken ensure issue stays court? What protect consumer vehicle damage caused wear tear negligence?
['what actions taken ensure_issue stays court', 'what protect consumer_vehicle_damage caused wear tear_negligence']
1
advisable engage lawyer matter? What proceedings costs obtain letter administration?
['advisable_engage_lawyer_matter', 'what proceedings costs obtain letter_administration']
2
Questions lawyer?
['questions_lawyer']
3
What best course actions undertake?
['what best_course_actions undertake']
4
unsure what kind action taken what meantime.
['unsure what kind_action taken what meantime']
5
required forward payment $300 Applicant, still? required assist payment $100 XYZ home, law? child chose live Party, Applicant, alternatively required pay maintenance responsibility lies Party? increase maintenance done? What actions taken pursue accordingly?
['required forward payment 300 applicant still required assist payment 100 xyz home law', 'child chose live party ap

## Phrase modelling iterations

In [18]:
N = 10 # Max N-gram length; the loop below runs N-1 Phrases passes

with warnings.catch_warnings():
    # DeprecationWarning: inspect.getargspec() is deprecated, use inspect.signature() instead
    # NOTE: 'ignore' silences every warning raised inside this block, not only that one
    warnings.simplefilter('ignore')
    
    # Repeatedly concatenate (using _) significant collocated word pairs, N-1 passes in total.
    # Each pass retrains Phrases on the output of the previous pass, so already-joined
    # tokens can be joined again into longer phrases.
    # NOTE: `sent_corpus_advice` is rebound in place, so re-running this cell applies further passes.
    for i in range(N-1):
        # flatten: one token list per sentence, across all cases
        sent_tokens_advice = [sent.split() for sents in sent_corpus_advice for sent in sents]
        ngram_model_advice = Phrases(sent_tokens_advice,threshold=1)
        # re-encode every sentence with the phrases found in this pass
        sent_corpus_advice = [
            [' '.join(ngram_model_advice[sent.split()]) for sent in sents] 
            for sents in sent_corpus_advice
        ]


In [19]:
# Flatten: join each case's sentences and split on whitespace, giving one token list per case
sent_corpus_advice   = [' '.join(sents).split() for sents in sent_corpus_advice]

In [20]:
# Preview - cleaned text followed by its token list, for the first 10 cases
for adv,sent in zip(advice[:10],sent_corpus_advice[:10]):
    print(adv, sent, sep='\n\n', end='\n\n')

What actions taken ensure issue stays court? What protect consumer vehicle damage caused wear tear negligence?

['what_actions_taken', 'ensure_issue', 'stays', 'court', 'what', 'protect', 'consumer_vehicle_damage', 'caused', 'wear', 'tear_negligence']

advisable engage lawyer matter? What proceedings costs obtain letter administration?

['advisable_engage_lawyer_matter', 'what', 'proceedings', 'costs', 'obtain', 'letter_administration']

Questions lawyer?

['questions_lawyer']

What best course actions undertake?

['what', 'best_course_actions', 'undertake']

unsure what kind action taken what meantime.

['unsure', 'what', 'kind_action', 'taken', 'what', 'meantime']

required forward payment $300 Applicant, still? required assist payment $100 XYZ home, law? child chose live Party, Applicant, alternatively required pay maintenance responsibility lies Party? increase maintenance done? What actions taken pursue accordingly?

['required', 'forward', 'payment', '300', 'applicant', 'still', 

# Topic Modelling with Latent Dirichlet Allocation

## All cases

In [21]:
# Vocabulary over all cases (token <-> integer id)
dictionary_advice = Dictionary(sent_corpus_advice)

# Prune rare tokens, then reassign ids so they are contiguous again.
# NOTE(review): per the gensim docs `no_below` is an absolute document count and
# `no_above` a fraction of documents, so no_above=1 disables the upper cut-off - confirm.
dictionary_advice.filter_extremes(no_below=10, no_above=1)
dictionary_advice.compactify()

# Bag-of-words representation, one per case (same order as sent_corpus_advice)
bow_advice = [dictionary_advice.doc2bow(text) for text in sent_corpus_advice]

In [22]:
## Sanity check: the number of bag-of-words vectors must equal the number of original cases
len(bow_advice)

945

In [23]:
LDA_SEED = 123 # Random seed passed to LdaModel as random_state
LDA_ADVICE_NUM_TOPICS = 5 # Number of topics
LDA_ADVICE_ALPHA = 0.0001 # Control document sparsity --- can be tuned

# Fit LDA on all cases and build the pyLDAvis data structure for it.
# All warnings raised inside the block are suppressed.
with warnings.catch_warnings():
    warnings.simplefilter('ignore')

    lda_advice = LdaModel(
        random_state=LDA_SEED,
        alpha=LDA_ADVICE_ALPHA,
        num_topics=LDA_ADVICE_NUM_TOPICS,
        corpus=bow_advice,
        id2word=dictionary_advice,
        update_every=1,
        passes=1
    )
    
    # Inputs must be the same corpus / dictionary the model was trained on
    LDAvis_advice = pyLDAvis.gensim.prepare(
        lda_advice, 
        bow_advice,
        dictionary_advice
    )

In [24]:
# Render the interactive topic visualisation for the all-cases model
pyLDAvis.display(LDAvis_advice)

## Deep Dive into individual case types

In [25]:
# Case type label of every case, in the same row order as `df`
casetypes = df[col_casetype].values

In [26]:
# Labels of the five most frequent case types, most frequent first
top5types = [casetype for casetype, count in Counter(casetypes).most_common(5)]

In [27]:
# Display the five most frequent case types
top5types

['Divorce / Judicial Separation/ Annulment/ PPO/ DEO/ Maintenance/ Child Custody / Division of Property (Family)',
 'Employment Agreements & Workplace Issues (Contract)',
 'Other Legal Matters (Other)',
 'Estate / Wills / Probate (Property)',
 'General Service & Hire Agreements (Issue with Contractor, etc.) Contract)']

In [28]:
# Frequency table of case types, most frequent first
dfcasetypefreq = pd.DataFrame(Counter(df[col_casetype]).most_common())
dfcasetypefreq.columns = ['casetype','casecount']
# Share of all cases (in percent) and running total of that share
dfcasetypefreq['percentofcases'] = dfcasetypefreq['casecount'] / df.shape[0] * 100
dfcasetypefreq['cumulativepercent'] = dfcasetypefreq['percentofcases'].cumsum()
dfcasetypefreq.head(10)

Unnamed: 0,casetype,casecount,percentofcases,cumulativepercent
0,Divorce / Judicial Separation/ Annulment/ PPO/...,239,25.291005,25.291005
1,Employment Agreements & Workplace Issues (Cont...,191,20.21164,45.502646
2,Other Legal Matters (Other),77,8.148148,53.650794
3,Estate / Wills / Probate (Property),55,5.820106,59.470899
4,General Service & Hire Agreements (Issue with ...,49,5.185185,64.656085
5,General Product & Purchase Agreements (Faulty ...,48,5.079365,69.73545
6,Intentional Harassment (POHA) / Assault (Tort),40,4.232804,73.968254
7,Bankruptcy / Insolvency (Other),35,3.703704,77.671958
8,Personal Loan (Lending Money Between Individua...,33,3.492063,81.164021
9,Landlord / Tenant (Property),29,3.068783,84.232804


## Function to generate topics by case type

In [29]:
def topic_output_by_case_type(SENT_CORPUS,CASE_TYPES,selected_case_type,ldaseed=123,topics=5,alpha=0.0001,debug=False):
    '''
    Fit an LDA topic model on the cases of one case type and prepare its
    pyLDAvis visualisation data.

    Parameters
    ----------
    SENT_CORPUS : sequence of list of str
        Tokenised documents, one token list per case.
    CASE_TYPES : sequence
        Case type label of every case, aligned with SENT_CORPUS
        (list, NumPy array or pandas Series).
    selected_case_type
        Label of the case type to model.
    ldaseed : int, default 123
        Passed to LdaModel as random_state.
    topics : int, default 5
        Number of topics.
    alpha : float, default 0.0001
        Passed to LdaModel as alpha; set it lower to increase topic sparsity
        for each document.
    debug : bool, default False
        Print intermediate information.

    Returns
    -------
    tuple
        (lda_model, prepared_vis) on success. If any step raises, the
        exception is printed and (None, None) is returned.
    '''

    try:

        if debug:
            print('Case type',selected_case_type)

        if len(SENT_CORPUS) != len(CASE_TYPES):
            raise ValueError('SENT_CORPUS and CASE_TYPES must have the same length')

        # Keep the documents of the selected case type. A plain zip is used
        # instead of NumPy boolean indexing: np.array() on ragged token lists
        # is rejected by recent NumPy, and a plain-list CASE_TYPES would not
        # compare element-wise.
        tmpcorpus = [
            doc for doc, casetype in zip(SENT_CORPUS, CASE_TYPES)
            if casetype == selected_case_type
        ]

        if debug:
            print('Number of cases:',len(tmpcorpus))
            print()
            print('Cases',tmpcorpus)
            print()

        # remove cases without any token
        tmpcorpus = [case for case in tmpcorpus if len(case)>0]

        if debug:
            print('Number of cases after removing empty:',len(tmpcorpus))
            print()

        # Construct dictionary (tokens must occur in at least 2 cases)
        tmpdict = Dictionary(tmpcorpus)
        tmpdict.filter_extremes(no_below=2, no_above=1)
        tmpdict.compactify()

        if debug:
            print('Dictionary',tmpdict)

        # Construct bag of words
        tmpbow = [tmpdict.doc2bow(text) for text in tmpcorpus]

        with warnings.catch_warnings():
            # silence library warnings raised during training / preparation
            warnings.simplefilter('ignore')

            # LDA configuration
            tmplda = LdaModel(
                random_state=ldaseed,   # random seed
                alpha=alpha,            # document-topic sparsity
                num_topics=topics,      # number of topics
                corpus=tmpbow,
                id2word=tmpdict,
                update_every=1,
                passes=1
            )

            tmpldavis = pyLDAvis.gensim.prepare(
                tmplda,
                tmpbow,
                tmpdict
            )

        return tmplda,tmpldavis

    except Exception as e:
        # Best effort: report the problem and let the caller check for None
        print(e)
        return (None,None)

In [30]:
# Every case type label, ordered from most to least frequent
ALLCASETYPES = [casetype for casetype, count in Counter(df[col_casetype]).most_common()]

## Case Type Rank #1

In [31]:
i = 0
tmpcasetype = ALLCASETYPES[i]

# Header: the case type and how many cases carry it
print('', tmpcasetype, '', sep='\n')
print((df[col_casetype] == tmpcasetype).sum(), 'cases', end='\n\n')

# Topic model restricted to this case type: (lda_model, prepared_vis)
tmpldaoutput = topic_output_by_case_type(
    SENT_CORPUS=sent_corpus_advice,
    CASE_TYPES=casetypes,
    selected_case_type=tmpcasetype,
    topics=5,
    alpha=0.0001,
)

pyLDAvis.display(tmpldaoutput[1])


Divorce / Judicial Separation/ Annulment/ PPO/ DEO/ Maintenance/ Child Custody / Division of Property (Family)

239 cases



### Term / Phrase Frequency

In [32]:
# Document frequency of every token within the current case type.
# Cases are selected with a plain zip over the boolean mask: wrapping the
# ragged token lists in np.array() raises ValueError on NumPy >= 1.24.
tmpdocs = [toks for toks, keep in zip(sent_corpus_advice, df[col_casetype] == tmpcasetype) if keep]

# dict.fromkeys() de-duplicates within a case while keeping first-seen order,
# so tokens with equal counts are listed in a reproducible order (set() order
# depends on string hashing).
tmpdocfreq = Counter(
    tok
    for toks in tmpdocs
    for tok in dict.fromkeys(toks)
    if tok not in NEW_STOP_WORDS
)

# Passing `columns` to the constructor also works when no token is left.
tmptokens = pd.DataFrame(tmpdocfreq.most_common(), columns=['token','doc_count'])
tmptokens.head(20)

Unnamed: 0,token,doc_count
0,how,58
1,divorce,46
2,what,37
3,maintenance,34
4,whether,31
5,wants,26
6,divorce_proceedings,22
7,wishes,14
8,flat,12
9,house,11


## Case Type Rank #2

In [33]:
i = 1
tmpcasetype = ALLCASETYPES[i]

# Header: the case type and how many cases carry it
print('', tmpcasetype, '', sep='\n')
print((df[col_casetype] == tmpcasetype).sum(), 'cases', end='\n\n')

# Topic model restricted to this case type: (lda_model, prepared_vis)
tmpldaoutput = topic_output_by_case_type(
    SENT_CORPUS=sent_corpus_advice,
    CASE_TYPES=casetypes,
    selected_case_type=tmpcasetype,
    topics=3,
    alpha=0.0001,
)

pyLDAvis.display(tmpldaoutput[1])


Employment Agreements & Workplace Issues (Contract)

191 cases



In [34]:
# Document frequency of every token within the current case type.
# Cases are selected with a plain zip over the boolean mask: wrapping the
# ragged token lists in np.array() raises ValueError on NumPy >= 1.24.
tmpdocs = [toks for toks, keep in zip(sent_corpus_advice, df[col_casetype] == tmpcasetype) if keep]

# dict.fromkeys() de-duplicates within a case while keeping first-seen order,
# so tokens with equal counts are listed in a reproducible order (set() order
# depends on string hashing).
tmpdocfreq = Counter(
    tok
    for toks in tmpdocs
    for tok in dict.fromkeys(toks)
    if tok not in NEW_STOP_WORDS
)

# Passing `columns` to the constructor also works when no token is left.
tmptokens = pd.DataFrame(tmpdocfreq.most_common(), columns=['token','doc_count'])
tmptokens.head(20)

Unnamed: 0,token,doc_count
0,whether,34
1,how,34
2,what,31
3,pay,23
4,claim,18
5,salary,16
6,contract,13
7,wants,13
8,terminate,11
9,case,10


## Case Type Rank #3

In [35]:
i = 2
tmpcasetype = ALLCASETYPES[i]

# Header: the case type and how many cases carry it
print('', tmpcasetype, '', sep='\n')
print((df[col_casetype] == tmpcasetype).sum(), 'cases', end='\n\n')

# Topic model restricted to this case type: (lda_model, prepared_vis)
tmpldaoutput = topic_output_by_case_type(
    SENT_CORPUS=sent_corpus_advice,
    CASE_TYPES=casetypes,
    selected_case_type=tmpcasetype,
    topics=5,
    alpha=0.0001,
)

pyLDAvis.display(tmpldaoutput[1])


Other Legal Matters (Other)

77 cases



In [36]:
# Document frequency of every token within the current case type.
# Cases are selected with a plain zip over the boolean mask: wrapping the
# ragged token lists in np.array() raises ValueError on NumPy >= 1.24.
tmpdocs = [toks for toks, keep in zip(sent_corpus_advice, df[col_casetype] == tmpcasetype) if keep]

# dict.fromkeys() de-duplicates within a case while keeping first-seen order,
# so tokens with equal counts are listed in a reproducible order (set() order
# depends on string hashing).
tmpdocfreq = Counter(
    tok
    for toks in tmpdocs
    for tok in dict.fromkeys(toks)
    if tok not in NEW_STOP_WORDS
)

# Passing `columns` to the constructor also works when no token is left.
tmptokens = pd.DataFrame(tmpdocfreq.most_common(), columns=['token','doc_count'])
tmptokens.head(20)

Unnamed: 0,token,doc_count
0,how,16
1,what,16
2,whether,10
3,pay,6
4,party1,5
5,compensation,4
6,company,4
7,able,4
8,wishes,4
9,case,4


## Case Type Rank #4

In [37]:
i = 3
tmpcasetype = ALLCASETYPES[i]

# Header: the case type and how many cases carry it
print('', tmpcasetype, '', sep='\n')
print((df[col_casetype] == tmpcasetype).sum(), 'cases', end='\n\n')

# Topic model restricted to this case type: (lda_model, prepared_vis)
tmpldaoutput = topic_output_by_case_type(
    SENT_CORPUS=sent_corpus_advice,
    CASE_TYPES=casetypes,
    selected_case_type=tmpcasetype,
    topics=5,
    alpha=0.0001,
)

pyLDAvis.display(tmpldaoutput[1])


Estate / Wills / Probate (Property)

55 cases



In [38]:
# Document frequency of every token within the current case type.
# Cases are selected with a plain zip over the boolean mask: wrapping the
# ragged token lists in np.array() raises ValueError on NumPy >= 1.24.
tmpdocs = [toks for toks, keep in zip(sent_corpus_advice, df[col_casetype] == tmpcasetype) if keep]

# dict.fromkeys() de-duplicates within a case while keeping first-seen order,
# so tokens with equal counts are listed in a reproducible order (set() order
# depends on string hashing).
tmpdocfreq = Counter(
    tok
    for toks in tmpdocs
    for tok in dict.fromkeys(toks)
    if tok not in NEW_STOP_WORDS
)

# Passing `columns` to the constructor also works when no token is left.
tmptokens = pd.DataFrame(tmpdocfreq.most_common(), columns=['token','doc_count'])
tmptokens.head(20)

Unnamed: 0,token,doc_count
0,how,16
1,what,14
2,wants,7
3,wishes,7
4,whether,7
5,seek,5
6,transfer,5
7,flat,5
8,possible,4
9,procedures,4


## Case Type Rank #5

In [39]:
i = 4
tmpcasetype = ALLCASETYPES[i]

# Header: the case type and how many cases carry it
print('', tmpcasetype, '', sep='\n')
print((df[col_casetype] == tmpcasetype).sum(), 'cases', end='\n\n')

# Topic model restricted to this case type: (lda_model, prepared_vis)
tmpldaoutput = topic_output_by_case_type(
    SENT_CORPUS=sent_corpus_advice,
    CASE_TYPES=casetypes,
    selected_case_type=tmpcasetype,
    topics=5,
    alpha=0.0001,
)

pyLDAvis.display(tmpldaoutput[1])


General Service & Hire Agreements (Issue with Contractor, etc.) Contract)

49 cases



In [40]:
# Document frequency of every token within the current case type.
# Cases are selected with a plain zip over the boolean mask: wrapping the
# ragged token lists in np.array() raises ValueError on NumPy >= 1.24.
tmpdocs = [toks for toks, keep in zip(sent_corpus_advice, df[col_casetype] == tmpcasetype) if keep]

# dict.fromkeys() de-duplicates within a case while keeping first-seen order,
# so tokens with equal counts are listed in a reproducible order (set() order
# depends on string hashing).
tmpdocfreq = Counter(
    tok
    for toks in tmpdocs
    for tok in dict.fromkeys(toks)
    if tok not in NEW_STOP_WORDS
)

# Passing `columns` to the constructor also works when no token is left.
tmptokens = pd.DataFrame(tmpdocfreq.most_common(), columns=['token','doc_count'])
tmptokens.head(20)

Unnamed: 0,token,doc_count
0,whether,8
1,how,7
2,how_claim,6
3,what,6
4,seek,5
5,claim,5
6,right,5
7,wishes,5
8,contract,4
9,nb_relevant_documents_forwarded,4


# Appendix : all cases

In [41]:
# Set to True to dump every case (type, synopsis, raw advice, cleaned advice)
RUNCODE = False

original_casetypes = df[col_casetype].values
original_synopsis = df[col_synopsis].values
original_advice = df[col_advice].values

if RUNCODE:
    for i,(c,s,a,cleaned) in enumerate(zip(original_casetypes,original_synopsis,original_advice,advice)):
        # One print call per case; every argument ends up on its own line
        print(
            '', i+1, '',
            'Case Type', '='*8, '', c, '',
            'Synopses', '='*8, '', s, '',
            'Advice', '='*6, '', a, '',
            'Cleaned', '', cleaned,
            '='*150,
            sep='\n'
        )