# JP Moragan Data Extractor

## Install Libraries and Imports

In [1]:
!pip install PyPDF2==3.0.1



In [2]:
!pip install gdown



In [3]:
import pandas as pd
import re
import PyPDF2
import gdown

## Helper Functions

Helper function to extract text from the pdf

In [4]:
def extract_text_from_pdf(pdf_path):
    """Extracts text from a PDF file."""
    text = ""
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            for page_num in range(len(reader.pages)):
                page = reader.pages[page_num]
                text += page.extract_text()
    except Exception as e:
        print(f"Error reading PDF: {e}")
    return text

In [5]:
def extract_blocks_from_section(processed_text):
    # Define the separator pattern (newline, dots, newline)
    separator_regex = r'\.{10,}'

    # Split the text into blocks using the separator
    blocks = re.split(separator_regex, processed_text)

    # Filter out any empty blocks that might result from splitting
    blocks = [block.strip() for block in blocks if block.strip()]
    cleaned_blocks = []

    for block in blocks:
      lines = block.split('\n')
      lines = [line.strip() for line in lines if line.strip()] # Clean empty lines within the block
      lines = [line for line in lines if not line.isdigit()] # Clean lines with page number

      if not lines:
        continue # Skip empty blocks

      cleaned_blocks.append('\n'.join(lines))

    return cleaned_blocks

Helper Function parses management discussion section with the fllowing columns:



1. `speaker`,
2. `role`,
3. `content`,
4. `quarter`, and
5. `year`





In [6]:
def parse_management_discussion_section(full_text):
    """
    Parses the Management Discussion section from a transcript into a pandas dataframe
    with 'speaker', 'role', 'content' columns.

    Args:
        full_text (str): The complete transcript text.

    Returns:
        pd.DataFrame: A DataFrame with 'speaker', 'role', 'content' columns.
    """
    entries = []

    # Add an implicit separator at the end to ensure the last block is captured
    processed_text = full_text.strip()
    processed_text += "\n........................................................................................"

    blocks = extract_blocks_from_section(processed_text)

    for block in blocks:
        lines = block.split('\n')
        lines = [line.strip() for line in lines if line.strip()] # Clean empty lines within the block
        lines = [line for line in lines if not line.isdigit()] # Clean lines with page number

        speaker_name = "N/A"
        role_name = "N/A"
        text_content = ""

        if not lines:
            continue # Skip empty blocks

        # Handle the Operator case: Speaker and start of text are on the first line
        if lines[0].startswith("Operator"):
            continue

        else: # Standard speaker: Name on line 1, Role on line 2, Text after
            if len(lines) >= 1:
                speaker_name = lines[0]
            if len(lines) >= 2:
                role_name = lines[1]
                role_name, company_name = (
                    role_name.split(",")[0].strip(),
                    role_name.split(",")[-1].strip(),
                )
                role_name = ", ".join(sorted([
                    role.strip() for role in re.split(r"&|and", role_name)
                ]))
                company_name = (
                    "JPMorgan Chase & Co."
                    if "morgan" in company_name.lower()
                    and "jp".lower() in company_name.lower()
                    else company_name
                )
            if len(lines) > 2:
                text_content = "\n".join(lines[2:])

        # Final cleanup for text content (e.g., removing any leading/trailing blank lines)
        text_content = text_content.strip()

        entries.append({
            "speaker": speaker_name,
            "role": role_name,
            "company": company_name,
            "content": text_content
        })

    df = pd.DataFrame(entries)
    return df

Helper function to parse the Q&A section into a pandas dataframe with the columns:

- question_answer_group_id: The questions and answers are grouped together for a specific analyst, so the group id is the identifier of the question and answer group.
- speaker: The name of the speaker
- role: The role of the speaker, e.g. Analyst, Cheif Executive
- content: The content spoken

In [7]:
def parse_q_and_a_section(full_text):
    """
    Parses a Q&A transcript into a list of speaker, role, and text dictionaries,
    assuming speaker name is line 1, role is line 2, and text is subsequent lines.
    Handles the special 'Operator' case.

    Args:
        full_text (str): The complete transcript text.

    Returns:
        pd.DataFrame: A DataFrame with 'question_answer_group_id', 'speaker', 'role', and 'content' columns.
    """
    entries = []

    # Add an implicit separator at the end to ensure the last block is captured
    processed_text = re.sub(r"^\s*QUESTION AND ANSWER SECTION\s*\n+", "", full_text, flags=re.MULTILINE).strip()
    processed_text += "\n........................................................................................"

    blocks = extract_blocks_from_section(processed_text)

    # Initialise the question group index
    question_group_index = 0

    for block in blocks:
        lines = block.split('\n')
        lines = [line.strip() for line in lines if line.strip()] # Clean empty lines within the block
        lines = [line for line in lines if not line.isdigit()] # Clean lines with page number

        speaker_name = "N/A"
        role_name = "N/A"
        text_content = ""
        start_index = 0

        if not lines:
            continue # Skip empty blocks

        if len(lines) == 1:
          continue

        if lines[0].startswith('.'):
          start_index = 1

        # Handle the Operator case: Speaker and start of text are on the first line
        if lines[start_index].startswith("Operator"):
            question_group_index = question_group_index + 1
            continue

        # Handle the disclaimer at the end
        if lines[start_index].startswith("Disclaimer"):
            continue

        else: # Standard speaker: Name on line 1, Role on line 2, Text after
            if len(lines) >= 1:
                speaker_name = lines[start_index]
            if len(lines) >= 2:
                role_name = lines[start_index + 1]
                # Remove optional Q/A from role
                role_name = re.sub(r'\s*(Q|A)$', '', role_name).strip()
                role_name, company_name = (
                    role_name.split(",")[0].strip(),
                    role_name.split(",")[-1].strip(),
                )
                role_name = ", ".join(sorted([
                    role.strip() for role in re.split(r"&|and", role_name)
                ]))
                company_name = (
                    "JPMorgan Chase & Co."
                    if "morgan" in company_name.lower()
                    and "jp".lower() in company_name.lower()
                    else company_name
                )
            if len(lines) > 2:
                text_content = "\n".join(lines[start_index + 2:])

        # Final cleanup for text content (e.g., removing any leading/trailing blank lines)
        text_content = text_content.strip()

        entries.append({
            "question_answer_group_id": question_group_index,
            "speaker": speaker_name,
            "role": role_name,
            "company": company_name,
            "content": text_content
        })

    df = pd.DataFrame(entries)
    return df

In [8]:
def parse_transcript_to_dataframes(raw_text):
    """Parses a raw transcript text into a DataFrame of speaking turns."""

    # Initial Cleaning
    # Remove source tags
    text = re.sub(r'\\', '', raw_text)
    # Condense multiple newlines
    text = re.sub(r'\n{2,}', '\n', text).strip()

    # Section Segmentation
    sections = re.split(r'(MANAGEMENT DISCUSSION SECTION|QUESTION AND ANSWER SECTION)', text)
    sections = [section.strip() for section in sections if section.strip()]
    parsed_turns = []
    current_section_name = "INTRO" # Default for anything before first section header

    df_q_and_a = pd.DataFrame()
    df_presentation = pd.DataFrame()

    # Iterate through the sections and their content
    for i, part in enumerate(sections):
        # skip if the part is empty
        if not part.strip():
            continue

        # Store the section name then move onto the content of the section
        if part in ["MANAGEMENT DISCUSSION SECTION", "QUESTION AND ANSWER SECTION"]:
            current_section_name = part
            continue

        # If we're looking at the Q&A section, then parse to the Q&A dataframe
        if current_section_name == 'QUESTION AND ANSWER SECTION':
            df_q_and_a = parse_q_and_a_section(part)

        # If we're looking at the Q&A section, then parse to the Q&A dataframe
        if current_section_name == 'MANAGEMENT DISCUSSION SECTION':
            df_presentation = parse_management_discussion_section(part)

    return df_presentation, df_q_and_a

## Main Extraction Code

In [9]:
# Download the files
url = 'https://drive.google.com/drive/folders/1MggwgapE4FOnVGRpMiM3jDwPcqU9CmOb?usp=drive_link'
paths = gdown.download_folder(url, quiet=True)
pdf_paths = [item for item in paths if item.endswith('.pdf')]

In [10]:
# Initialise empty arrays for our dataframes of each year
management_discusstions = []
q_and_as = []

# Loop through the pdfs and extract the data
for path in pdf_paths:
  print(f"Processing {path}...")

  # Extract the year and quarter from the file path
  year = path.lower().split('-')[0].split('/')[-1].split('q')[-1]
  quarter = path.lower().split('-')[0].split('/')[-1].split('q')[0]

  # Convert to integers
  year = int(year)
  quarter = int(quarter)

  # Parse the pdf into separate dataframes
  pdf_text = extract_text_from_pdf(path)
  management_discussion, q_and_a = parse_transcript_to_dataframes(pdf_text)

  # Append the year and quarter
  management_discussion['year'] = year
  management_discussion['quarter'] = quarter
  q_and_a['year'] = year
  q_and_a['quarter'] = quarter

  # Append the dfs to our list
  management_discusstions.append(management_discussion)
  q_and_as.append(q_and_a)

# Combine all the reports into one dataframe per section
management_discussion_df = pd.concat(management_discusstions, ignore_index=True)
q_and_a_df = pd.concat(q_and_as, ignore_index=True)

Processing /content/Transcripts/1q23-earnings-transcript.pdf...
Processing /content/Transcripts/2q23-earnings-transcript.pdf...
Processing /content/Transcripts/4q24-earnings-transcript.pdf...
Processing /content/Transcripts/1q25-earnings-transcript.pdf...
Processing /content/Transcripts/1q24-earnings-transcript.pdf...
Processing /content/Transcripts/3Q22-earnings-transcript.pdf...
Processing /content/Transcripts/2q24-earnings-transcript.pdf...
Processing /content/Transcripts/3q23-earnings-transcript.pdf...
Processing /content/Transcripts/4q22-earnings-transcript.pdf...
Processing /content/Transcripts/4q23-earnings-transcript.pdf...
Processing /content/Transcripts/2q22-earnings-transcript.pdf...
Processing /content/Transcripts/3q24-earnings-transcript.pdf...
Processing /content/Transcripts/1q22-earnings-transcript.pdf...


In [11]:
print("Question and Answer DF\n")

display(q_and_a_df.head())

print("\n\n")

print("Management Presentation DF\n")

display(management_discussion_df.head())

Question and Answer DF



Unnamed: 0,question_answer_group_id,speaker,role,company,content,year,quarter
0,0,Steven Chubak,Analyst,Wolfe Research LLC,"Hey, good morning.",23,1
1,0,Jeremy Barnum,Chief Financial Officer,JPMorgan Chase & Co.,"Good morning, Steve.",23,1
2,0,Steven Chubak,Analyst,Wolfe Research LLC,"So, Jamie, I was actually hoping to get your p...",23,1
3,0,Jamie Dimon,"Chairman, Chief Executive Officer",JPMorgan Chase & Co.,"Well, I think you were already kind of complet...",23,1
4,0,Steven Chubak,Analyst,Wolfe Research LLC,Got it. And just in terms of appetite for the ...,23,1





Management Presentation DF



Unnamed: 0,speaker,role,company,content,year,quarter
0,Jeremy Barnum,Chief Financial Officer,JPMorgan Chase & Co.,"Thanks, and good morning, everyone. The presen...",23,1
1,Jeremy Barnum,Chief Financial Officer,JPMorgan Chase & Co.,"Thanks, operator. Good morning, everyone. The ...",23,2
2,Jeremy Barnum,Chief Financial Officer,JPMorgan Chase & Co.,"Thank you, and good morning, everyone. Startin...",24,4
3,Jamie Dimon,"Chairman, Chief Executive Officer",JPMorgan Chase & Co.,"Good morning, everybody. I just want to point ...",24,4
4,Jeremy Barnum,Chief Financial Officer,JPMorgan Chase & Co.,Great. All right. So let's go to questions.,24,4


In [12]:
q_and_a_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1106 entries, 0 to 1105
Data columns (total 7 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   question_answer_group_id  1106 non-null   int64 
 1   speaker                   1106 non-null   object
 2   role                      1106 non-null   object
 3   company                   1106 non-null   object
 4   content                   1106 non-null   object
 5   year                      1106 non-null   int64 
 6   quarter                   1106 non-null   int64 
dtypes: int64(3), object(4)
memory usage: 60.6+ KB


In [13]:
management_discussion_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype 
---  ------   --------------  ----- 
 0   speaker  18 non-null     object
 1   role     18 non-null     object
 2   company  18 non-null     object
 3   content  18 non-null     object
 4   year     18 non-null     int64 
 5   quarter  18 non-null     int64 
dtypes: int64(2), object(4)
memory usage: 996.0+ bytes


## Data Cleanup

In [14]:
## Check if the years and quarters are correct

list_of_years = management_discussion_df['year'].unique()
list_of_quarters = management_discussion_df['quarter'].unique()
print("Management Discussion Section:")
print(f'Years: {list_of_years}')
print(f'Quarters: {list_of_quarters}')

print("\n")

list_of_years = q_and_a_df['year'].unique()
list_of_quarters = q_and_a_df['quarter'].unique()
print("Q&A Section:")
print(f'Years: {list_of_years}')
print(f'Quarters: {list_of_quarters}')

Management Discussion Section:
Years: [23 24 25 22]
Quarters: [1 2 4 3]


Q&A Section:
Years: [23 24 25 22]
Quarters: [1 2 4 3]


In [15]:
## Check that the roles are accurate

management_discussion_df['role'].unique()

array(['Chief Financial Officer', 'Chairman, Chief Executive Officer'],
      dtype=object)

In [16]:
## Tidy up the roles in the presentation to be consistent

misspelt_roles_dict = {
    "  ": " ",
    ' ,': ',',
    'Of ficer': 'Officer',
    'Financ ial': 'Financial',
    'Morg an': 'Morgan',
    'Finan cial': 'Financial',
    'Fina ncial': 'Financial',
    'Fin ancial': 'Financial',
    'Analy st': 'Analyst',
    'Cha irman': 'Chairman',
    'JPMo rgan': 'JPMorgan',
    'JPMorganChase': 'JPMorgan Chase & Co.',
    'JPMorga n': 'JPMorgan',
    'JP Morgan': 'JPMorgan',
    'Off icer': 'Officer',
    'JPMor gan': 'JPMorgan',
    'JPM organ': 'JPMorgan',
    'Chair man': 'Chairman',
    'Membe r': 'Member',
    '-O': 'O',
    'Membe rOperating': 'Member Operating',
    'M ember': 'Member',
    'Offi cer': 'Officer',
    '& C o': '& Co',
    'Chas e': 'Chase',
    'C hief': 'Chief',
    'Oper ating': 'Operating',
    'Comm ittee': 'Committee',
    'Execut ive': 'Executive',
    'Financia l': 'Financial',
    'Ch ief': 'Chief',
    'Co .': 'Co.',
    'Officer ,': 'Officer,',
    'Financi al': 'Financial',
    'M ember': 'Member',
    'MemberOperating': 'Member Operating',
    'Chie f': 'Chief',
    'Mor gan': 'Morgan',
    'M organ': 'Morgan',
    'C apital': 'Capital',
    'Ev ercore': 'Evercore',
    'Ever core': 'Evercore',
    'Evercor e': 'Evercore',
    'Ame rica': 'America',
    'Amer ica': 'America',
    'P ortales': 'Portales',
    'Po rtales': 'Portales',
    'Seapor t': 'Seaport',
    'Seap ort': 'Seaport',
    'Farg o': 'Fargo',
    'Ca pital': 'Capital',
    'Ba nk': 'Bank',
    'Amer ica': 'America',
    'Secur ities': 'Securities',
    'Well s': 'Wells',
    'In c': 'Inc',
    'Autono mous': 'Autonomous',
    'Auton omous': 'Autonomous',
    'S ecurities': 'Securities',
    'M errill': 'Merrill',
    'Inc .': 'Inc.',
    'Deutsc he': 'Deutsche',
    'Chief Financial Officer & Member Operating Committee, JPMorgan Chase & Co.': 'Chief Financial Officer, JPMorgan Chase & Co.'
}



def correct_roles(role):
  for misspelt_role in misspelt_roles_dict.keys():
    if misspelt_role in role:
      role = role.replace(misspelt_role, misspelt_roles_dict[misspelt_role])
      break
  return role

In [17]:
management_discussion_df['role'] = management_discussion_df['role'].apply(correct_roles)
management_discussion_df['role'].unique()

array(['Chief Financial Officer', 'Chairman, Chief Executive Officer'],
      dtype=object)

In [18]:
q_and_a_df['role'].unique()

array(['Analyst', 'Chief Financial Officer',
       'Chairman, Chief Executive Officer', 'Chief Financial  Officer',
       'Chief Fina ncial Officer', 'Chairman, Chief Execut ive Officer',
       'Chief Fin ancial Officer', 'Ch ief Executive Officer, Chairman',
       'Chief Financia l Officer', 'Analy st',
       'Cha irman, Chief Executive Officer', 'Chief Financial Offi cer',
       'C hief Executive Officer, Chairman',
       'Chairman, Chief Executive Of ficer', 'Chie f Financial Officer',
       'Chief Finan cial Officer', 'Chairman, Chief  Executive Officer',
       'Chief Financi al Officer',
       'Chief Financial Officer, Member -Operating Committee',
       'Chief Financial Officer, Member -Operating Comm ittee',
       'Chief Financial Officer, Member -Operating  Committee',
       'Chief Financial Officer, M ember -Operating Committee',
       'Chief Financial Officer, Member -Oper ating Committee',
       'Chief Financial Officer, Membe r-Operating Committee',
       'C

In [19]:
q_and_a_df['role'] = q_and_a_df['role'].apply(correct_roles)
q_and_a_df['role'].unique()

array(['Analyst', 'Chief Financial Officer',
       'Chairman, Chief Executive Officer',
       'Chief Executive Officer, Chairman',
       'Chief Financial Officer, Member Operating Committee',
       'Chief Financial Officer, Member Operating Comm ittee',
       'Chief Financial Officer, Member -Operating Committee',
       'Chief Financial Officer, M ember Operating Committee',
       'Chief Financial Officer, Member Oper ating Committee',
       'Chief Financial Officer, Member-Operating Committee'],
      dtype=object)

In [20]:
q_and_a_df.sort_values(by=['year', 'quarter', 'question_answer_group_id'], ascending=True, inplace=True)
q_and_a_df.reset_index(drop=True, inplace=True)

management_discussion_df.sort_values(by=['year', 'quarter'], ascending=True, inplace=True)
management_discussion_df.reset_index(drop=True, inplace=True)

In [21]:
management_discussion_df.head()

Unnamed: 0,speaker,role,company,content,year,quarter
0,Jeremy Barnum,Chief Financial Officer,JPMorgan Chase & Co.,"Thanks, operator. Good morning, everyone. The ...",22,1
1,Jeremy Barnum,Chief Financial Officer,JPMorgan Chase & Co.,"Thanks, operator. Good morning, everyone. The ...",22,2
2,Jeremy Barnum,Chief Financial Officer,JPMorgan Chase & Co.,"Thank you very much. Good morning, everyone. A...",22,3
3,Jamie Dimon,"Chairman, Chief Executive Officer",JPMorgan Chase & Co.,"Yeah, Jeremy, thank you very much. Hello, ever...",22,3
4,Jeremy Barnum,Chief Financial Officer,JPMorgan Chase & Co.,"Yeah. Thanks, Jamie. Let's go ahead and open u...",22,3


In [22]:
management_discussion_df.to_csv('management_discussion_transcripts.csv')
q_and_a_df.to_csv('q_and_a_transcripts.csv')

# TEST AREA

In [23]:
# Parse the pdf into separate dataframes
pdf_text = extract_text_from_pdf('/content/Transcripts/2q22-earnings-transcript.pdf')
print(pdf_text)

   
 
2Q22 FINANCIAL RESULTS  
EARNINGS CALL TRANSCRIPT  
July 14, 2022 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
  
   
 
  
  
  
2 
 
MANAGEMENT DISCUSSION SECTION  
 ......................................................................................................................................................................................................................................................   
Operator:  Good morning, ladies and gentlemen. Welcome to J PMorgan Chase's Second Quarter 2022 Earnings Call. This call is being 
recorded. Your line will be muted for the duration of the call. We will now go live to the presentation. Please stand by.  
 
At this time, I would like to turn the call over to JPMorgan Chase's Chairman and CEO, Jamie Dimon; and Chief Financial Officer, Jeremy Barnum.  
 Mr. Barnum, please go ahead.  
 ...................................................................................................................................................

In [24]:
import pandas as pd
import re

text = """
QUESTION AND ANSWER SECTION

Operator:  The next question is coming from the line of John McDonald  from Autonomous. You may proceed.
............................................................................................................................. ........................................................ .............................................
John E. McDonald
Analyst, Autonomous Research  Q
Hi. Good morning. Jeremy, wanted to ask about your EaR disclosures, what we call your rate sensitivity disclosures. They look  a little different
than peers. And when we look at the sensitivity to 100 basis points of higher rates beyond the forward curve, it looks like you're liability
sensitive . Can you give us some context of maybe the limitations of that disclosure and how we should put that in context of the as sumptions
behind it?
............................................................................................................................. .....................................................................................................
Jeremy B arnum
Chief Financial Officer, JPMorgan Chase & Co.  A
Yeah. Thanks, John. And I'd love to have a very long conversation with you about this, but I'm going to keep it short here. I t's really all about
lags. So, as our disclosure says, we do not include the impact of reprice lags in our EaR calculation. So, as a result of that, the entire
calculation is based on modeled rates paid in the terminal state.

As you well know, right now, we're in the middle of some very significant lags, which are affecting the numbers quite a bit a nd which we expect
to persist for s ome time. So, as a result of that, what I would expect in the near term is something quite similar to what we've experienced this
year. As you know, this year, as rates have gone up, we've revised our NII outlook from $50 billion at the beginning of the y ear to now $61.5
billion. So, as we look forward in the near term from here, I would expect similar type sensitivities to rate fluctuations, g iven the lag
environment that we're in.
........................................................................... ............................................................................................................................. ..........................
John E. McDonald
Analyst, Autonomous Research  Q
And just to follow up on Jamie's comments about not ann ualizing the fourth quarter, is that where the risks lie to annualizing the fourth
quarter? What are some of the puts and takes – you said it might be down a little bit from that fourth quarter annualized.
.................................................. ............................................................................................................................. ...................................................

Jamie Dimon
Chairman & Chief Executive Officer, JPMorgan Chase & Co.  A
Yeah. Look, I've already mentioned you have a rapidly changing yield curve, deposit migration. Everyone does EaR differently.  So, one is lag .
One is we assume deposit migration; some people don't. Our ECR is included in  there; some people don't . And all o f that. I just think for your
models, because of all that kind of stuff, just use a number less than annualized in the $19 billion. So, instead of $76 bill ion, use a number like
$74 billion. And I just keep it as simple as possible. And we don't know. We h ope to beat that, but with all the stuff going on, you just got to be
a little cautious and conservative.
............................................................................................................................. ......................... ............................................................................
9

John E. McDonald
Analyst, Autonomous Research  Q
Okay. Thanks.
............................................................................................................................. .....................................................................................................

Operator:  The next question is coming from the line of Erika Najarian from UBS. You may proceed.
............................................................................................................................. .............................................................. .......................................
Jeremy Barnum
Chief Financial Officer, JPMorgan Chase & Co.  A
Yeah. So, good question on the multi -family, and the short answer is that, for us, it's pretty uncontroversially  no – no bleed -through. And the
reason is that while we are aware of some of the pressure on multi -family, that's in kind of different markets from the ones that we are actually
big in. So, it's higher -end stuff in much less supply constrained markets that is under more pres sure. And as you know, our multi -family
portfolio is much more affordable, supply constrained markets. And so, the performance there remains really very robust .
............................................................................................... ............................................................................................................................. ......
Ebrahim H. Poonawala
Analyst, Bank of America Merrill Lynch  Q
Got it.  Thank you.
............................................................................................................................. .....................................................................................................
.
Operator:  Next, we'll go to  the line of Erika Najarian from UBS. You may proceed .
............................................................................................................................. .....................................................................................................
Erika Najarian
Analyst, UBS S ecurities LLC  Q
Hi. Good morning. My first question is a follow -up on Matt's with regarding the buyback. You printed 15% CET1 in the quarter. On a net basis,
net of RWA growth, your net income produces 51 basis points every quarter. Again, that's net of R WA growth. I'm wondering what guideposts
you're looking for, Jeremy, in terms of that buyback increasing from that $2 billion a quarter. Do we need to wait for Basel III finalization, which
seems like it could be quite delayed? Or will having clarity in th e June D -FAST results – you mentioned the SCB – sort of be enough that you
could reconsider this pace over the medium term ?
............................................................................................................................ .....................................................................................................
Jeremy Barnum
Chief Financial Officer, JPMorgan Chase & Co.  A
Yeah. Erika, that's a good question. And I understand what you're asking – why you're asking it. I think the answer is going to be a little bit
unsatisfying, which is that this is classic decision -making under uncertainty and it's kind of a probabilistic cloud of a variety of different factors.
But, all the ingredients that you've listed are the r ight ingredients; right, very strong organic capital generation, uncertainty about the
finalization of the rule, uncertainty about the SCB requirements, and obviously, our normal capital hierarchy, which is that buybacks are always
at the bottom of the hie rarchy after we're done using the capital for our other priorities.

So, I think what I said previously stands, which is that we're sticking with the modest pace for now. But obviously, we have a lot of flexibility to
adjust that whenever we want under the  current regime, and we may well do that .
............................................................................................................................. ......................................................................................... ............
Erika Najarian
Analyst, UBS Securities LLC  Q
Thanks. And just as a follow -up, the $90 billion in expenses for 2024, does that contemplate a significant increase or the comeback of
investment banking that everybody seems to be expecting for 2024 ?
............................................................................................................................. .....................
"""

def parse_q_and_a(full_text):
    """
    Parses a Q&A transcript into a list of speaker, role, and text dictionaries,
    assuming speaker name is line 1, role is line 2, and text is subsequent lines.
    Handles the special 'Operator' case.

    Args:
        full_text (str): The complete transcript text.

    Returns:
        pd.DataFrame: A DataFrame with 'speaker', 'role', and 'text' columns.
    """
    entries = []

    # Add an implicit separator at the end to ensure the last block is captured
    processed_text = re.sub(r"^\s*QUESTION AND ANSWER SECTION\s*\n+", "", full_text, flags=re.MULTILINE).strip()
    processed_text += "\n........................................................................................"

    # Define the separator pattern (newline, dots, newline)
    separator_regex = r"\.{10,}"

    # Split the text into blocks using the separator
    blocks = re.split(separator_regex, processed_text)

    # Filter out any empty blocks that might result from splitting
    blocks = [block.strip() for block in blocks if block.strip()]
    question_number = 0

    for block in blocks:
        lines = block.split('\n')
        lines = [line.strip() for line in lines if line.strip()] # Clean empty lines within the block
        lines = [line for line in lines if not line.isdigit()]

        speaker_name = "N/A"
        role_name = "N/A"
        text_content = ""

        start_index = 0

        if not lines:
            continue # Skip empty blocks

        if lines[0].startswith('.'):
          start_index = 1

        # Handle the Operator case: Speaker and start of text are on the first line
        if lines[start_index].startswith("Operator:"):
            question_number = question_number + 1
            continue

        else: # Standard speaker: Name on line 1, Role on line 2, Text after
            if len(lines) >= 1:
                speaker_name = lines[start_index]
            if len(lines) >= 2:
                role_name = lines[start_index + 1]
                # Remove optional Q/A from role
                role_name = re.sub(r'\s*(Q|A)$', '', role_name).strip()
            if len(lines) > 2:
                text_content = "\n".join(lines[start_index + 2:])

        # Final cleanup for text content (e.g., removing any leading/trailing blank lines)
        text_content = text_content.strip()

        entries.append({
            "question_answer_group_id": question_number,
            "speaker": speaker_name,
            "role": role_name,
            "content": text_content
        })

    df = pd.DataFrame(entries)
    return df

# Run the parsing
parse_q_and_a(text)

Unnamed: 0,question_answer_group_id,speaker,role,content
0,1,John E. McDonald,"Analyst, Autonomous Research","Hi. Good morning. Jeremy, wanted to ask about ..."
1,1,Jeremy B arnum,"Chief Financial Officer, JPMorgan Chase & Co.","Yeah. Thanks, John. And I'd love to have a ver..."
2,1,John E. McDonald,"Analyst, Autonomous Research",And just to follow up on Jamie's comments abou...
3,1,Jamie Dimon,"Chairman & Chief Executive Officer, JPMorgan C...","Yeah. Look, I've already mentioned you have a ..."
4,1,John E. McDonald,"Analyst, Autonomous Research",Okay. Thanks.
5,2,Jeremy Barnum,"Chief Financial Officer, JPMorgan Chase & Co.","Yeah. So, good question on the multi -family, ..."
6,2,Ebrahim H. Poonawala,"Analyst, Bank of America Merrill Lynch",Got it. Thank you.
7,3,Erika Najarian,"Analyst, UBS S ecurities LLC",Hi. Good morning. My first question is a follo...
8,3,Jeremy Barnum,"Chief Financial Officer, JPMorgan Chase & Co.","Yeah. Erika, that's a good question. And I und..."
9,3,Erika Najarian,"Analyst, UBS Securities LLC","Thanks. And just as a follow -up, the $90 bill..."
