# Setup, Constants, and Imports

In [11]:
import os
import sys
import logging

## Notebook Configs

In [12]:
# Environment switch read by the setup cell: when True the repository is
# cloned (or pulled) and added to sys.path; when False the local branch runs.
IS_COLAB = False # Update this if running in Google Colab
# When True, the last cell writes qna_df and discussion_df to CSV files.
OUTPUT_PROCESSED_FILES = True

## Constants

In [13]:
# Git remote that is cloned when IS_COLAB is True.
REPO_URL = "https://github.com/EErlando/Quarterly-Bytes.git"
# Local directory the repository is cloned into, and pulled from on re-runs.
REPO_NAME = "src" 
# Directory handed to extract_transcripts_pdf_df_from_dir. It is a relative
# path, so it is resolved against the current working directory.
ALL_TRANSCRIPTS_PATH = "data/raw/JP Morgan/Transcripts"
# Compared against the basename of the current working directory on local
# runs; on a match the setup cell moves two directory levels up.
NOTEBOOK_DIR = "1_data_extraction_and_processing" # Update this to your notebook directory

## Environment Setup - Clone/Pull Repository (Colab) or Set Working Directory (Local)

In [14]:

if IS_COLAB:
    if os.path.exists(REPO_NAME):
        print(f"Directory '{REPO_NAME}' already exists. Pulling latest changes...")
        %cd {REPO_NAME}
        !git pull --quiet
        %cd ..
    else:
        print(f"Cloning repository into '{REPO_NAME}'...")
        !git clone --quiet {REPO_URL} {REPO_NAME}
        print("Clone complete.")

    sys.path.append('/content/src/')
else:
    if os.path.basename(os.getcwd()) == NOTEBOOK_DIR:
        os.chdir('../../')

logging.basicConfig(level=logging.ERROR, format='%(levelname)s: %(message)s')

## Local Imports

In [15]:
from src.utils.pdf_utils import extract_transcripts_pdf_df_from_dir, BankType

# Get Transcript Files

In [16]:
# Run the project extractor over ALL_TRANSCRIPTS_PATH with the JPMORGAN bank
# type; the two returned tables are unpacked as the Q&A and discussion frames.
# NOTE(review): in this notebook's saved outputs the `year` and `quarter`
# columns are blank in the previewed rows and `qna_df.year.value_counts()` is
# empty - confirm whether the extractor is expected to populate them.
qna_df, discussion_df = extract_transcripts_pdf_df_from_dir(ALL_TRANSCRIPTS_PATH, BankType.JPMORGAN)

In [17]:
# Preview the first rows of the Q&A table.
qna_df.head()

Unnamed: 0,question_answer_group_id,speaker,role,content,year,quarter
0,0,Steven Chubak,"Analyst, Wolfe Research LLC","Hey, good morning.",,
1,0,Jeremy Barnum,"Chief Financial Officer, JPMorgan Chase & Co.","Good morning, Steve.",,
2,0,Steven Chubak,"Analyst, Wolfe Research LLC","So, Jamie, I was actually hoping to get your p...",,
3,0,Jamie Dimon,"Chairman & Chief Executive Officer, JPMorgan C...","Well, I think you were already kind of complet...",,
4,0,Steven Chubak,"Analyst, Wolfe Research LLC",Got it. And just in terms of appetite for the ...,,


In [18]:
# Row count per year. value_counts() drops missing values by default, so an
# empty result means the `year` column holds no non-null entries.
qna_df.year.value_counts()

Series([], Name: count, dtype: int64)

In [19]:
# Preview the first rows of the discussion table.
discussion_df.head()

Unnamed: 0,speaker,role,content,year,quarter
0,Jeremy Barnum,"Chief Financial Officer, JPMorgan Chase & Co.","Thanks, and good morning, everyone. The presen...",,
1,Jeremy Barnum,"Chief Financial Officer, JPMorgan Chase & Co.","Thanks, operator. Good morning, everyone. The ...",,
2,Jeremy Barnum,"Chief Financial Officer, JPMorgan Chase & Co.","Thank you very much. Good morning, everyone. A...",,
3,Jamie Dimon,"Chairman & Chief Executive Officer, JPMorgan C...","Yeah, Jeremy, thank you very much. Hello, ever...",,
4,Jeremy Barnum,"Chief Financial Officer, JPMorgan Chase & Co.","Yeah. Thanks, Jamie. Let's go ahead and open u...",,


In [20]:
# Persist both extracted tables as CSV when the notebook-level flag is set.
if OUTPUT_PROCESSED_FILES:
    processed_dir = 'data/processed/JP Morgan'
    # to_csv does not create missing parent directories, so make sure the
    # target folder exists first (no-op when it is already there).
    os.makedirs(processed_dir, exist_ok=True)
    # index=False keeps the DataFrame index out of the written files.
    qna_df.to_csv(f'{processed_dir}/qna_df.csv', index=False)
    discussion_df.to_csv(f'{processed_dir}/discussion_df.csv', index=False)
    print("Processed files saved successfully.")

Processed files saved successfully.
