In [131]:
from utils import get_earnings_transcript

quarter = "Q4"
ticker = "AAPL"
year = 2023
resp_dict, speakers_list = get_earnings_transcript(
            quarter, ticker, year
        )

In [132]:
resp_dict

{'symbol': 'AAPL',
 'quarter': 4,
 'year': 2023,
 'date': '2023-11-02 21:32:21',
 'content': "Operator: Good day, and welcome to the Apple Q4 Fiscal Year 2023 Earnings Conference Call. Today's call is being recorded. At this time, for opening remarks and introductions, I would like to turn the call over to Suhasini Chandramouli, Director of Investor Relations. Please go ahead.\nSuhasini Chandramouli: Thank you. Good afternoon, and thank you for joining us. Speaking first today is Apple's CEO, Tim Cook. And he'll be followed by CFO, Luca Maestri. After that, we'll open the call to questions from analysts. Please note that some of the information you'll hear during our discussion today will consist of forward-looking statements, including without limitation, those regarding revenue, gross margin, operating expenses, other income and expense, taxes, capital allocation and future business outlook, including the potential impact of macroeconomic conditions on the company's business and resu

In [133]:
speakers_list

['Erik Woodring',
 'Suhasini Chandramouli',
 'Wamsi Mohan',
 'Amit Daryanani',
 'Tim Cook',
 'Harsh Kumar',
 'Luca Maestri',
 'Operator',
 'Aaron Rakers',
 'Krish Sankar',
 'Richard Kramer',
 'Michael Ng',
 'Ben Reitzes',
 'David Vogt']

In [134]:
print(resp_dict['content'])

Operator: Good day, and welcome to the Apple Q4 Fiscal Year 2023 Earnings Conference Call. Today's call is being recorded. At this time, for opening remarks and introductions, I would like to turn the call over to Suhasini Chandramouli, Director of Investor Relations. Please go ahead.
Suhasini Chandramouli: Thank you. Good afternoon, and thank you for joining us. Speaking first today is Apple's CEO, Tim Cook. And he'll be followed by CFO, Luca Maestri. After that, we'll open the call to questions from analysts. Please note that some of the information you'll hear during our discussion today will consist of forward-looking statements, including without limitation, those regarding revenue, gross margin, operating expenses, other income and expense, taxes, capital allocation and future business outlook, including the potential impact of macroeconomic conditions on the company's business and results of operations. These statements involve risks and uncertainties that may cause actual resul

In [42]:
content = resp_dict['content']

In [84]:
import re

pattern = re.compile(r"\n(.*?):")
matches = pattern.finditer(content)

speakers_list = []
ranges = []
for match_ in matches:
    # print(match.span())
    span_range = match_.span()
    # first_idx = span_range[0]
    # last_idx = span_range[1]
    ranges.append(span_range)
    speakers_list.append(match_.group())

In [85]:
import re
speakers_list = [re.sub("\n","",sl) for sl in speakers_list]
speakers_list = [re.sub(":","",sl) for sl in speakers_list]

In [101]:
from langchain.schema import Document

docs = []

for idx,speaker in enumerate(speakers_list[:-1]):
    start_range = ranges[idx][1]
    end_range = ranges[idx+1][0]
    speaker_text =  content[start_range+1:end_range]
    # docs.append({
    #     "speaker_text":speaker_text,
    #     "speaker":speaker
    # })
    docs.append(
        Document(page_content=speaker_text,metadata={"speaker":speaker})
    )
# docs.append({
#     "speaker_text":content[ranges[-1][1]:],
#     "speaker":speakers_list[-1]
# })
    
docs.append(
        Document(page_content=content[ranges[-1][1]:],metadata={"speaker":speakers_list[-1]})
    )

In [111]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    # Set a really small chunk size, just to show.
    chunk_size = 512,
    chunk_overlap  = 64,
    length_function = len,
    # is_separator_regex = False,
)

In [112]:
len(docs)

62

In [113]:
split_docs = text_splitter.split_documents(docs)

In [114]:
len(split_docs)

134

In [116]:
split_docs_qdrant = [{"speaker_text":doc.page_content,"speaker":doc.metadata['speaker']} for doc in split_docs]

In [118]:
# Import client library
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance

qdrant_client = QdrantClient("http://localhost:6333")

In [119]:
from sentence_transformers import SentenceTransformer
import numpy as np
import json
import pandas as pd
from tqdm.notebook import tqdm
import torch
from qdrant_client import models, QdrantClient

device = "cuda" if torch.cuda.is_available() else "cpu"

encoder = SentenceTransformer(
    "all-MiniLM-L6-v2", device=device
)  # or device="cpu" if you don't have a GPU

qdrant_client.recreate_collection(
    collection_name="earning_calls",
    vectors_config=VectorParams(size=encoder.get_sentence_embedding_dimension(), distance=Distance.COSINE),
)

True

In [121]:
qdrant_client.upload_records(
    collection_name="earning_calls",
    records=[
        models.Record(
            id=idx, vector=encoder.encode(doc["speaker_text"]).tolist(), payload=doc
        )
        for idx, doc in enumerate(split_docs_qdrant)
    ],
)

In [124]:
from qdrant_client.models import VectorParams, Distance, Field, FieldCondition, MatchAny, Filter, Match

hits = qdrant_client.search(
    collection_name="earning_calls",
    query_vector=encoder.encode("What was the Quarterly revenue").tolist(),
    limit=5,
    query_filter=models.Filter(
        must=[
            models.FieldCondition(
                key="speaker",
                match=models.MatchAny(
                    any=speakers_list,
                ),
            )
        ]
    ),
    search_params=models.SearchParams(hnsw_ef=256,exact=True)
)

relevant_docs = []
for hit in hits:
    relevant_docs.append(hit.payload)

relevant_docs

[{'speaker': 'Luca Maestri',
  'speaker_text': 'Thank you, Tim, and good afternoon, everyone. Revenue for the June quarter was $81.8 billion, down 1% from last year and better than our expectations despite nearly 4 percentage points of negative impact from foreign exchange. On a constant currency basis, our revenue grew year-over-year in total and in the majority of the markets we track. We set June quarter records in both Europe and Greater China and continue to see strong performance across our emerging markets driven by iPhone. Products revenue was'},
 {'speaker': 'Tim Cook',
  'speaker_text': 'Thank you, Saori. Good afternoon, everyone, and thanks for joining us. Today, Apple is reporting revenue of $81.8 billion for the June quarter, better than our expectations. We continued to see strong results in emerging markets, driven by robust sales of iPhone with June quarter total revenue records in India, Indonesia, Mexico, the Philippines, Poland, Saudi Arabia, Turkey and the UAE. We s

In [128]:
relevant_speaker_text = {k:"" for k in speakers_list}

for rd in relevant_docs:
    relevant_speaker_text[rd['speaker']]+=rd['speaker_text'] + " "

In [129]:
relevant_text = ""
for speaker,text in relevant_speaker_text.items():
    relevant_text+=speaker + ": "
    relevant_text+=text + "\n\n"
    

In [130]:
relevant_text

"Saori Casey: \n\nTim Cook: Thank you, Saori. Good afternoon, everyone, and thanks for joining us. Today, Apple is reporting revenue of $81.8 billion for the June quarter, better than our expectations. We continued to see strong results in emerging markets, driven by robust sales of iPhone with June quarter total revenue records in India, Indonesia, Mexico, the Philippines, Poland, Saudi Arabia, Turkey and the UAE. We set June quarter records in a number of other countries as well, including France, the Netherlands and Austria. And we \n\nLuca Maestri: Thank you, Tim, and good afternoon, everyone. Revenue for the June quarter was $81.8 billion, down 1% from last year and better than our expectations despite nearly 4 percentage points of negative impact from foreign exchange. On a constant currency basis, our revenue grew year-over-year in total and in the majority of the markets we track. We set June quarter records in both Europe and Greater China and continue to see strong performanc