In [1]:
import os
!python3 --version

Python 3.10.12


In [2]:
!pip install requests
!pip install google-generativeai
!pip install pdfplumber
!pip install flask
# pyngrok



**SET API KEY**

In [3]:
# Read the Gemini API key from the environment instead of hard-coding it:
# a key written into a notebook cell is leaked to everyone who can see the file.
# SECURITY: the key that was previously committed in this cell must be treated
# as compromised and revoked.
if not os.environ.get("API_KEY"):
    # Local import: only needed for the interactive fallback.
    from getpass import getpass

    # getpass does not echo the input, so the key is not stored in cell output.
    os.environ["API_KEY"] = getpass("Enter your Gemini API key: ").strip()

# Fail early (before any API call) if no key was supplied.
if not os.environ.get("API_KEY"):
    raise ValueError("API_KEY environment variable is not set.")

**EXTRACT TEXT FROM PDF**

In [4]:
import pdfplumber

def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_file: Whatever ``pdfplumber.open`` accepts (the notebook passes a
            file path string).

    Returns:
        The text of all pages that yielded any text, in page order, separated
        by a newline. Pages separated this way no longer have their last line
        fused with the first line of the following page. Returns "" when no
        page yields text.
    """
    with pdfplumber.open(pdf_file) as pdf:
        # extract_text() may give a falsy value (None / "") for a page;
        # normalise that to "" so it can be filtered out below.
        page_texts = [page.extract_text() or "" for page in pdf.pages]
    return "\n".join(page_text for page_text in page_texts if page_text)

**Connect to Google Drive to read the earnings call transcripts of Dr Lal Pathlabs and One97 (Paytm) that were uploaded there**

In [5]:
from google.colab import drive

# Mount Google Drive at /content/drive; the transcript PDFs are read from
# paths under this mount point later in the notebook.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [6]:
# Locations of the two earnings-call transcript PDFs on the mounted Drive.
file_path_1, file_path_2 = (
    '/content/drive/My Drive/Earning Call Transcript - Dr Lal Pathlabs.pdf',
    '/content/drive/My Drive/Earning Call Transcript - One97 (Paytm).pdf',
)

# Extract the full text of each transcript (Dr Lal Pathlabs first, then One97).
text_dr_lal_pathlabs, text_one97 = map(
    extract_text_from_pdf, (file_path_1, file_path_2)
)

**The `summarize_text` function calls Gemini-1.5-flash to summarize one section (financial performance, market dynamics, etc.) of a company's (One97, etc.) earnings call transcript**

In [7]:
import google.generativeai as genai

def summarize_text(text, section, company):
    """Ask Gemini for a summary of one section of a company's transcript.

    Args:
        text: Transcript text, appended verbatim to the prompt.
        section: Topic to summarise (e.g. "financial performance").
        company: Company name inserted into the prompt.

    Returns:
        The ``text`` attribute of the model's response.

    Raises:
        KeyError: If the ``API_KEY`` environment variable is not set.
    """
    # The client is configured on every call from the API_KEY environment variable.
    genai.configure(api_key=os.environ["API_KEY"])
    gemini = genai.GenerativeModel("gemini-1.5-flash")

    # The instruction names both the section and the company; the transcript follows it.
    prompt = f"Please summarize the {section} from {company} company's earning transcript:\n\n{text}"
    return gemini.generate_content([prompt]).text


**SUMMARIZE VIA FLASK APP**

In [8]:
from flask import Flask, request, jsonify

app = Flask(__name__)

@app.route("/")
def home():
    """Landing route: respond with a static welcome message."""
    welcome = {"message": "Welcome to the Flask API running on Colab!"}
    return welcome

@app.route("/earnings_transcript_summary", methods=["POST"])
def summarize():
    """Summarise an earnings-call transcript, one summary per section.

    Expects a JSON object body with two non-empty string fields,
    ``transcript_text`` and ``company_name``.

    Returns:
        * 200 with a JSON object holding ``company_name`` plus the summaries
          ``financial_performance``, ``market_dynamics``, ``expansion_plans``,
          ``environmental_risks`` and ``regulatory_or_policy_changes``.
        * 400 with ``{"error": ...}`` when the body is not a JSON object, or a
          required field is missing, empty, or not a string.
        * 502 with ``{"error": ...}`` when generating a summary fails.
    """
    # silent=True yields None (instead of an unhandled error) for a missing or
    # malformed JSON body; a non-dict body (null, list, string) is rejected too,
    # because the field checks below require a mapping.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    required_fields = (
        ("transcript_text", "No transcript_text content provided"),
        ("company_name", "No company_name provided"),
    )

    # First pass: every required field must be present. Presence is checked for
    # all fields before emptiness so the reported error matches the original
    # precedence of the checks.
    for field, message in required_fields:
        if field not in data:
            return jsonify({"error": message}), 400

    # Second pass: every required field must be a non-blank string.
    for field, message in required_fields:
        value = data[field]
        if not isinstance(value, str) or not value.strip():
            return jsonify({"error": message}), 400

    content = data["transcript_text"]
    company_name = data["company_name"]

    # Response key -> human-readable section label used in the prompt.
    sections = {
        "financial_performance": "financial performance",
        "market_dynamics": "market dynamics",
        "expansion_plans": "expansion plans",
        "environmental_risks": "environmental risks",
        "regulatory_or_policy_changes": "regulatory or policy changes",
    }

    result = {"company_name": company_name}
    try:
        for key, label in sections.items():
            result[key] = summarize_text(content, label, company_name)
    except Exception:
        # Any failure of the upstream model call is reported as a JSON error
        # rather than an HTML 500 page; the details go to the server log only.
        app.logger.exception("Summary generation failed")
        return jsonify({"error": "Failed to generate summary"}), 502

    return jsonify(result)


**Start app**

In [9]:
from threading import Thread

def run_app():
    """Serve the Flask app on all interfaces at port 5002 (blocks while serving)."""
    app.run(host='0.0.0.0', port=5002)

# Start the server on a background thread so the following cells can call it.
# daemon=True: the server thread must not keep the process alive on its own,
# otherwise a non-interactive run of the notebook would never terminate.
Thread(target=run_app, daemon=True).start()

 * Serving Flask app '__main__'
 * Debug mode: off


In [10]:
# Display names of the two companies whose transcripts are summarised.
company1, company2 = "Dr Lal Pathlabs", "One97 (Paytm)"

# Extract the text of each transcript PDF again from its Drive path.
text_dr_lal_pathlabs = extract_text_from_pdf(file_path_1)
text_one97 = extract_text_from_pdf(file_path_2)

 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5002
 * Running on http://172.28.0.12:5002
INFO:werkzeug:[33mPress CTRL+C to quit[0m


**Local test - Summarize Dr Lal Pathlabs**

In [11]:
import requests
import json

# Happy-path check against the local server: a complete request for
# Dr Lal Pathlabs.
company_name = "Dr Lal Pathlabs"
transcript_text = text_dr_lal_pathlabs

# JSON body sent to the summary endpoint
payload = {
    "company_name": company_name,
    "transcript_text": transcript_text
}

# Local Flask server (port 5002)
url = "http://127.0.0.1:5002/earnings_transcript_summary"

# Always pass a timeout: without one, requests waits indefinitely if the
# server stalls. Values are (connect, read) seconds; the read limit is generous
# because the endpoint makes several model calls per request.
response = requests.post(url, json=payload, timeout=(10, 300))

# Show the parsed JSON on success, otherwise the status code and raw body
if response.status_code == 200:
    print("Response:", response.json())
else:
    print("Error:", response.status_code, response.text)


INFO:werkzeug:127.0.0.1 - - [30/Dec/2024 08:07:14] "POST /earnings_transcript_summary HTTP/1.1" 200 -


Response: {'company_name': 'Dr Lal Pathlabs', 'environmental_risks': 'The Dr Lal PathLabs Q2 & H1 FY25 earnings transcript does not explicitly discuss environmental risks.  The focus is entirely on financial performance, market dynamics, and business strategy.  No mention is made of environmental impact from lab operations, waste disposal, or carbon footprint.\n', 'expansion_plans': 'Dr. Lal PathLabs\' Q2 FY25 earnings call transcript reveals expansion plans focusing on organic growth, primarily through:\n\n* **Opening 15-20 new labs in FY25:**  This is a slight acceleration compared to previous years, with roughly half already opened in H1 FY25.  The company expects to maintain this pace for perhaps one more year before returning to a pre-COVID expansion rate.\n\n* **Increasing collection centers:**  The company is adding collection centers to support the new labs and meet increased demand, aiming to maintain a ratio of approximately 28-29 collection centers per retail lab.  The hub-a

**Local test - Summarize One97 (Paytm)**

In [12]:
import requests
import json

# Happy-path check against the local server: a complete request for
# One97 (Paytm).
company_name = "One97 (Paytm)"
transcript_text = text_one97

# JSON body sent to the summary endpoint
payload = {
    "company_name": company_name,
    "transcript_text": transcript_text
}

# Local Flask server (port 5002)
url = "http://127.0.0.1:5002/earnings_transcript_summary"

# Always pass a timeout: without one, requests waits indefinitely if the
# server stalls. Values are (connect, read) seconds; the read limit is generous
# because the endpoint makes several model calls per request.
response = requests.post(url, json=payload, timeout=(10, 300))

# Show the parsed JSON on success, otherwise the status code and raw body
if response.status_code == 200:
    print("Response:", response.json())
else:
    print("Error:", response.status_code, response.text)


INFO:werkzeug:127.0.0.1 - - [30/Dec/2024 08:07:37] "POST /earnings_transcript_summary HTTP/1.1" 200 -


Response: {'company_name': 'One97 (Paytm)', 'environmental_risks': "The provided Paytm earnings transcript does not discuss environmental risks.  The conversation focuses entirely on the company's financial performance, specifically its lending models (including the new DLG model), cost reduction strategies, revenue streams, and future growth plans.  There is no mention of environmental, social, or governance (ESG) factors or any potential environmental impact of Paytm's operations.\n", 'expansion_plans': 'Paytm\'s (One97) Q2 FY25 earnings call revealed a focus on expansion within existing lending and payment services, rather than aggressive expansion into new areas.  Key expansion plans include:\n\n* **Scaling Lending Operations:**  Paytm is expanding its merchant loan business using a Direct Lending Guarantee (DLG) model, aligning with industry practices. While the DLG model involves upfront costs,  the overall take rate (net of DLG costs) is expected to remain above 5% over the loan

**Local test - Summarize Dr Lal Pathlabs with company_name field as empty string**

In [13]:
import requests
import json

# Error-handling check against the local server: company_name is present
# but empty, so the endpoint should answer 400 "No company_name provided".
company_name = ""
transcript_text = text_dr_lal_pathlabs

# JSON body sent to the summary endpoint
payload = {
    "company_name": company_name,
    "transcript_text": transcript_text
}

# Local Flask server (port 5002)
url = "http://127.0.0.1:5002/earnings_transcript_summary"

# Always pass a timeout: without one, requests waits indefinitely if the
# server stalls. Values are (connect, read) seconds.
response = requests.post(url, json=payload, timeout=(10, 300))

# Show the parsed JSON on success, otherwise the status code and raw body
if response.status_code == 200:
    print("Response:", response.json())
else:
    print("Error:", response.status_code, response.text)


INFO:werkzeug:127.0.0.1 - - [30/Dec/2024 08:07:37] "[31m[1mPOST /earnings_transcript_summary HTTP/1.1[0m" 400 -


Error: 400 {"error":"No company_name provided"}



**Local test - Summarize Dr Lal Pathlabs with transcript_text field as empty string**

In [14]:
import requests
import json

# Error-handling check against the local server: transcript_text is present
# but empty, so the endpoint should answer 400
# "No transcript_text content provided".
company_name = "Dr Lal Pathlabs"
transcript_text = ""

# JSON body sent to the summary endpoint
payload = {
    "company_name": company_name,
    "transcript_text": transcript_text
}

# Local Flask server (port 5002)
url = "http://127.0.0.1:5002/earnings_transcript_summary"

# Always pass a timeout: without one, requests waits indefinitely if the
# server stalls. Values are (connect, read) seconds.
response = requests.post(url, json=payload, timeout=(10, 300))

# Show the parsed JSON on success, otherwise the status code and raw body
if response.status_code == 200:
    print("Response:", response.json())
else:
    print("Error:", response.status_code, response.text)


INFO:werkzeug:127.0.0.1 - - [30/Dec/2024 08:07:37] "[31m[1mPOST /earnings_transcript_summary HTTP/1.1[0m" 400 -


Error: 400 {"error":"No transcript_text content provided"}



**Local test - Summarize Dr Lal Pathlabs with company_name field absent**

In [15]:
import requests
import json

# Error-handling check against the local server: the company_name field is
# left out of the payload entirely, so the endpoint should answer 400
# "No company_name provided". (No company_name variable is needed here.)
transcript_text = text_dr_lal_pathlabs

# JSON body sent to the summary endpoint (deliberately without company_name)
payload = {
    "transcript_text": transcript_text
}

# Local Flask server (port 5002)
url = "http://127.0.0.1:5002/earnings_transcript_summary"

# Always pass a timeout: without one, requests waits indefinitely if the
# server stalls. Values are (connect, read) seconds.
response = requests.post(url, json=payload, timeout=(10, 300))

# Show the parsed JSON on success, otherwise the status code and raw body
if response.status_code == 200:
    print("Response:", response.json())
else:
    print("Error:", response.status_code, response.text)


INFO:werkzeug:127.0.0.1 - - [30/Dec/2024 08:07:37] "[31m[1mPOST /earnings_transcript_summary HTTP/1.1[0m" 400 -


Error: 400 {"error":"No company_name provided"}



**Local test - Summarize Dr Lal Pathlabs with transcript_text field absent**

In [16]:
import requests
import json

# Error-handling check against the local server: the transcript_text field is
# left out of the payload entirely, so the endpoint should answer 400
# "No transcript_text content provided". (No transcript variable is needed here.)
company_name = "Dr Lal Pathlabs"

# JSON body sent to the summary endpoint (deliberately without transcript_text)
payload = {
    "company_name": company_name,
}

# Local Flask server (port 5002)
url = "http://127.0.0.1:5002/earnings_transcript_summary"

# Always pass a timeout: without one, requests waits indefinitely if the
# server stalls. Values are (connect, read) seconds.
response = requests.post(url, json=payload, timeout=(10, 300))

# Show the parsed JSON on success, otherwise the status code and raw body
if response.status_code == 200:
    print("Response:", response.json())
else:
    print("Error:", response.status_code, response.text)


INFO:werkzeug:127.0.0.1 - - [30/Dec/2024 08:07:37] "[31m[1mPOST /earnings_transcript_summary HTTP/1.1[0m" 400 -


Error: 400 {"error":"No transcript_text content provided"}



**PYTHONANYWHERE API TEST - Summarize Dr Lal Pathlabs**

In [29]:
import requests
import json

# Happy-path check against the PythonAnywhere deployment: a complete request
# for Dr Lal Pathlabs.
company_name = "Dr Lal Pathlabs"
transcript_text = text_dr_lal_pathlabs

# JSON body sent to the summary endpoint
payload = {
    "company_name": company_name,
    "transcript_text": transcript_text
}

# Hosted copy of the API on PythonAnywhere
url = "https://kshitijdegg.pythonanywhere.com/earnings_transcript_summary"

# Always pass a timeout: without one, requests waits indefinitely if the
# remote host stalls. Values are (connect, read) seconds; the read limit is
# generous because the endpoint makes several model calls per request.
response = requests.post(url, json=payload, timeout=(10, 300))

# Show the parsed JSON on success, otherwise the status code and raw body
if response.status_code == 200:
    print("Response:", response.json())
else:
    print("Error:", response.status_code, response.text)


Response: {'company_name': 'Dr Lal Pathlabs', 'environmental_risks': "The Dr. Lal PathLabs Q2 & H1 FY25 earnings transcript does not explicitly mention specific environmental risks.  The discussion focuses on business performance, market dynamics, expansion plans, and financial results.  There is no mention of environmental regulations, waste management, carbon footprint, or any other environmental concerns related to the company's operations.\n", 'expansion_plans': 'Dr. Lal PathLabs\' Q2 FY25 earnings transcript reveals expansion plans focusing on organic growth, primarily through:\n\n* **Increased lab and collection center openings:**  They plan to add 15-20 new labs this fiscal year, accelerating the addition of collection centers to support them and meet rising demand. This expansion targets underserved Tier-3 and Tier-4 markets and strengthens presence in core regions.  Approximately half of the planned labs were already added in the first half of the fiscal year.\n\n* **Expanding

**PYTHONANYWHERE API test - Summarize One97(Paytm)**

In [24]:
import requests
import json

# Happy-path check against the PythonAnywhere deployment: a complete request
# for One97 (Paytm).
company_name = "One97 (Paytm)"
transcript_text = text_one97

# JSON body sent to the summary endpoint
payload = {
    "company_name": company_name,
    "transcript_text": transcript_text
}

# Hosted copy of the API on PythonAnywhere
url = "https://kshitijdegg.pythonanywhere.com/earnings_transcript_summary"

# Always pass a timeout: without one, requests waits indefinitely if the
# remote host stalls. Values are (connect, read) seconds; the read limit is
# generous because the endpoint makes several model calls per request.
response = requests.post(url, json=payload, timeout=(10, 300))

# Show the parsed JSON on success, otherwise the status code and raw body
if response.status_code == 200:
    print("Response:", response.json())
else:
    print("Error:", response.status_code, response.text)


Response: {'company_name': 'One97 (Paytm)', 'environmental_risks': "The provided Paytm earnings transcript does not discuss environmental risks.  The conversation focuses entirely on the company's financial performance, particularly its lending models (including the Distributed Ledger Guarantee model), cost structure, and future growth strategies.  There is no mention of environmental impact, sustainability initiatives, or any related concerns.\n", 'expansion_plans': "Paytm's Q2 FY25 earnings call revealed a focus on expanding its lending business through increased monetization of existing devices and strategic partnerships.  Key expansion plans include:\n\n* **Expansion of the Direct Lending Guarantee (DLG) model:** Paytm is transitioning to a DLG model for merchant loans, aligning with industry practices.  While this initially incurs upfront costs (fully expensed in the current quarter), it's expected to result in higher overall take rates (above 5% net of DLG costs) over the life of

**PYTHONANYWHERE API TEST - Summarize Dr Lal Pathlabs with company_name field as empty string - Error handling**

In [25]:
import requests
import json

# Error-handling check against the PythonAnywhere deployment: company_name is
# present but empty, so the endpoint should answer 400
# "No company_name provided".
company_name = ""
transcript_text = text_dr_lal_pathlabs

# JSON body sent to the summary endpoint
payload = {
    "company_name": company_name,
    "transcript_text": transcript_text
}

# Hosted copy of the API on PythonAnywhere
url = "https://kshitijdegg.pythonanywhere.com/earnings_transcript_summary"

# Always pass a timeout: without one, requests waits indefinitely if the
# remote host stalls. Values are (connect, read) seconds.
response = requests.post(url, json=payload, timeout=(10, 300))

# Show the parsed JSON on success, otherwise the status code and raw body
if response.status_code == 200:
    print("Response:", response.json())
else:
    print("Error:", response.status_code, response.text)


Error: 400 {"error":"No company_name provided"}



**PYTHONANYWHERE API TEST - Summarize Dr Lal Pathlabs with transcript_text field as empty string - Error handling**

In [26]:
import requests
import json

# Error-handling check against the PythonAnywhere deployment: transcript_text
# is present but empty, so the endpoint should answer 400
# "No transcript_text content provided".
company_name = "Dr Lal Pathlabs"
transcript_text = ""

# JSON body sent to the summary endpoint
payload = {
    "company_name": company_name,
    "transcript_text": transcript_text
}

# Hosted copy of the API on PythonAnywhere
url = "https://kshitijdegg.pythonanywhere.com/earnings_transcript_summary"

# Always pass a timeout: without one, requests waits indefinitely if the
# remote host stalls. Values are (connect, read) seconds.
response = requests.post(url, json=payload, timeout=(10, 300))

# Show the parsed JSON on success, otherwise the status code and raw body
if response.status_code == 200:
    print("Response:", response.json())
else:
    print("Error:", response.status_code, response.text)


Error: 400 {"error":"No transcript_text content provided"}



**PYTHONANYWHERE API TEST - Summarize Dr Lal Pathlabs with company_name field absent - Error handling**

In [27]:
import requests
import json

# Error-handling check against the PythonAnywhere deployment: the company_name
# field is left out of the payload entirely, so the endpoint should answer 400
# "No company_name provided". (No company_name variable is needed here.)
transcript_text = text_dr_lal_pathlabs

# JSON body sent to the summary endpoint (deliberately without company_name)
payload = {
    "transcript_text": transcript_text
}

# Hosted copy of the API on PythonAnywhere
url = "https://kshitijdegg.pythonanywhere.com/earnings_transcript_summary"

# Always pass a timeout: without one, requests waits indefinitely if the
# remote host stalls. Values are (connect, read) seconds.
response = requests.post(url, json=payload, timeout=(10, 300))

# Show the parsed JSON on success, otherwise the status code and raw body
if response.status_code == 200:
    print("Response:", response.json())
else:
    print("Error:", response.status_code, response.text)


Error: 400 {"error":"No company_name provided"}



**PYTHONANYWHERE API TEST - Summarize Dr Lal Pathlabs with transcript_text field absent - Error handling**

In [28]:
import requests
import json

# Error-handling check against the PythonAnywhere deployment: the
# transcript_text field is left out of the payload entirely, so the endpoint
# should answer 400 "No transcript_text content provided".
# (No transcript variable is needed here.)
company_name = "Dr Lal Pathlabs"

# JSON body sent to the summary endpoint (deliberately without transcript_text)
payload = {
    "company_name": company_name,
}

# Hosted copy of the API on PythonAnywhere
url = "https://kshitijdegg.pythonanywhere.com/earnings_transcript_summary"

# Always pass a timeout: without one, requests waits indefinitely if the
# remote host stalls. Values are (connect, read) seconds.
response = requests.post(url, json=payload, timeout=(10, 300))

# Show the parsed JSON on success, otherwise the status code and raw body
if response.status_code == 200:
    print("Response:", response.json())
else:
    print("Error:", response.status_code, response.text)


Error: 400 {"error":"No transcript_text content provided"}

