In [1]:
import getpass
import json
from openai import OpenAI
import warnings
warnings.filterwarnings('ignore')
import pdfkit
import requests
from serpapi import GoogleSearch
import re
import os
import pdfplumber
import pandas as pd
from pypdf import PdfReader
import tiktoken
from IPython.display import display
# !pip install pdfkit --quiet

In [None]:
import getpass
os.environ['SERP_API_KEY'] = getpass.getpass('Enter SERP API Key')
os.environ['OPENAI_API_KEY'] = getpass.getpass('Enter OpenAI API Key')

In [8]:
serpapi_key = "Enter Your API key"
client = OpenAI()

### Using Google SERPAPI - In Use

In [4]:
def fetch_latest_credit_rating(company_name, api_key):
    # Search query setup for SERPAPI
    params = {
        "api_key": api_key,
        "engine": "google",
        "q": f"{company_name} credit rating reporting latest",
        "num": 10
    }

    try:
        # Initialize GoogleSearch with parameters
        search = GoogleSearch(params)
        results = search.get_dict()
        # Check if 'organic_results' key is present in the response
        if "organic_results" not in results:
            print("No results found")
            return None

        # Process the top 10 organic results
        extracted_data = []
        for result in results["organic_results"]:
            title = result.get("title", "Title not found")
            snippet = result.get("snippet", "Snippet not found")
            date = result.get('date',"DATE not found")
            url = result.get("link", "URL not found")

            
            # Append the processed result
            extracted_data.append({
                "Title": title,
                "Snippet": snippet,
                "Date": date,
                "URL": url
            })
        
        return extracted_data

    except Exception as e:
        print(f"An error occurred: {e}")
        return None


### GPT Check

In [5]:
def get_gpt_reply(company_name,data, model ="gpt-4o-mini"):
# Define the function schema for date and URL extraction
    function_schema = {
        "name": "get_date_and_url",
        "description": "Function to retrieve URL containing link to latest report and to get date of latest report",
        "parameters": {
            "type": "object",
            "properties": {
                "date": {
                    "type": "string",
                    "description": "The latest date on which credit report is published"
                },
                "url": {
                    "type": "string",
                    "description": "The URL to the latest credit rating report containing either \"PDF\" keyword or \".html\" keyword."
                }
            },
            "required": ["date", "url"]
        }
    }
    system_prompt = '''
         You are a financial assistant. You will be provided list of json objects containing titles, snippets, date and urls. 
         - In given json objects, you will look for url which can give me latest credit report of a company. 
         - **Along with being latest, make sure that url (which is basically in string format) must either contain "PDF" keyword or ".html" keyword.**
         
         Also return me the **date** corresponding to the above url.
         '''
    
    prompt = f'''
        Provide me the url which can led to the latest credit rating report.
        Desired url must contain keywords like 'PDF' in them or ends with '.html' or ends with '.pdf'
        
        The company name is {company_name} and list of json objects is :\
        
        {data}
        '''
    # Create the API call using the OpenAI() class
    response = client.chat.completions.create(
        model=model,  # Replace with your chosen model
        messages=[
            {"role":"system", "content": system_prompt,
             "role": "user", "content": prompt
             }
        ],
        functions=[function_schema],
        function_call={"name": "get_date_and_url"}
    )
    
    # Display the response with date and URL
    return json.loads(response.choices[0].message.function_call.arguments)


### PDF Download

In [54]:
def download_pdf(url,company_name):
    filename =f'{company_name}_CR.pdf'
    if '.html' in url:
        try:
            pdfkit.from_url(url,filename)
            
            return f"PDF downloaded successfully and saved as '{filename}'"
        except:
            return f"Failed to download PDF."
            
    else:
        response = requests.get(url,verify =False)
        if response.status_code == 200:
            with open(filename, "wb") as file:
                file.write(response.content)
            return f"PDF downloaded successfully and saved as '{filename}'"
        else:
            return f"Failed to download PDF. Status code: {response.status_code}"
    


### Check Here...

In [11]:
company_name = input("Enter Company Name: ")
data= fetch_latest_credit_rating(company_name, serpapi_key)
print("Information is Fetched from Google Successfully")
reply = get_gpt_reply(company_name,data)
print("Response is taken from GPT Successfully")
print(reply)
url = reply['url']
download_pdf(url,company_name)

"PDF downloaded successfully and saved as 'muthoot Finance_CR.pdf'"

In [12]:
# Display results
if data:
    for result in data:
        print("Title:", result["Title"])
        print("Snippet:", result["Snippet"])
        print("Date:", result["Date"])
        print("URL:", result["URL"])
        print("-----")


Title: Muthoot Finance Ltd Credit Ratings
Snippet: ... Muthoot Finance Ltd Debt Level: senior secured. Issue: USD 400 mln 6.375% bond/note 23-Apr-2029. 23-Oct-2024. BB New Rating Long Term Rating Rating History.
Date: DATE not found
URL: https://www.fitchratings.com/entity/muthoot-finance-ltd-82941140
-----
Title: Fitch Affirms Muthoot Finance at 'BB'; Outlook Stable
Snippet: Fitch Ratings has affirmed India-based Muthoot Finance Ltd's (MFL) Long-Term Foreign- and Local-Currency Issuer Default Ratings (IDRs) at 'BB'. The Outlook is ...
Date: Aug 29, 2024
URL: https://www.fitchratings.com/research/non-bank-financial-institutions/fitch-affirms-muthoot-finance-at-bb-outlook-stable-29-08-2024
-----
Title: Muthoot Finance Ltd. Credit Rating
Snippet: Muthoot Finance Ltd. continues to outperform its peers in terms of asset quality and profitability, despite some pressure on margin and growth due to intense ...
Date: DATE not found
URL: https://disclosure.spglobal.com/ratings/en/regulatory/org

# Part 2

###  PDF to Excel of Tables

In [18]:
def make_tables(pdf_file):

    output_file = f"{company_name}_tables.xlsx"
    # Open the PDF
    try:
        with pdfplumber.open(pdf_file) as pdf:
            all_tables = []
            text =""
            for page in pdf.pages:
                tables = page.extract_tables()
                for table in tables:
                    df = pd.DataFrame(table)
                    all_tables.append(df)
        
        # Save all extracted tables to Excel
        with pd.ExcelWriter(output_file) as writer:
            for i, table in enumerate(all_tables):
                table.to_excel(writer, sheet_name=f"Table_{i+1}", index=False)
    
        print(f"Tables saved to {output_file}")
        return all_tables,text
    except Exception as e:
        print(f"Failed to create tables as an error occured: {e}")
        return [],[]


#### **Bonus**: For extracting financial metric table only

In [15]:
def find_finance_table(pdf_path):

    # List to store DataFrames of tables containing 'PAT'
    dfs_with_pat = []
    
    with pdfplumber.open(pdf_path) as pdf:
        for page_num, page in enumerate(pdf.pages, start=1):
            # Extract tables from the current page
            tables = page.extract_tables()
            
            for table_idx, table in enumerate(tables, start=1):
                # Check if any row in the table contains the keyword 'PAT'
                if any("PAT" in cell for row in table for cell in row if cell):  # Avoid NoneType errors
                    # Convert the table into a DataFrame
                    df = pd.DataFrame(table)
                    dfs_with_pat.append({
                        "page": page_num,
                        "table_index": table_idx,
                        "dataframe": df
                    })
    
    # Output the DataFrames
    return dfs_with_pat


In [16]:
pdf ="OXYZON finance_CR.pdf"
just = find_finance_table(pdf)

In [62]:
output_excel = "tables_with_pat.xlsx"
with pd.ExcelWriter(output_excel, engine="xlsxwriter") as writer:
    for idx, table_info in enumerate(just, start=1):
        sheet_name = f"Page{table_info['page']}_Table{table_info['table_index']}"
        table_info['dataframe'].to_excel(writer, index=False, header=False, sheet_name=sheet_name)

print(f"Filtered tables saved to {output_excel}")

Filtered tables saved to tables_with_pat.xlsx


In [19]:
for table_info in just:
    print(f"Page: {table_info['page']}, Table Index: {table_info['table_index']}")
    print(table_info['dataframe'])
    print("\n")

Page: 4, Table Index: 1
   0                         1  2        3       4     5        6       7   \
0            Oxyzo (standalone)              FY2021                 FY2022   
1                                   Audited    None  None  Audited    None   
2                           PAT        39.9    None  None     69.3    None   
3                     Net worth       449.6    None  None  1,392.3    None   
4                           AUM     1,389.4    None  None  2,591.7    None   
5                  Total assets     1,643.0    None  None  3,439.2    None   
6      Return on average assets        3.1%    None  None     2.7%    None   
7      Return on average equity       10.4%    None  None     7.5%    None   
8               Gearing (times)         2.6    None  None      1.4    None   
9                          CRAR       32.3%    None  None    48.4%    None   
10                Gross stage 3        1.2%    None  None     1.0%    None   
11                  Net stage 3        0

### Exctracting Text and Counting tokens

In [57]:
## Written function for both types of pdf reader : PdfReader and pdfplumber. Try any :)

def extract_text_with_pdfreader(pdf_file):

    try:
         reader = PdfReader(pdf_file)
         all_text=[]
         for i,page in enumerate(reader.pages):
           text = page.extract_text()
           if text:  # Append only if text is found, also a small trick to add page number at the start of any page
                all_text.append(f"Page number is: {i+1}\n,{text}") 
           else:
               all_text.append(f"Page number is: {i+1}")
        
        # Combine text from all pages
         return 1,"\n".join(all_text)

    except Exception as e:
        print("An error occured in extracting text")
        return 0, f"An error occurred: {e}"



def extract_text_with_pdfplumber(pdf_file):

    try:
        all_text = []

        # Open the PDF file
        with pdfplumber.open(pdf_file) as pdf:
            # Iterate through each page
            for i,page in enumerate(pdf.pages):
                # Extract text from the page
                text = page.extract_text()
                if text:  # Append only if text is found
                    all_text.append(f"Page number is: {i+1}\n{text}")
                else:
                    all_text.append(f"Page number is: {i+1}\n")
                    
        
        # Combine text from all pages
        return 1,"\n".join(all_text)

    except Exception as e:
        return 0, f"An error occurred: {e}"



def calculate_tokens(text, model="gpt-4o-mini"):

    try:
        # Load the tokenizer for the specified model
        tokenizer = tiktoken.encoding_for_model(model)
        tokens = tokenizer.encode(text)
        return len(tokens)
    
    except Exception as e:
        return f"An error occurred: {e}"


### Extraction from CR using LLM

In [21]:
# Function to extract information from the credit report
def gpt_final_info(report_text,model="gpt-4o-mini"):
    
    function_schema = {
    "name": "extract_credit_report_info",
    "description": "Extract credit ratings, net worth, and company information from a credit rating report.",
    "parameters": {
        "type": "object",
        "properties": {
            "credit_ratings": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "instrument": {"type": "string", "description": "Name of the financial instrument"},
                        "rating": {"type": "string", "description": "The credit rating assigned"},
                        "page_numbers": {
                            "type": "array",
                            "items": {"type": "integer"},
                            "description": "Page numbers where this data is found"
                        }
                    },
                },
                "description": "List of credit ratings for different financial instruments."
            },
            "net_worth": {
                "type": "object",
                "properties": {
                    "company_net_worth": {"type": "string", "description": "Net worth of the company"},
                    "page_numbers": {
                        "type": "array",
                        "items": {"type": "integer"},
                        "description": "Page numbers where net worth information is found"
                    }
                },
                "description": "Net worth of the company and its page numbers."
            },
            "company_info": {
                "type": "object",
                "properties": {
                    "about_company": {"type": "string", "description": "General information about the company"},
                    "page_numbers": {
                        "type": "array",
                        "items": {"type": "integer"},
                        "description": "Page numbers where company information is found"
                    }
                },
                "description": "General information about the company and its page numbers."
            }
        },
        "required": ["credit_ratings", "net_worth", "company_info"]
    }
    }

    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "system",
                "content": ''' You are a credit rating report analysis assistant. You will be provided with text. You have to return:
                         1.) Credit Ratings of different financial instruments along with the name of financial instruments
                         2.) Net worth of company: Net worth of a company refers to the value that remains after subtracting total liabilities from total assets. The net worth is also known as shareholders' equity, book value, or net assets. A positive net worth indicates that a company's assets exceed its liabilities, suggesting financial stability and the ability to cover obligations.
                         3.) About the company: General information about company like what it to do and what is its business

                         Along with above data, you also have to return the page number where you get the above data.
                         Actually, page number is mentioned at the starting of each section in the text, from there you can get page number.

                         In case, if you are not able to get any of above information just return 'None' in that case
                ''' 
            },
            {
                "role": "user",
                "content": f"Extract the details from the provided credit rating report: \n\n{report_text}"
            }
        ],
        functions=[function_schema],
        function_call={"name": "extract_credit_report_info"}
    )
    
    # Extract and return the response data
    extracted_data = response.choices[0].message.function_call.arguments
    return json.loads(extracted_data)


def create_dataframe(data):

    try:
    
        credit_ratings_df = pd.DataFrame(data["credit_ratings"])
        credit_ratings_df["page_numbers"] = credit_ratings_df["page_numbers"].apply(lambda x: ', '.join(map(str, x)))
        
        net_worth_df = pd.DataFrame([data["net_worth"]])
        net_worth_df["page_numbers"] = net_worth_df["page_numbers"].apply(lambda x: ', '.join(map(str, x)))
        
        # Extract company information into a DataFrame
        company_info_df = pd.DataFrame([data["company_info"]])
        company_info_df["page_numbers"] = company_info_df["page_numbers"].apply(lambda x: ', '.join(map(str, x)))
        
        credit_ratings_df.rename(columns={"page_numbers": "Credit Rating Page No."}, inplace=True)
        net_worth_df.rename(columns={"page_numbers": "Net Worth Page No."}, inplace=True)
        company_info_df.rename(columns={ "page_numbers": "Company Info Page No."}, inplace=True)
        
        # Concatenate the DataFrames along axis=1
        final_df = pd.concat([credit_ratings_df, net_worth_df, company_info_df], axis=1)
    
        return final_df
    except Exception as e:
        print(f"Error occured in creating a final dataframe: {e}")
        return f"Error occured in creating a final dataframe: {e}"

### Check here - II

In [29]:
company_name = input("Enter Company Name: ")
data= fetch_latest_credit_rating(company_name, api_key)
print("Information is Fetched from Google Successfully")
reply = get_gpt_reply(company_name,data)
print("Response is taken from GPT Successfully")
print(reply)
url = reply['url']
status = download_pdf(url,company_name)
print(status)

filename = f"{company_name}_CR.pdf"  # Replace with your PDF file path
flag,extracted_text = extract_text_with_pdfplumber(filename)

if (flag):
    make_tables(filename)
    token_count = calculate_tokens(extracted_text)
    print(f"Token count for the input text: {token_count}")
    if (token_count>15000):
        print("Token Count hit the limit, so stopping the code")
    else:
        extracted_info  = gpt_final_info(extracted_text)
        results =  create_dataframe(extracted_info)
        display(extracted_info)

Tables saved to muthoot Finance_tables.xlsx
Token count for the input text: 6621


{'credit_ratings': [{'instrument': 'Long term – Fund based/CC',
   'rating': '[ICRA]A+ (Stable)',
   'page_numbers': [1]},
  {'instrument': 'Long term – Fund-based TL',
   'rating': '[ICRA]A+ (Stable)',
   'page_numbers': [1]},
  {'instrument': 'Long term – Unallocated',
   'rating': '[ICRA]A+ (Stable)',
   'page_numbers': [1]},
  {'instrument': 'LT-Market linked debenture',
   'rating': 'PP-MLD[ICRA]A+ (Stable)',
   'page_numbers': [1]},
  {'instrument': 'Commercial paper programme',
   'rating': '[ICRA]A1+',
   'page_numbers': [1]},
  {'instrument': 'Non-convertible debenture',
   'rating': '[ICRA]A+ (Stable)',
   'page_numbers': [1]},
  {'instrument': 'LT borrowing programme',
   'rating': '[ICRA]A+ (Stable)',
   'page_numbers': [1]},
  {'instrument': 'Issuer rating',
   'rating': '[ICRA]A+ (Stable)',
   'page_numbers': [1]}],
 'net_worth': {'company_net_worth': 'Rs. 2,433 crore', 'page_numbers': [2]},
 'company_info': {'about_company': 'Oxyzo is a Gurgaon-based NBFC, which commence

In [30]:
results

Unnamed: 0,instrument,rating,Credit Rating Page No.,company_net_worth,Net Worth Page No.,about_company,Company Info Page No.
0,Long term – Fund based/CC,[ICRA]A+ (Stable),1,"Rs. 2,433 crore",2.0,"Oxyzo is a Gurgaon-based NBFC, which commenced...",4.0
1,Long term – Fund-based TL,[ICRA]A+ (Stable),1,,,,
2,Long term – Unallocated,[ICRA]A+ (Stable),1,,,,
3,LT-Market linked debenture,PP-MLD[ICRA]A+ (Stable),1,,,,
4,Commercial paper programme,[ICRA]A1+,1,,,,
5,Non-convertible debenture,[ICRA]A+ (Stable),1,,,,
6,LT borrowing programme,[ICRA]A+ (Stable),1,,,,
7,Issuer rating,[ICRA]A+ (Stable),1,,,,


## Claude's PDF Beta

In [43]:
import os
import pandas as pd
import anthropic
import base64
client_claude = anthropic.Anthropic()

In [44]:
os.environ['ANTHROPIC_API_KEY'] = getpass.getpass('Enter Anthropic API key')

Enter Anthropic API key ········


### Things I need:
<ul> 
    <li>
        List of Financial instruments and their rating and page number you find from.
    </li>
    <li>
       Company net worth and page number you find from.
    </li>
    <li>
        About company and page number you from find from
    </li>
    <li> 
        Financial key metrics:
        <li> 
            Proft after tax (PAT) 
        </li>
        <li>
            Total operating income / operating income
        </li>
         <li>
             AUM
         </li>
         Following above along with page number and for both most recent and next recent years
    
   </li>
</ul>

In [37]:
tool_function = {

        "name": "extract_credit_rating_info",
        "description": "Extract key details from a credit rating report PDF",
        "input_schema": {
            "type": "object",
            "properties": {
                "credit_ratings": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "instrument": {"type": "string", "description": "Name of the financial instrument"},
                        "rating": {"type": "string", "description": "The credit rating assigned"},
                        "page_numbers": {
                            "type": "array",
                            "items": {"type": "integer"},
                            "description": "Page numbers where this data is found"
                        }
                    },
                },
                "description": "List of credit ratings for different financial instruments."
            },
            "net_worth": {
                "type": "object",
                "properties": {
                    "company_net_worth": {"type": "string", "description": "Net worth of the company"},
                    "page_numbers": {
                        "type": "array",
                        "items": {"type": "integer"},
                        "description": "Page numbers where net worth information is found"
                    }
                },
                "description": "Net worth of the company and its page numbers."
            },
            "company_info": {
                "type": "object",
                "properties": {
                    "about_company": {"type": "string", "description": "General information about the company"},
                    "page_numbers": {
                        "type": "array",
                        "items": {"type": "integer"},
                        "description": "Page numbers where company information is found"
                    }
                },
                "description": "General information about the company and its page numbers."
            },
        
                "PAT": {
                    "type": "object",
                    "description": "Profit After Tax (PAT) details of company",
                    "properties": {
                        "most_recent": {"type": "string", "description": "Most recent or latest year's PAT along with year"},
                        "most_recent_year" :{ "type": "integer", "description": "Most recent year"},
                        "next_recent": {"type": "string", "description": "Next recent year's PAT along with year"},
                        "next_recent_year" :{ "type": "integer", "description": "Next recent year after most recent year"},                   
                        "page_numbers": {"type": "array", "items": {"type": "integer"}, "description": "Page numbers where PAT is found"}
                    },
                    "required": ["most_recent", "next_recent", "page_numbers"]
                },
                "operating_income": {
                    "type": "object",
                    "description": "Operating income details",
                    "properties": {
                        "most_recent": {"type": "string", "description": "Most recent or latest year's operating income along with year"},
                         "most_recent_year" :{ "type": "integer", "description": "Most recent year"},
                        "next_recent": {"type": "string", "description": "Next recent year's operating income along with year"},
                        "next_recent_year" :{ "type": "integer", "description": "Next recent year after most recent year"},   
                        "page_numbers": {"type": "array", "items": {"type": "integer"}, "description": "Page numbers where operating income is found"}
                    },
                    "required": ["most_recent", "next_recent", "page_numbers"]
                },
                "AUM": {
                    "type": "object",
                    "description": "Assets Under Management (AUM) details",
                    "properties": {
                        "most_recent": {"type": "string", "description": "Most recent  or latest year's AUM along with year"},
                        "most_recent_year" :{ "type": "integer", "description": "Most recent year"},
                        "next_recent": {"type": "string", "description": "Next recent year's AUM along with year"},
                        "next_recent_year" :{ "type": "integer", "description": "Next recent year after most recent year"},   
                        "page_numbers": {"type": "array", "items": {"type": "integer"}, "description": "Page numbers where AUM is found"}
                    },
                    "required": ["most_recent", "next_recent", "page_numbers"]
                }
            },
            "required": ["financial_instruments", "company_net_worth", "about_company", "PAT", "operating_income", "AUM"]
},
}
    


In [38]:
def get_anthropic_reply(image_path, model = "claude-3-5-sonnet-20241022"):
    
    system_prompt = '''
    You are a credit rating report analysis assistant. You will be provided with text. You have to return:
    1.) Credit Ratings of different financial instruments along with the name of financial instruments
    2.) Net worth of company: Net worth of a company refers to the value that remains after subtracting total liabilities from total assets. The net worth is also known as shareholders' equity, book value, or net assets. 
    3.) About the company: General information about company like what it do and what is its business
    4.) Profit After Tax (PAT) of company in most recent/latest year available in text and in year just previous to most recent year. Please provide year too in both cases.
    5.) Operating income or  Total operating income of company in most recent/latest year available in text and in year just previous to most recent year. Please provide year too in both cases.
    6.) AUM (asset under managemet) or Total AUM  in most recent/latest year available in text and in year just previous to most recent year. Please provide year too in both cases.
    
    
    Along with above data, you also have to return the **page number** where you get the above data.
    In case, if you are not able to get any of above information just return **None** in that case.

                    
    '''
    user_prompt = '''
    Given is the credit rating report of a company.
    I want you to extract following details for me:
    1.) Credit Ratings of different financial instruments
    2.) Net Worth
    3.) Info about company
    4.) PAT
    5.) Operating income
    6.) AUM
    You must return each of the above detail from report. In case, if you are not able to find any of them, 
    then just return 'None' in the front of detail which will indicate that it is not found.
    ''' 
    def encode_pdf(image_path):
        with open(image_path, "rb") as pdf_file:
            pdf_data = base64.standard_b64encode(pdf_file.read()).decode("utf-8")
            
        return pdf_data

    
    response = client_claude.beta.messages.create(
        model= model,
        betas=["pdfs-2024-09-25"],
        system = system_prompt,
        max_tokens = 700,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "document",
                        "source": {
                            "type": "base64",
                            "media_type": "application/pdf",
                            "data": encode_pdf(image_path),
                        }
                    },
                    {
                        "type": "text",
                        "text": user_prompt
                    }
                ]
            }
        ],
        tools = [tool_function]
    
    )

    return response

In [39]:
def create_dataframe(data):

    try:
        
        keys =  data.keys()
        result_df = []
        for key in keys:
            temp_df = pd.DataFrame(data[key])
            # temp_df['page_numbers'] = temp_df['page_numbers'].apply(lambda x: ', '.join(map(str,x)))
            temp_df.rename(columns={"page_numbers": f"{key} Page No."}, inplace=True)
            if key in ['PAT','AUM','operating_income']:
                temp_df.rename(columns = {"most_recent": f"{key}(latest yr)"}, inplace =True)
                temp_df.rename(columns = {"next_recent": f"{key}"}, inplace =True)
            result_df.append(temp_df)
            
        final_df = pd.concat(result_df,axis=1)

        return final_df

    except Exception as e:
        print(f"Error occured in creating a final dataframe: {e}")
        return f"Error occured in creating a final dataframe: {e}"


## Check here - III

In [45]:
company_name = input("Enter Company Name: ")
data= fetch_latest_credit_rating(company_name, api_key)
print("Information is Fetched from Google Successfully")
reply = get_gpt_reply(company_name,data)
print("Response is taken from GPT Successfully")
print(reply)
url = reply['url']
download_pdf(url)

filename = f"{company_name}_CR.pdf"  # Replace with your PDF file path
flag,extracted_text = extract_text_with_pdfplumber(filename)

if (flag):
    # make_tables(filename)
    token_count = calculate_tokens(extracted_text)
    print(f"Token count for the input text: {token_count}")
    if (token_count>15000):
        print("Token Count hit the limit, so stopping the code")
    else:
        extracted_info  = get_anthropic_reply(filename)
        print("Required Information is extracted fom pdf")
        data= extracted_info.content[1].input
        results =  create_dataframe(data)
        display(results)

Token count for the input text: 6621
Required Information is extracted fom pdf
Error occured in creating a final dataframe: 'BetaMessage' object has no attribute 'keys'


"Error occured in creating a final dataframe: 'BetaMessage' object has no attribute 'keys'"

In [49]:
results =  create_dataframe(extracted_info.content[1].input)
display(results)


Unnamed: 0,instrument,rating,credit_ratings Page No.,company_net_worth,net_worth Page No.,about_company,company_info Page No.,PAT(latest yr),most_recent_year,PAT,next_recent_year,PAT Page No.,operating_income(latest yr),operating_income,operating_income Page No.
0,Long term – Fund based/CC,[ICRA]A+ (Stable),[1],"Rs. 2,433 crore as on September 30, 2023",2.0,"Oxyzo is a Gurgaon-based NBFC, which commenced...",3.0,Rs. 135 crore in H1 FY2024,2024.0,Rs. 197 crore in FY2023,2023.0,2.0,,,
1,Long term – Fund-based TL,[ICRA]A+ (Stable),[1],,,,,,,,,,,,
2,Long term – Unallocated,[ICRA]A+ (Stable),[1],,,,,,,,,,,,
3,LT-Market linked debenture,PP-MLD[ICRA]A+ (Stable),[1],,,,,,,,,,,,
4,Commercial paper programme,[ICRA]A1+,[1],,,,,,,,,,,,
5,Non-convertible debenture,[ICRA]A+ (Stable),[1],,,,,,,,,,,,
6,LT borrowing programme,[ICRA]A+ (Stable),[1],,,,,,,,,,,,
7,Issuer rating,[ICRA]A+ (Stable),[1],,,,,,,,,,,,


## Gradio

In [55]:
import gradio as gr
import pandas as pd
import time
import base64

def process_company(company_name):

    data= fetch_latest_credit_rating(company_name, serpapi_key)
    yield "Information is Fetched from Google Successfully"
    
    reply = get_gpt_reply(company_name,data)
    yield "Response is taken from GPT Successfully"
    yield json.dumps(reply)
    
    url = reply['url']
    response = download_pdf(url,company_name)
    yield response
    
    filename = f"{company_name}_CR.pdf"  # Replace with your PDF file path
    flag,extracted_text = extract_text_with_pdfplumber(filename)
    if( flag==0):
        yield extracted_text

    results = pd.DataFrame()
    if (flag):
        # make_tables(filename)
        token_count = calculate_tokens(extracted_text)
        yield f"Token count for the input text: {token_count}"
        if (token_count>15000):
            yield "Token Count hit the limit, so stopping the code"
        else:
            extracted_info  = get_anthropic_reply(filename)
            extracted_info = extracted_info.content[1].input
            yield "Required Information is extracted fom pdf"
            results =  create_dataframe(extracted_info)

    yield "Execution Completed!!", results
    

def run_analysis(company_name, password):
    if password != "Enter your password":  # Enter your password here...
        yield gr.update(value="Invalid password! Please try again."), None
        return

    progress = []
    yield gr.update(value="Starting analysis...\n"), None

    # Process company name and yield intermediate updates
    for update in process_company(company_name):
        if isinstance(update, str):  # If it's a log message
            progress.append(update)
            yield gr.update(value="\n".join(progress)), None  # Update log
        else:  # If it's the final DataFrame
            message, df = update
            progress.append(message)
            yield gr.update(value="\n".join(progress)), df  # Update log and DataFrame

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("<h1 style='text-align: center; color: white;'>Credit Reporting Analysis Tool</h1>")
    gr.Markdown("<p style='text-align: center; color: white;'>Enter the company name to start the analysis.</p>")
    
    with gr.Row():
        company_name_input = gr.Textbox(label="Company Name", placeholder="Enter company name here")
        password_input = gr.Textbox(label="Password", type="password", placeholder="Enter password here")
    
    log_output = gr.Textbox(label="Progress Log", lines=10, interactive=False)
    dataframe_output = gr.Dataframe(label="Final Output")
    
    analyze_button = gr.Button("Run Analysis")
    analyze_button.click(run_analysis, inputs=[company_name_input, password_input], outputs=[log_output, dataframe_output])

demo.launch(share=True)


* Running on local URL:  http://127.0.0.1:7862
* Running on public URL: https://7721d9d89780077107.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


