In [4]:
# ✅ 1. LangChain Document Loaders (for PDFs, URLs, Text)
from langchain.document_loaders import PyPDFLoader, WebBaseLoader, TextLoader

# ✅ 2. LangChain Text Processing (for Splitting Reports)
from langchain.text_splitter import RecursiveCharacterTextSplitter

# ✅ 3. Hugging Face Transformers (for FinBERT Sentiment Analysis)
from transformers import pipeline

# ✅ 4. LangChain LLMChain (if using an LLM for sentiment instead of FinBERT)
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI

# ✅ 5. Pandas (for Storing Sentiment Data in RL Dataset)
import pandas as pd

# ✅ 6. yFinance (for Stock Data if Needed)
import yfinance as yf




In [2]:
!pip install langchain
!pip install transformers
!pip install openai
!pip install yfinance



In [6]:
!pip install -U langchain-community
!pip install pypdf

Collecting pypdf
  Downloading pypdf-5.3.1-py3-none-any.whl.metadata (7.3 kB)
Downloading pypdf-5.3.1-py3-none-any.whl (302 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.0/302.0 kB[0m [31m9.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.3.1


In [18]:
from langchain.document_loaders import PyPDFLoader

pdf_loader = PyPDFLoader("/content/maz1_2022.pdf")
docs = pdf_loader.load()


In [19]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
chunks = splitter.split_documents(docs)


In [20]:
from transformers import BertForSequenceClassification, BertTokenizer

model_path = "/content/drive/MyDrive/finbert_kdave_trained"
finbert = BertForSequenceClassification.from_pretrained(model_path)
tokenizer = BertTokenizer.from_pretrained(model_path)

In [21]:
from transformers import BertForSequenceClassification, BertTokenizer, pipeline

# This is to create a sentiment analysis pipeline
sentiment_pipeline = pipeline("sentiment-analysis", model=finbert, tokenizer=tokenizer)


# here we analyze the sentiment for each chunk that we created
sentiment_scores = [sentiment_pipeline(chunk.page_content) for chunk in chunks]

# Aggregate scores (average sentiment across all chunks)
final_sentiment = sum([score[0]["score"] for score in sentiment_scores]) / len(sentiment_scores)

print(f"Quarterly Report Sentiment Score: {final_sentiment}")


Device set to use cpu


Quarterly Report Sentiment Score: 0.7661232550938925


In [22]:
print(docs[0].page_content[:5000])  # to show first 500 characters


RR SO Ue ey Swat St 
(HR ALG GT FIA) 
Mazagon Dock Shipbuilders Ltd. 
(Formerly Mazagon Dock Limited) 
(A Govt. of India Undertaking) 
CIN : U35100MH1934G01002079 
Bead te, Argrtia, Aas - 400 010 
Dockyard Road, Mazagon, Mumbai - 400 010 
Certified - ISO 9007-2015 
deal pais 
Ref. No. 
Rae 10 August 2022 
Date 
To To 
BSE Limited National Stock Exchange of 
Phiroze Jeejeebhoy Towers India Limited 
Dalal Street, Exchange Plaza, C-1, Block G 
Mumbai- 400 001 Bandra Kurla Complex 
Scrip Code: 543237 Bandra (E), Mumbai- 400 051 
NSE Symbol : MAZDOCK 
Sub: Corporate Investors Presentation on Company’s Performance for the first quarter of FY 2022-2023 
—Reg 
Dear Sir/Madam 
  
Reference the captioned subject, kindly note that Corporate Investor Presentation on Company’s 
Performance for the first quarter of FY 2022-2023 has been uploaded on Mazagon Dock Shipbuilders 
Limited website, a copy of which is enclosed herewith for your kind reference. 
This is for your kind information and dissemin