In [10]:
!pip install pdfplumber

Collecting pdfplumber
  Downloading pdfplumber-0.11.4-py3-none-any.whl.metadata (41 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pdfminer.six==20231228 (from pdfplumber)
  Downloading pdfminer.six-20231228-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.5/48.5 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Downloading pdfplumber-0.11.4-py3-none-any.whl (59 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.2/59.2 kB[0m [31m3.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pdfminer.six-20231228-py3-none-any.whl (5.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

# Optional installs kept for reference (uncomment to run in a fresh environment):
# !pip install sentence-transformers
# !pip install tiktoken
# !pip install langchain-community faiss-gpu
# !pip install langchain-google-genai

In [86]:
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.schema import Document
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains.question_answering import load_qa_chain
from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [89]:
import pdfplumber
import pandas as pd
from pdfplumber.utils import extract_text, get_bbox_overlap, obj_to_bbox

def process_pdf(pdf_path):
    """Extract a PDF's text, rendering every detected table as Markdown.

    For each page, characters overlapping a detected table's bounding box are
    filtered out and replaced by one synthetic character whose text is the
    Markdown rendering of that table (first extracted row used as the header).
    The synthetic character reuses the attributes of the table's first
    character. The resulting characters are laid out with
    ``extract_text(..., layout=True)``.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        str: The text of all pages, joined with newlines.
    """
    all_text = []
    # The context manager closes the PDF even when extraction raises part-way.
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            filtered_page = page
            chars = filtered_page.chars

            for table in page.find_tables():
                table_chars = page.crop(table.bbox).chars
                rows = table.extract()
                if not table_chars or not rows:
                    # A table without characters or rows has nothing to anchor
                    # or render; skip it instead of failing on an empty list.
                    continue
                first_table_char = table_chars[0]

                # Bind the bbox as a default argument so the predicate keeps
                # referring to this table even if it is evaluated later.
                filtered_page = filtered_page.filter(
                    lambda obj, bbox=table.bbox:
                        get_bbox_overlap(obj_to_bbox(obj), bbox) is None
                )
                chars = filtered_page.chars

                df = pd.DataFrame(rows)
                df.columns = df.iloc[0]
                markdown = df.drop(0).to_markdown(index=False)
                # Insert the whole table as a single pseudo-character.
                chars.append(first_table_char | {"text": markdown})

            all_text.append(extract_text(chars, layout=True))

    return "\n".join(all_text)


In [90]:
def get_chunks(file_path, chunk_size=1000, chunk_overlap=200):
  """Read a UTF-8 text file and split it into overlapping chunks.

  Args:
    file_path: Path of the text file to read.
    chunk_size: Target maximum size of each chunk, passed to the splitter.
    chunk_overlap: Overlap between consecutive chunks, passed to the splitter.

  Returns:
    The list of text chunks returned by
    ``RecursiveCharacterTextSplitter.split_text``.
  """
  with open(file_path, 'r', encoding='utf-8') as file:
    text = file.read()
  text_splitter = RecursiveCharacterTextSplitter(
      chunk_size=chunk_size,
      chunk_overlap=chunk_overlap,
      add_start_index=True,
      )
  return text_splitter.split_text(text)

In [91]:
def get_vector_Store(chunks):
  """Embed text chunks and persist them as a local FAISS index.

  Every chunk is wrapped in a ``Document`` and embedded on CPU with the
  ``sentence-transformers/all-mpnet-base-v2`` model. The resulting FAISS store
  is saved under the local path ``faiss_index``. Nothing is returned.

  Args:
    chunks: Iterable of text chunks to index.
  """
  embedder = HuggingFaceEmbeddings(
      model_name="sentence-transformers/all-mpnet-base-v2",
      model_kwargs={'device': 'cpu'},
  )
  docs = []
  for piece in chunks:
    docs.append(Document(page_content=piece))
  FAISS.from_documents(docs, embedder).save_local("faiss_index")


In [30]:
#retriever=persist_vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

In [92]:
import os
# from dotenv import load_dotenv
# load_dotenv()

def get_conversational_chain():
  """Build the "stuff" question-answering chain backed by Gemini.

  The Google API key is read from the ``GOOGLE_API_KEY`` environment variable
  instead of being embedded in the notebook. No vector store is loaded here:
  the chain only consumes the documents the caller passes in.

  Returns:
    The chain created by ``load_qa_chain``; call it with the keys
    ``input_documents`` and ``question``.

  Raises:
    RuntimeError: If ``GOOGLE_API_KEY`` is not set.
  """
  # Secrets must never live in source. Any key that was previously committed
  # in this notebook should be treated as leaked and revoked.
  api_key = os.environ.get("GOOGLE_API_KEY")
  if not api_key:
    raise RuntimeError(
        "GOOGLE_API_KEY is not set; export it before building the chain."
    )

  # The template text is kept exactly as it was: the recorded answers in this
  # notebook were produced with this wording.
  prompt_template="""You are a highly analytical and very smart Generative AI assistant. You will receive a query and use the relevant information from a retrieved set of documents to answer it. Follow these steps:\n"
            "1. Retrieve the most relevant documents based on the query.\n"
            "2. Carefully analyze the retrieved documents in relation to the query.\n"
            "3. Provide a concise and direct answer to the query, based only on the information found in the documents.\n"
            "4. If the answer is not available in the retrieved documents, respond with 'Answer: answer not available in the context'.\n"
            "5. Ensure that the answer is specific, relevant, and concise. Avoid including irrelevant information.\n"

            Context:\n {context}?\n
            Question:\n {question}\n

            "Answer the query in a clear sentence directly after 'Answer: '.
            """
  model = ChatGoogleGenerativeAI(
      model="gemini-1.5-pro",
      temperature=0,
      api_key=api_key,
      )
  prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
  return load_qa_chain(model, chain_type="stuff", prompt=prompt)




In [93]:
# Holds the embedding model after first use so it is not re-created per query.
_EMBEDDINGS_CACHE = []


def _load_vectorstore():
  """Load the FAISS index stored under ``faiss_index``.

  The embedding model must be the one the index was built with
  (``sentence-transformers/all-mpnet-base-v2`` on CPU in ``get_vector_Store``).
  The index itself is re-read on every call so a rebuilt index is picked up.
  """
  if not _EMBEDDINGS_CACHE:
    _EMBEDDINGS_CACHE.append(
        HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-mpnet-base-v2",
            model_kwargs={'device': 'cpu'},
        )
    )
  # NOTE: load_local deserializes pickled data; only load an index that this
  # notebook wrote itself, never one from an untrusted source.
  return FAISS.load_local(
      "faiss_index",
      _EMBEDDINGS_CACHE[0],
      allow_dangerous_deserialization=True,
  )


def user_input(user_query):
  """Answer ``user_query`` from the documents in the saved FAISS index.

  The index is loaded inside this function rather than read from a notebook
  global, so the function also works on a freshly restarted kernel.

  Args:
    user_query: The question to answer.

  Returns:
    The chain's ``output_text`` for the query.
  """
  docs = _load_vectorstore().similarity_search(user_query)
  chain = get_conversational_chain()

  response = chain(
      {"input_documents": docs, "question": user_query},
      return_only_outputs=True,
  )
  return response['output_text']

In [94]:
# Build the index: PDF -> extracted text file -> chunks -> FAISS store.
pdf_path = r"Paper18_Set1_Sol.pdf"
extracted_text = process_pdf(pdf_path)

# NOTE(review): this looks like a Colab-specific absolute path; adjust it when
# running elsewhere.
file_path = '/content/extracted_file.txt'
# Write UTF-8 explicitly: get_chunks() reads this file back as UTF-8, and the
# platform default encoding is not guaranteed to match.
with open(file_path, 'w', encoding='utf-8') as file:
    file.write(extracted_text)

chunks = get_chunks(file_path)
get_vector_Store(chunks)


In [95]:
# Ask what must be declared before applying for GST registration.
query1 = "what is the things we should declare before applying for GST registration"
result = user_input(query1)
print(result)

Answer: Legal name of business, PAN, mobile number, e-mail address, and state or union territory.



In [73]:
# qa=RetrievalQA.from_chain_type(llm=llm,chain_type="stuff",retriever=retriever)

In [97]:
  # Ask about the powers and duties of the Anti-profiteering committee.
  # NOTE: this cell is indented as a whole; IPython tolerates that, plain
  # Python would not.
  query1 = "Describe the power and duties of Anti-profiteering committee. "
  result = user_input(query1)
  print(result)

Answer: The Anti-profiteering Authority determines if tax rate reductions or input tax credit benefits are passed on to recipients via commensurate price reductions, identifies registered persons who haven't, and can order price reductions.



In [98]:
# Look up the total CIF / assessable value.
query1 = "what is the Total CIF value/ Assessable Value"
result = user_input(query1)
print(result)
# Review notes: correct / correct

Answer: 1,12,648



In [99]:
# Look up the amount listed for a specific input-tax-credit line item.
query1 = "what is the value in for this 'Input tax credit which are used to supply taxable as well as exempted output supplies'"
result = user_input(query1)
print(result)
# Review notes: correct / correct

Answer: ` 54,000



In [101]:
# Worked problem: GST liability of the job worker (Kareena Ltd.).
query1 = """crown Beers India Pvt. Ltd., supplies raw material to a job worker
Kareena Ltd. for manufacture of alcoholic liquor for human consumption.
After completing the job-work, the finished product of 5,000 beer bottles
are returned to Crown Beers India Pvt. Ltd., putting the retail sale price as
`200 on each bottle (inclusive of duties and taxes). Kareena Ltd., charged
100 per bottle as job work charges of carrying out of intermediate
production process of alcoholic liquor for human consumption from Crown
Beers India Pvt. Ltd. Find the GST liability if rate is 18% (CGST 9% and
SGST 9%) in the hands of Kareena Ltd."""
result = user_input(query1)
print(result)
# Review notes: correct / correct

Answer: Kareena Ltd.'s GST liability is ₹90,000, with ₹45,000 each for CGST and SGST (5,000 bottles x ₹100 x 9%).



In [102]:
# Worked problem: value of supply per cylinder when the subsidy goes straight
# to the customer.
query1 = """Bharat Gas sells cooking gas cylinders. Subsidy directly transferred to the
account of the customer. Selling price per cylinder is ` 800. Customer
received subsidy ` 200 directly from Government to his bank account. Net
outflow of the buyer is ` 600. Find the value of supply of goods (per
cylinder) in the hands of Bharat Gas.  """
result = user_input(query1)
print(result)
# Review notes: correct / correct

Answer: The value of supply of goods per cylinder in the hands of Bharat Gas is ₹800.



In [103]:
# Worked problem: who is liable for GST on hotel rooms booked through an
# online platform, and the net liability.
query1 = """
Raman Hotels supplying only accommodation services in Chennai. Turnover of
Raman Hotels is less than 20 Lakhs. Raman Hotels listed hotel on online platform
namely Makemytrip.
The following categories of rooms get booked by the Makemytrip company who
pay to Raman Hotels after deducting their commission.
(A)  Declared value per room (category 1), Non AC Room `950 per Night.
(B)
Declared value per room (category 2), AC Room `1,800 per Night.
(C)  Declared value per room (category 3), AC Room `7,000 per Night, where
additional bed `1,800 per Night.
(D)  Declare value per room (category 4), AC Room `10,000 per Night, but
amount charged is `7000.
You are required to answer:
(1)
Who is liable to pay GST and
(2)
Net GST liability.
"""
result = user_input(query1)
print(result)

Answer: Makemytrip is liable to pay GST under section 9(5) of the CGST Act, 2017, as they are an electronic commerce operator supplying accommodation services.  The net GST liability cannot be calculated from the provided context, as the number of bookings for each room category is not specified.



In [None]:
#old answer

Based on the context provided, here's my attempt to answer the question:

**(1) Who is liable to pay GST?**

According to the CGST Act, 2017, the supplier of goods or services (in this case, Raman Hotels) is liable to pay GST.

**(2) Net GST liability.**

To determine the net GST liability, we need to consider each category of room and calculate the taxable value and GST liability separately.

Let's break down the categories:

(A) Declared value per room: `950 per Night
GST rate: 9% CGST + 9% SGST = 18%
Taxable value: Not clear (no additional information provided)
However, since Raman Hotels has a turnover less than `20 Lakhs and is not registered, the tax liability would be on the consumer (Makemytrip). But for calculation purposes, assuming the tax was passed on to the customer by Makemytrip.

(B) Declared value per room: `1,800 per Night
GST rate: 9% CGST + 9% SGST = 18%
Taxable value: Not clear (no additional information provided)
However, since Raman Hotels has a turnover less tha

In [104]:
# Look up the value of taxable services for the Tamil movie example.
query1 = "what is the Value of taxable services for a tamil movie"
result = user_input(query1)
print(result)

Answer: The value of taxable services for a Tamil movie is ₹1,98,000.



In [107]:
# Ask what the abbreviation RoDTEP stands for.
query1 = "what is the abbreviation of RoDTEP"
result = user_input(query1)
print(result)

Answer: RoDTEP stands for Remission of Duties and Taxes on Exported Products.



In [106]:
# Ask in which form RoDTEP is issued.
query1 = "what is the mode of issue for RoDTEP"
result = user_input(query1)
print(result)
# Review notes: correct
# Reference answer noted by the author: RoDTEP is issued in the form of
# transferable duty credits.

Answer: RoDTEP is issued in the form of transferable duty credit/electronic scrip.



In [108]:
# Ask whether RoDTEP is transferable.
query1 = "is RoDTEP transferable"
result = user_input(query1)
print(result)
# Review notes: correct / correct

Answer: Yes, RoDTEP is transferable in the open market.



In [110]:
# Look up the freight amount for the imported goods.
query1 = "what is the freight amount for the imported goods"
result = user_input(query1)
print(result)
# Review notes: correct / correct

Answer: The freight amount for the imported goods is ₹7,896.



In [112]:
# Look up the total assessable value for the imported goods.
query1 = "what is the total Assessable Value for the imported goods "
result = user_input(query1)
print(result)
# Review notes: incorrect

Answer: The assessable value for the imported goods is 230,000 Yen.



In [115]:
# Look up the value of the taxable supply of goods.
query1 = "value for taxable supply of goods"
result = user_input(query1)
print(result)
# Review notes: correct

Answer: The value of taxable supply of goods is ₹2,00,000.

