In [1]:
import os
from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI


# Load variables from a local .env file (if any) into the process environment
# before the API key is read below.
load_dotenv()

# Indexing os.environ (rather than .get) raises KeyError right here when the
# key is missing, instead of failing later on the first model call.
llm_model = ChatGoogleGenerativeAI(
    api_key=os.environ["GOOGLE_API_KEY"],
    model="gemini-2.5-flash",
)

In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain.pydantic_v1 import Field , BaseModel
from typing import Optional
from datetime import date
from langchain_core.output_parsers import JsonOutputParser

# Structured request describing one task operation (add / update / delete / list).
# NOTE(review): BaseModel/Field come from `langchain.pydantic_v1`, for which the
# recorded cell output shows a deprecation warning; consider migrating the import.
# A class docstring is deliberately avoided: pydantic would copy it into the
# JSON schema and thereby change the parser's format instructions.
class TaskRequest(BaseModel):
    # Required: which operation to perform.
    action: str = Field(
        default=...,
        description="Action type: add, update, delete, list",
    )
    # Optional at the schema level; the description states when it is needed.
    title: Optional[str] = Field(
        default=None,
        description="The title of the task (required for add/update/delete)",
    )
    # Optional task date; None when the caller does not supply one.
    task_date: Optional[date] = Field(
        default=None,
        description="The date of the task (optional, defaults to today)",
    )


# JSON parser bound to the TaskRequest schema; its format instructions are
# derived from the model definition above.
task_request_parser = JsonOutputParser(
    pydantic_object=TaskRequest,
)

# Plain-text description of the database schema; it is injected into MAIN_PROMPT
# through the {DB_SCHEMA_PROMPT} placeholder.
# NOTE(review): the text announces "two tables" but only `tasks` is listed, and
# it references `users.email` without describing a `users` table — confirm
# whether the second table definition is missing.
DB_SCHEMA_PROMPT = """
The database has two tables:
   
Table: tasks
- id (PRIMARY KEY, AUTO_INCREMENT, INT)
- task_date (DATE)
- title (VARCHAR)
- user_email (VARCHAR, FOREIGN KEY → users.email)
"""


# System instructions for MAIN_PROMPT, kept as a separate constant so the
# message list below stays readable.
# NOTE(review): this text mixes three incompatible goals — a GPA calculator
# persona, a JSON object with an 'answer' key, and "raw SQL only" rules over the
# tasks schema. MAIN_PROMPT is not used by any other cell visible here; confirm
# the intended purpose before relying on it.
# The doubled braces around session_user_email escape them, so the literal
# text {session_user_email} reaches the model rather than a template variable.
_MAIN_SYSTEM_TEMPLATE = '''You are a GPA calculator assistant.
Compute and provide calculated gpa from the given subjects with their credit points, nothing else.
Return the query in 'JSON object' with 'answer' as key.
{format_instructions} 

Use the schema below:
{DB_SCHEMA_PROMPT}

Rules:
1. You will receive a structured JSON request following the TaskRequest schema.
2. User email is never provided. Always use '{{session_user_email}}' for user_email.
3. If task_date is missing in ADD, use CURRENT_DATE.
4. For UPDATE and DELETE, require both id and user_email.
5. Always return raw SQL only (no explanations, no markdown).'''

# Two-message chat prompt; the schema text and the parser's format instructions
# are pre-filled, leaving only {user_input} to be supplied at invoke time.
MAIN_PROMPT = ChatPromptTemplate.from_messages(
    [
        ("system", _MAIN_SYSTEM_TEMPLATE),
        ("human", "{user_input}"),
    ]
).partial(
    DB_SCHEMA_PROMPT=DB_SCHEMA_PROMPT,
    format_instructions=task_request_parser.get_format_instructions(),
)


For example, replace imports like: `from langchain.pydantic_v1 import BaseModel`
with: `from pydantic import BaseModel`
or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. 	from pydantic.v1 import BaseModel

  exec(code_obj, self.user_global_ns, self.user_ns)


In [131]:
# Sample input: raw text of a provisional end-semester results sheet, one
# course per line (serial no., semester, course code, course name, grade, result).
# NOTE(review): this literal contains personal data (student name, register
# number, date of birth) — redact or replace with synthetic data before sharing
# or committing the notebook.
user_input = """
1
OFFICE OF THE CONTROLLER OF EXAMINATIONS
PROVISIONAL RESULTS OF END SEMESTER  EXAMINATIONS November / December -2024
Register Number: 110322104054 Date of Publication: 12-02-2025
Student Name : SUJITH S Regulations            : 2021
Branch : COMPUTER SCIENCE ENGINEERING Gender                    : Male
Degree : B.E D.O.B                       : 03-06-2005
S.No SEMESTER COURSE CODE COURSE NAME GRADE RESULT
1 5SEM CB3491 Cryptography And Cyber Security B+ PASS
2 5SEM CCS346 Exploratory Data Analysis (T & P) A PASS
3 5SEM CCS362 Security and Privacy in Cloud  (T&P) A PASS
4 5SEM CCS363 Social Network Security (T&P) A PASS
5 5SEM CCS366 Software Testing And Automation (T & P) A PASS
6 5SEM CS3501 Compiler Design (T&P) A+ PASS
7 5SEM CS3551 Distributed Computing A PASS
8 5SEM CS3591 Computer Networks  (T & P) B+ PASS
9 5SEM MX3084 Disaster Risk Reduction and Management COMPLETED PASS"""

SUBJECT EXTRACTOR

In [132]:
from langchain.prompts import ChatPromptTemplate , HumanMessagePromptTemplate , SystemMessagePromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Letter-grade to grade-point table, injected into the system prompt.
# Grades that are not listed (e.g. "COMPLETED") have no numeric mapping.
grade_converter = "O=10,A+=9,A=8,B+=7,B=6,C+=5,C=4"

Base_prompt = ChatPromptTemplate(
    messages=[
        SystemMessagePromptTemplate.from_template(
            "You are a text summarizer & extractor, extract the SUBJECT CODE and SUBJECT GRADE from given text , convert the SUBJECT GRADE from alphabets to numeric value br using {grade_converter}, Return in JSON OBJECT as 'answer' as key "
        ),
        # Use a placeholder instead of the raw document as the template.
        # Passing the document text itself to from_template() means any '{' or
        # '}' in it is parsed as a template variable, and the `user_input`
        # value given to invoke() is silently ignored.
        HumanMessagePromptTemplate.from_template("{user_input}"),
    ]
)

# StrOutputParser keeps the model's reply as a plain string; the retriever cell
# further down passes this string as its query.
fine_user_input_chain = Base_prompt | llm_model | StrOutputParser()
fine_user_input = fine_user_input_chain.invoke(
    {"user_input": user_input, "grade_converter": grade_converter}
)

In [133]:
# Display the raw string returned by the extraction chain.
fine_user_input

'```json\n{\n  "answer": [\n    {\n      "SUBJECT CODE": "CB3491",\n      "SUBJECT GRADE": 7\n    },\n    {\n      "SUBJECT CODE": "CCS346",\n      "SUBJECT GRADE": 8\n    },\n    {\n      "SUBJECT CODE": "CCS362",\n      "SUBJECT GRADE": 8\n    },\n    {\n      "SUBJECT CODE": "CCS363",\n      "SUBJECT GRADE": 8\n    },\n    {\n      "SUBJECT CODE": "CCS366",\n      "SUBJECT GRADE": 8\n    },\n    {\n      "SUBJECT CODE": "CS3501",\n      "SUBJECT GRADE": 9\n    },\n    {\n      "SUBJECT CODE": "CS3551",\n      "SUBJECT GRADE": 8\n    },\n    {\n      "SUBJECT CODE": "CS3591",\n      "SUBJECT GRADE": 7\n    },\n    {\n      "SUBJECT CODE": "MX3084",\n      "SUBJECT GRADE": "COMPLETED"\n    }\n  ]\n}\n```'

DATA LOADER

In [78]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import  PyPDFLoader

import os

# Location of the reference PDF. It can be overridden with the
# REFERENCE_PDF_PATH environment variable so the notebook also runs on other
# machines; the default is the original local path.
pdf_path = os.environ.get(
    "REFERENCE_PDF_PATH",
    "D:\\NameKart\\React + SpringBoot\\ChatBot\\Resource\\B.E.CSE (1).pdf",
)
Data_document = PyPDFLoader(file_path=pdf_path)
extracted_data = Data_document.load()

# Split data: break on newlines only, targeting chunks of at most 300 with an
# overlap of 100 between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    separators=["\n"],
    is_separator_regex=False,
    chunk_size=300,
    chunk_overlap=100,
)
data_splitter = splitter.split_documents(extracted_data)



EMBEDDING + FAISS

In [34]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS

# Embedding model used to vectorise the PDF chunks. No API key is passed here,
# unlike the chat model — presumably it is picked up from the environment; verify.
embedding_model = GoogleGenerativeAIEmbeddings(
    model="gemini-embedding-001",
)

# Build the FAISS index from the chunks produced by the data-loader cell.
vector_db = FAISS.from_documents(
    documents=data_splitter,
    embedding=embedding_model,
)



EXTRACTOR_DATA

In [134]:
# Retriever over the FAISS index.
# NOTE(review): k=800 asks for up to 800 chunks per query, all of which are
# later placed into the prompt — confirm this is intentional.
retriver = vector_db.as_retriever(search_kwargs=dict(k=800))

# The extractor's string output is used directly as the similarity query.
request = retriver.invoke(fine_user_input)

In [135]:
# Dump every retrieved chunk, each followed by a separator line.
for i in request:
    print(i, "##############################", sep="\n")

page_content='and Machine Learning 2 1 2 2 1 - - - 2 2 2 3 2 2 2 
  Database Management 
Systems  2 2 3 2 1 - - - 2 2 2 2 2 2 3 
  Algorithms 2.67 1.8 3 1   1.33     1  1 1 
  Introduction to 
Operating Systems  2 2 2 2 1 - - - 2 2 2 2 1 2 2 
  Environmental 
Sciences and 
Sustainability' metadata={'producer': 'Microsoft® Word 2013', 'creator': 'Microsoft® Word 2013', 'creationdate': '2024-03-18T16:01:50+05:30', 'moddate': '2024-03-18T16:17:59+05:30', 'author': 'Admin', 'source': 'D:\\NameKart\\React + SpringBoot\\ChatBot\\Resource\\B.E.CSE (1).pdf', 'total_pages': 411, 'page': 3, 'page_label': '4'}
##############################
page_content='4 
 
  Object Oriented 
Programming 2 1 2 2 2 - - - 2 2 1 2 3 2 2 
  Data Structures 
Laboratory 2 2 2 1 2 - - - 2 2 2 2 2 2 3 
  Object Oriented 
Programming 
Laboratory 
2 2 2 2 2 - - - 2 2 2 2 2 2 2 
  Data Science 
Laboratory 2 2 2 2 1 - - - 2 2 2 2 2 3 2 
  Professional 
Development$' metadata={'producer': 'Microsoft® Word 2013', 'creator': 

In [136]:
from langchain_core.runnables import RunnableParallel
from operator import itemgetter
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain_core.output_parsers import JsonOutputParser   # ✅ new location

# Formula handed verbatim to the model. Each grade point is divided by 10
# before being weighted by its credit, so the result is a fraction (the
# recorded run printed 0.788...), not a value on the 10-point scale.
# NOTE(review): confirm that the /10 normalisation is intended.
formula = "((((SUBJECT_GRADE)/10)*SUBJECT_CREDIT)+......+n)/TOTAL_CREDIT_USED"

prompt = ChatPromptTemplate(
    messages=[
        # Doubled braces are template escapes: the model sees single braces.
        SystemMessagePromptTemplate.from_template(
            """You are a calculator AI.
Rules:
1. Perform the calculation exactly as per the formula.
2. Return ONLY a JSON object with the result:
   {{ "answer": <numeric_result> ,"subject code : formula points for each"}}
3. If any error occurs (e.g., missing values, invalid format), return ONLY:
   {{ "error": "<short error message and all missing subject code >" }}"""
        ),
        # {question} carries the extractor output, which holds SUBJECT CODE and
        # SUBJECT GRADE (credits come from {context}); the label previously
        # said "SUBJECT CREDIT", mis-describing the payload to the model.
        HumanMessagePromptTemplate.from_template("""
                                                 Here is the JSON input with SUBJECT CODE and SUBJECT GRADE: {question}.
                                                 Here is the reference CREDIT: {context}.
                                                 Here is the formula to apply: {formula}.
                                                 """)
    ]
)

# Parses the model reply into a Python object (the recorded run printed a dict).
parser = JsonOutputParser()

# Input keys the prompt expects; each is forwarded unchanged from the invoke()
# payload by the parallel step below.
_PROMPT_INPUT_KEYS = ("context", "question", "formula")

# Pipeline: select inputs -> fill prompt -> call the chat model -> parse JSON.
select_inputs = RunnableParallel(
    {key: itemgetter(key) for key in _PROMPT_INPUT_KEYS}
)
chain = select_inputs | prompt | llm_model | parser

# Pass only the text of the retrieved chunks to the prompt. Handing over the
# raw list of Document objects makes the template render their repr, which
# drags metadata (producer, timestamps, local file path) into the prompt and
# wastes tokens.
context_text = "\n\n".join(doc.page_content for doc in request)

result = chain.invoke(
    {"question": fine_user_input, "context": context_text, "formula": formula}
)
print(result)


{'answer': 0.7884615384615384, 'subject code : formula points for each': {'CB3491': '(7/10)*3 = 2.1', 'CCS346': '(8/10)*3 = 2.4', 'CCS362': '(8/10)*3 = 2.4', 'CCS363': '(8/10)*3 = 2.4', 'CCS366': '(8/10)*3 = 2.4', 'CS3501': '(9/10)*4 = 3.6', 'CS3551': '(8/10)*3 = 2.4', 'CS3591': '(7/10)*4 = 2.8'}}
