In [6]:
pip install flask flask-cors langchain langchain-community pdfplumber faiss-cpu openai python-dotenv pydantic


Collecting langchain-community
  Downloading langchain_community-0.3.24-py3-none-any.whl.metadata (2.5 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting aiohttp<4.0.0,>=3.8.3 (from langchain-community)
  Downloading aiohttp-3.11.18-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting aiohappyeyeballs>=2.3.0 (from aiohttp<4.0.0,>=3.8.3->langchain-community)
  Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.1.2 (from aiohttp<4.0.0,>=3.8.3->langchain-c

In [28]:
from flask import Flask, request, jsonify
from flask_cors import CORS
import os
from langchain.document_loaders import PDFPlumberLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.memory import ConversationBufferWindowMemory
from langchain.prompts import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import ChatPromptTemplate
from pydantic import BaseModel, Field
import openai
from dotenv import load_dotenv

app = Flask(__name__)
CORS(app)

load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

county_file_path = "."

text_chunks = []
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

for file_name in os.listdir(county_file_path):
    if file_name.lower().endswith(".pdf"):
        file_path = os.path.join(county_file_path, file_name)
        loader = PDFPlumberLoader(file_path)
        doc = loader.load()
        for page in doc:
            page.metadata['file_path'] = file_path
        text_chunks.extend(text_splitter.split_documents(doc))

embeddings = OpenAIEmbeddings()
vectordb = FAISS.from_documents(text_chunks, embedding=embeddings)

# --- Prompt Template ---
template = """You are a Credit Scoring Assistant tasked with evaluating an individual's creditworthiness...
Question: {question}
=========
{context}
=========
Answer: """

QA_PROMPT = PromptTemplate(template=template, input_variables=["question", "context"])

memory = ConversationBufferWindowMemory(memory_key='chat_history', input_key='question', output_key='answer', return_messages=True, k=10)

llm = ChatOpenAI(model_name="gpt-4o-mini", temperature=0)

conversation_chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever(search_type='similarity', search_kwargs={'k': 5}),
    return_source_documents=True,
    memory=memory,
    max_tokens_limit=2000,
    combine_docs_chain_kwargs={"prompt": QA_PROMPT}
)

class ResultScore(BaseModel):
    score: int = Field(description="Based on the answer, pick the appropriate score")
    reason: str = Field(description="Explain why this score was given based on the answer.")
    eligability: bool = Field(description="Based on the answer, pick if the answer is eligible or not")

result_score_parser = PydanticOutputParser(pydantic_object=ResultScore)
result_score_format_instructions = result_score_parser.get_format_instructions()

def extract_score(answer_text):
    prompt_template = ChatPromptTemplate.from_template(
        template="Evaluate the following answer and assign a score along with the reason.\n{format_instructions}\n\nAnswer:\n{message_content}"
    )
    formatted_prompt = prompt_template.format_prompt(
        format_instructions=result_score_format_instructions,
        message_content=answer_text
    )
    response = llm.invoke(formatted_prompt.to_messages())
    return result_score_parser.parse(response.content)

def generate_question_for_section(section):
    # Dynamic prompt for generating a question based on section
    prompt = f"Given the section '{section}', generate a relevant question that could be asked to evaluate someone's creditworthiness.If the section is not 'Age' then complement the previous answer. Ask simple and short questions."
    
    question = llm.invoke(prompt)
    print(question)
    answer = question.content
    print(answer)
    return answer

@app.route('/get_question', methods=['POST'])
def get_question():
    data = request.json
    section = data.get("section")
    
    if not section:
        return jsonify({"error": "Section is required"}), 400

    # Generate a dynamic question based on the section
    question = generate_question_for_section(section)
    
    return jsonify({"question": question})

# --- API Route ---
@app.route('/evaluate', methods=['POST'])
def evaluate():
    data = request.json
    section = data.get("section")
    question = data.get("question")
    answer = data.get("answer")

    if not all([section, question, answer]):
        return jsonify({"error": "Missing required fields: section, question, or answer"}), 400

    # full_prompt = f"Evaluate the following section of: {section} having the question: {question} where the user answered: {answer}."
    full_prompt = f"Evaluate the following section of : {section} having the question of {question} where the user answered as {answer} answer and assign the score/points from the document along with the reason."
    result_ans = conversation_chain.invoke(full_prompt)
    print(result_ans['answer'])

    try:
        result = extract_score(result_ans['answer'])
    except Exception as e:
        return jsonify({"error": f"Scoring failed: {str(e)}"}), 500

    return jsonify({
        "section": section,
        "answer": answer,
        "score": result.score,
        "reason": result.reason,
        "eligability": result.eligability
    })

# --- Run ---
if __name__ == '__main__':
    app.run(debug=False)


CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox
CropBox missing from /Page, defaulting to MediaBox


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [14/May/2025 09:53:22] "OPTIONS /get_question HTTP/1.1" 200 -


{'section': 'Age'}


127.0.0.1 - - [14/May/2025 09:53:23] "POST /get_question HTTP/1.1" 200 -


content='What is your age?' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 6, 'prompt_tokens': 46, 'total_tokens': 52, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': 'fp_0392822090', 'finish_reason': 'stop', 'logprobs': None} id='run--7030d485-c920-4809-8aca-bd5f1fa0ba0f-0'
What is your age?


127.0.0.1 - - [14/May/2025 09:53:30] "OPTIONS /evaluate HTTP/1.1" 200 -


{'section': 'Age', 'question': 'Age', 'answer': '23 years old'}
Based on the provided credit scoring criteria, the individual who is 23 years old falls into the age category of 18–24 years. 

**Score Assignment:**
- **Score:** +10 points

**Reasoning:**
- Applicants aged 18–24 are classified as "young borrowers" and are assigned a score of +10 points. This reflects the slightly higher risk associated with younger borrowers due to their limited credit history and potential lack of stable income or financial experience. 

In summary, the individual receives a score of +10 points for being 23 years old, as they are within the eligible age range but are considered a higher risk category.


127.0.0.1 - - [14/May/2025 09:53:35] "POST /evaluate HTTP/1.1" 200 -
127.0.0.1 - - [14/May/2025 09:53:35] "OPTIONS /get_question HTTP/1.1" 200 -


{'section': 'Visa/Residency Status'}


127.0.0.1 - - [14/May/2025 09:53:35] "POST /get_question HTTP/1.1" 200 -


content='What is your current visa or residency status?' additional_kwargs={} response_metadata={'token_usage': {'completion_tokens': 10, 'prompt_tokens': 50, 'total_tokens': 60, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-4o-mini', 'system_fingerprint': 'fp_71b02749fa', 'finish_reason': 'stop', 'logprobs': None} id='run--8d401abb-2fc1-40b5-9a7c-fef727bf5727-0'
What is your current visa or residency status?
