In [5]:
import os
from dotenv import load_dotenv

# Load environment variables through python-dotenv before reading them below.
load_dotenv()

# API key for the OpenAI-backed model; None when the variable is not set.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Name of the model to use. Alternatives previously tried here:
# "gpt-3.5-turbo" and "mixtral:8x7b".
MODEL = "llama2"

In [6]:
from langchain_community.llms import Ollama
from langchain_openai.chat_models import ChatOpenAI
from langchain_community.embeddings import OllamaEmbeddings
from langchain_openai.embeddings import OpenAIEmbeddings

# Choose the LLM and the embedding backend from the configured model name:
# names beginning with "gpt" use the OpenAI classes, everything else Ollama.
use_openai = MODEL.startswith("gpt")

if not use_openai:
    model = Ollama(model=MODEL)
    embeddings = OllamaEmbeddings(model=MODEL)
else:
    model = ChatOpenAI(openai_api_key=OPENAI_API_KEY, model=MODEL)
    embeddings = OpenAIEmbeddings()

In [7]:
# Rule-based evaluation function
def evaluate_document_rule_based(document_content):
    """Score a document on four sustainability criteria by keyword presence.

    Each criterion has a fixed list of keyword phrases. A criterion's score is
    the number of its phrases that occur at least once in the text
    (case-insensitive substring match), clamped to the range 1-5. A criterion
    with no matching phrase therefore scores 1, the same as a criterion with
    exactly one matching phrase.

    Args:
        document_content: Full text of the document. ``None`` or an empty
            string is treated as text with no matches.

    Returns:
        dict mapping each criterion name to an integer score from 1 to 5.
    """
    keywords = {
        "Energy Efficiency": ["energy-efficient", "low power", "optimized"],
        "Maintainability": ["maintainable", "easy to update", "modular"],
        "Scalability": ["scalable", "scalability", "expandable"],
        "Green Software Compliance": ["green software", "eco-friendly", "compliant"]
    }

    # Lowercase once up front rather than once per keyword lookup.
    text = (document_content or "").lower()

    scores = {}
    for criterion, words in keywords.items():
        hits = sum(1 for word in words if word in text)
        # Clamp to 1..5: zero hits is reported as the minimum score of 1.
        scores[criterion] = min(5, max(1, hits))

    return scores

In [8]:
from langchain_core.output_parsers import StrOutputParser
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import DocArrayInMemorySearch
from operator import itemgetter





# Define prompt template
# The template only references {context}; any other key handed to the prompt
# (such as "question" below) is not interpolated into the text.
template = """
Evaluate the software architecture document based on the following sustainability criteria:
1. Energy Efficiency
2. Maintainability
3. Scalability
4. Green Software Compliance

Context: {context}

Provide a score (1-5) for each criterion and a brief justification for each score.
"""

prompt = PromptTemplate.from_template(template)

# Load and split PDF document
loader = PyPDFLoader("2.pdf")
pages = loader.load_and_split()

# Create vector store
vectorstore = DocArrayInMemorySearch.from_documents(pages, embedding=embeddings)

# Initialize retriever
retriever = vectorstore.as_retriever()


def format_docs(docs):
    """Join the text of retrieved chunks into one plain string.

    Without this step the retriever's list of Document objects would be
    interpolated into the prompt as a Python list repr (wrappers and
    metadata included) instead of readable document text.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Define chain with prompt, model, and parser
# "question" is used as the retrieval query; the retrieved chunks, joined as
# plain text, become the prompt's {context}.
parser = StrOutputParser()
chain = (
    {
        "context": itemgetter("question") | retriever | format_docs,
        "question": itemgetter("question"),
    }
    | prompt
    | model
    | parser
)

In [9]:
# Function to evaluate a document
def evaluate_document(document_path):
    """Evaluate one PDF with both the rule-based scorer and the LLM.

    The PDF is loaded and split into chunks. The concatenated chunk text is
    scored by ``evaluate_document_rule_based``; the LLM assessment is produced
    from chunks retrieved out of an in-memory vector index built from this
    same document.

    Args:
        document_path: Path to the PDF file to evaluate.

    Returns:
        Tuple ``(rule_based_scores, llm_response)``: the rule-based score
        dict and the output of the prompt | model | parser pipeline.

    Raises:
        ValueError: If no text chunks could be extracted from the PDF.
    """
    pages = PyPDFLoader(document_path).load_and_split()
    if not pages:
        raise ValueError(f"No text could be extracted from {document_path!r}")
    document_content = "\n".join(page.page_content for page in pages)

    # Use the rule-based system for initial evaluation
    rule_based_scores = evaluate_document_rule_based(document_content)

    # The module-level `chain` always rebuilds "context" from a retriever that
    # was indexed on a fixed PDF and discards any "context" passed to invoke(),
    # so it would not look at `document_path`. Index this document's own
    # chunks and retrieve from those instead.
    doc_retriever = DocArrayInMemorySearch.from_documents(
        pages, embedding=embeddings
    ).as_retriever()

    def join_chunks(docs):
        # Pass plain chunk text to the prompt rather than a list repr.
        return "\n\n".join(doc.page_content for doc in docs)

    doc_chain = (
        {
            "context": itemgetter("question") | doc_retriever | join_chunks,
            "question": itemgetter("question"),
        }
        | prompt
        | model
        | parser
    )

    # The question doubles as the retrieval query for the vector index.
    question = "Evaluate the document based on sustainability criteria."
    llm_response = doc_chain.invoke({"question": question})

    return rule_based_scores, llm_response

In [10]:
# Example usage: run both evaluators on the sample PDF and print each result.
document_path = "2.pdf"
results = evaluate_document(document_path)
rule_based_result = results[0]
llm_result = results[1]
print("Rule-Based Result:", rule_based_result)
print("LLM Result:", llm_result)

Rule-Based Result: {'Energy Efficiency': 1, 'Maintainability': 1, 'Scalability': 1, 'Green Software Compliance': 1}
LLM Result: Certainly! Based on the provided document, here are my scores and explanations for each criterion:

1. Completeness: 4/5
The document provides a detailed explanation of the architecture and design of the system, including the data model, data access layer, software architecture, and external APIs. However, there is no information on the system's performance or security aspects, which are crucial for a comprehensive evaluation. Therefore, I score it 4 out of 5.
2. Reusability: 3/5
While the document does mention the use of technology-specific frameworks for database interaction and external tools for report and form generation, there is no information on the modularity or abstraction of the system's components. This makes it difficult to assess the reusability of the system's components. Therefore, I score it 3 out of 5.
3. Maintainability: 4/5
The document pro