<a href="https://colab.research.google.com/github/Hiba-Magdi/LLM-Thesis-Assessment-/blob/main/Copy_of_LLMthesis_(1).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip -qq install streamlit google-api-python-client google-auth-httplib2 google-auth-oauthlib
!pip install pyngrok
!pip install langchain-google-genai
!pip -qq install pymupdf4llm
!pip install python-docx
!pip install python-dotenv
# RAG dependencies
!pip install PyMuPDF
!pip install frontend
!pip install tiktoken
!pip install "pinecone[grpc]"


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m44.3/44.3 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.1/9.1 MB[0m [31m60.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m69.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m79.1/79.1 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.0.9-py3-none-any.whl.metadata (3.6 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading langchain_google_genai-2.0.9-py3-none-any.whl (41 kB)
[2

In [None]:
%%writefile app.py
import hashlib
import json
import os

import pymupdf4llm
import streamlit as st
from docx import Document
from dotenv import load_dotenv
from google import genai  # Using GenerativeModel directly
from google.genai.types import (GenerateContentConfig)
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.http import MediaFileUpload

# RAG Imports
import fitz
import tiktoken
from pinecone import Pinecone, ServerlessSpec

# --- Input Preprocessing Class ---

class ThesisInputProcessor:
    """Prepares uploaded inputs (thesis PDF, description, rubric) for evaluation.

    Uploaded PDFs are spooled into ``temp_dir`` because the Markdown
    converter is called with a file path.
    """

    def __init__(self, temp_dir="temp"):
        """Remember the scratch directory and create it if it is missing."""
        self.temp_dir = temp_dir
        os.makedirs(self.temp_dir, exist_ok=True)

    def convert_thesis_to_md(self, uploaded_file):
        """Convert an uploaded PDF thesis to Markdown.

        Args:
            uploaded_file: Object exposing ``name`` and ``getbuffer()``.

        Returns:
            The value returned by ``pymupdf4llm.to_markdown`` for the saved copy.
        """
        # Keep only the final path component so a crafted upload name such as
        # "../../x.pdf" cannot be written outside the scratch directory.
        safe_name = os.path.basename(uploaded_file.name)
        file_path = os.path.join(self.temp_dir, safe_name)
        with open(file_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        return pymupdf4llm.to_markdown(file_path)

    def extract_description_text(self, uploaded_description):
        """Return the text of a Word document, one paragraph per line."""
        doc = Document(uploaded_description)
        return "\n".join(paragraph.text for paragraph in doc.paragraphs)

    def load_evaluation_criteria(self, file_path):
        """Load the evaluation criteria from a JSON file.

        The file is read as UTF-8 explicitly so the result does not depend on
        the locale's default encoding.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)


class PineconeVectorDB:
    """Wrapper around a single Pinecone index used as the RAG store.

    Text is embedded with Pinecone's hosted ``multilingual-e5-large`` model
    and stored together with the original chunk text as metadata.
    """

    # Number of vectors sent per upsert request.
    # NOTE(review): chosen conservatively for 1024-dimensional vectors with
    # text metadata; confirm against Pinecone's current request-size limits.
    UPSERT_BATCH_SIZE = 100

    def __init__(self, api_key, environment, index_name):
        """Open ``index_name``, creating it first when it does not exist.

        Args:
            api_key: Pinecone API key.
            environment: Kept for backward compatibility; not used here.
            index_name: Name of the index to open or create.
        """
        self.pinecone = Pinecone(api_key=api_key)
        self.index_name = index_name
        if index_name not in self.pinecone.list_indexes().names():
            self.pinecone.create_index(
                name=index_name,
                dimension=1024,
                metric='euclidean',
                spec=ServerlessSpec(
                    cloud='aws',
                    region='us-east-1'
                )
            )
        self.index = self.pinecone.Index(index_name)

    def embed_text(self, text, input_type="passage"):
        """Embed ``text`` through the Pinecone inference API.

        Args:
            text: The string to embed.
            input_type: Value passed as the model's ``input_type`` parameter.
                Defaults to ``"passage"``, which is what this method always
                sent before the parameter existed.

        Returns:
            The first (only) embedding of the response; callers read its
            ``values`` attribute.
        """
        embedding = self.pinecone.inference.embed(
            model="multilingual-e5-large",
            inputs=[text],
            parameters={"input_type": input_type, "truncate": "END"},
        )
        return embedding[0]

    def chunk_text_by_tokens(self, text, chunk_size, encoding_name="cl100k_base"):
        """Split ``text`` into consecutive chunks of at most ``chunk_size`` tokens.

        Raises:
            ValueError: If ``chunk_size`` is not a positive number.
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        encoding = tiktoken.get_encoding(encoding_name)
        tokens = encoding.encode(text)
        return [encoding.decode(tokens[i:i + chunk_size]) for i in range(0, len(tokens), chunk_size)]

    def extract_text(self, pdf_path):
        """Return the concatenated plain text of every page of a PDF."""
        # The context manager closes the document (and its file handle) even
        # when a page fails to render.
        with fitz.open(pdf_path) as doc:
            return "".join(page.get_text() for page in doc)

    def get_number_of_vectors(self):
        """Return the index's ``total_vector_count`` statistic."""
        return self.index.describe_index_stats()['total_vector_count']

    def _upsert_in_batches(self, vectors):
        """Upsert ``vectors`` in slices of ``UPSERT_BATCH_SIZE``; no request when empty."""
        batch_size = self.UPSERT_BATCH_SIZE
        for start in range(0, len(vectors), batch_size):
            self.index.upsert(vectors=vectors[start:start + batch_size])

    def upsert_pdf(self, pdf_file_path):
        """Chunk a PDF, embed each chunk and store the vectors in the index.

        Vector IDs are derived from the chunk text. IDs based on the current
        vector count can collide when that count is stale, which would
        overwrite existing vectors. Content-derived IDs also make
        re-uploading the same document a no-op instead of a source of
        duplicate matches.

        Errors are reported on stdout and swallowed (best-effort ingestion).
        """
        try:
            pdf_text = self.extract_text(pdf_file_path)
            chunks = self.chunk_text_by_tokens(text=pdf_text, chunk_size=500)
            vectors_by_id = {}
            for chunk in chunks:
                # Whitespace-only chunks carry nothing worth retrieving.
                if not chunk.strip():
                    continue
                vector_id = "vec-" + hashlib.sha256(chunk.encode("utf-8")).hexdigest()
                # Identical chunks inside one document are embedded only once.
                if vector_id in vectors_by_id:
                    continue
                vectors_by_id[vector_id] = {
                    "id": vector_id,
                    "values": self.embed_text(chunk).values,
                    "metadata": {
                        "text": chunk,
                        "source": "pdf_file",
                    },
                }
            self._upsert_in_batches(list(vectors_by_id.values()))
        except Exception as e:
            print(f"Error inserting data: {e}")

    def inject_vectors(self, vectors):
        """Upsert pre-built vector records into the index.

        Errors are reported on stdout and swallowed.
        """
        try:
            self._upsert_in_batches(list(vectors))
        except Exception as e:
            print(f"Error upserting vectors: {e}")

    def fetch_vectors(self, query, top_k=10):
        """Return the ``top_k`` matches closest to ``query``.

        Args:
            query: The query embedding (a vector, not raw text).
            top_k: Maximum number of matches to return.

        Returns:
            The ``matches`` of the query response (metadata included), or an
            empty list when the query fails.
        """
        try:
            results = self.index.query(
                vector=query,
                top_k=top_k,
                include_metadata=True
            )
            return results.matches
        except Exception as e:
            print(f"Error fetching vectors: {e}")
            return []



class GoogleDriveUploader:
    """Handles uploading files to Google Drive."""

    def __init__(self, api_credentials, folder_id):
        """Build a Drive v3 client from service-account info.

        Args:
            api_credentials: Parsed service-account JSON (a dict).
            folder_id: ID of the Drive folder that receives the uploads.
        """
        self.creds = service_account.Credentials.from_service_account_info(api_credentials)
        self.drive_service = build('drive', 'v3', credentials=self.creds)
        self.folder_id = folder_id
        self.temp_dir = "temp"  # Local scratch directory for files about to be uploaded

    @staticmethod
    def load_api_credentials(file_path):
        """Load Google Drive API credentials from a JSON file.

        Declared as a static method so it works both on the class (how the
        app calls it) and on an instance; without the decorator an instance
        call would pass the instance as ``file_path``.
        """
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)

    def upload_file(self, file_path):
        """Upload ``file_path`` into the configured folder.

        Returns:
            The Drive API response, limited to the ``id`` and ``name`` fields.
        """
        file_metadata = {
            'name': os.path.basename(file_path),
            'parents': [self.folder_id],
        }
        media = MediaFileUpload(file_path, mimetype='text/markdown')
        uploaded_file = self.drive_service.files().create(
            body=file_metadata,
            media_body=media,
            fields='id, name'
        ).execute()
        return uploaded_file

    def upload_evaluation(self, filename, evaluation_text):
        """Write an evaluation to a local text file and upload it to Drive.

        Any failure (writing the file or uploading it) is shown in the
        Streamlit UI instead of raising.
        """
        try:
            # The scratch directory is normally created elsewhere; make sure
            # it exists so this class also works on its own.
            os.makedirs(self.temp_dir, exist_ok=True)
            base_name = os.path.basename(filename).replace('.pdf', '')
            result_file_path = os.path.join(self.temp_dir, f"{base_name}_evaluation.txt")
            # Evaluations routinely contain non-ASCII characters; write UTF-8
            # regardless of the platform's default encoding.
            with open(result_file_path, "w", encoding="utf-8") as result_file:
                result_file.write(evaluation_text)
            self.upload_file(result_file_path)
            st.success(f"Uploaded evaluation for '{filename}' to Google Drive successfully!")
        except Exception as e:
            st.error(f"Error uploading evaluation for '{filename}' to Google Drive: {e}")

# Retrieval context
def get_context(text, vdb):
    """Build the retrieval context block for ``text``.

    ``text`` is split into 500-token chunks, every chunk is embedded and used
    to query the vector store, and the stored passage text of the matches is
    collected between a begin and an end marker line.

    A passage retrieved by several chunk queries is included only once (first
    occurrence wins), and matches without stored text are skipped, so the
    prompt is not inflated with repeated context.

    Args:
        text: The text to retrieve context for.
        vdb: Object providing ``chunk_text_by_tokens``, ``embed_text`` and
            ``fetch_vectors``.

    Returns:
        The context string, always including both marker lines.
    """
    chunks = vdb.chunk_text_by_tokens(text, chunk_size=500)
    # Embed everything first, then query, as before.
    embeddings = [vdb.embed_text(chunk) for chunk in chunks]

    seen = set()
    passages = []
    for embedding in embeddings:
        for match in vdb.fetch_vectors(embedding.values):
            passage = (match.metadata or {}).get('text')
            if not passage or passage in seen:
                continue
            seen.add(passage)
            passages.append(passage)

    parts = ['### Context Begins here:\n']
    parts.extend(passage + '\n' for passage in passages)
    parts.append("### Context Ends here\n")
    return ''.join(parts)

def handle_rag_context(uploaded_criteria_files, vdb):
    """Ingest newly uploaded criteria PDFs and return the retrieval context.

    Reads the module-level ``evaluation_criteria`` (used as the retrieval
    query) and ``input_processor`` (for its scratch directory).

    Args:
        uploaded_criteria_files: Uploaded PDF objects exposing ``name`` and
            ``getbuffer()``; may be ``None`` or empty.
        vdb: Vector store providing ``upsert_pdf`` plus what ``get_context``
            needs.

    Returns:
        The context string. With no uploads it is retrieved from whatever the
        index already holds. With uploads, files not seen in this session are
        ingested first and the context is retrieved once afterwards; if every
        file was already seen, the cached context is returned.
    """
    criteria_text = json.dumps(evaluation_criteria)
    if not uploaded_criteria_files:
        return get_context(criteria_text, vdb)

    # The per-session cache did not exist before its first write, which
    # raised KeyError; create it on demand.
    if 'criteria_contexts' not in st.session_state:
        st.session_state['criteria_contexts'] = {}
    cached_contexts = st.session_state['criteria_contexts']

    # The undefined name TEMP_DIR was used here; the scratch directory lives
    # on the shared input processor.
    temp_dir = input_processor.temp_dir
    os.makedirs(temp_dir, exist_ok=True)

    new_files = [f for f in uploaded_criteria_files if f.name not in cached_contexts]
    for criteria_file in new_files:
        criteria_file_path = os.path.join(temp_dir, os.path.basename(criteria_file.name))
        with open(criteria_file_path, "wb") as f_criteria:
            f_criteria.write(criteria_file.getbuffer())
        vdb.upsert_pdf(criteria_file_path)

    if not new_files:
        return cached_contexts[uploaded_criteria_files[-1].name]

    # Retrieve once, after every new file has been ingested, so the result
    # reflects all of them regardless of upload order.
    context = get_context(criteria_text, vdb)
    for criteria_file in new_files:
        cached_contexts[criteria_file.name] = context
    return context

def generate_evaluation_prompt_direct(thesis_name, criteria, md_text, description="", context=""):
    """Assemble the full evaluation prompt sent to the model.

    The five arguments are interpolated verbatim into a fixed template that
    carries the grading instructions, the per-category questions and the
    required output layout.

    Args:
        thesis_name: Title shown in the opening sentence.
        criteria: Evaluation criteria, rendered with ``str()`` formatting.
        md_text: Thesis content.
        description: Optional thesis description.
        context: Optional retrieved context.

    Returns:
        The complete prompt string.
    """
    return f"""You are an expert in academic research and evaluation. Evaluate the following thesis titled "{thesis_name}" based on the criteria listed below:

Evaluation Criteria:
{criteria}

Thesis Description:
{description}

Thesis Content:
{md_text}

 context :

{context}

# Instructions:

1. Analyse the thesis provided. If something other than an academic paper was
provided if it does not contain the following chapters : Abstract, Literture review , Methodology , do NOT say anything else , and say ONLY the following:
 Do not analyze and provide any Feedback , your response MUST BE: "This document does not appear to be a thesis. Please make sure that you
submitted the correct paper"


2. While evaluating the thesis, The following questions should be answered based on the corresponding 'Evaluation Criteria' category:

Abstract:
    Does the abstract provide a concise summary of the research activities?
    Does the abstract cover all essential aspects of the research, as described in the evaluation criteria for the 'Abstract' category?

Primary/Secondary Research:
    What type of research is primarily used (tertiary, secondary, primary)?
    Are the sources credible and appropriately recognized?
    Does the research approach align with the descriptions in the evaluation criteria for 'Primary/Secondary Research'?

Research aim, objectives and questions:
    Are the research aim, objectives, and questions clearly defined?
    Is there a clear alignment between the aim, objectives, and research questions?
    Does the clarity and alignment match the descriptions in the 'Research aim, objectives and questions' evaluation criteria?

Research and Project Method (/ology) - the theory:
    Are the arguments well-formed and grounded in rationality?
    Are conclusions and solutions justified and supported by concepts and theories?
    Does the theoretical methodology align with the descriptions in the 'Research and Project Method (/ology) - the theory' evaluation criteria?

Research and Project Method (/ology) - the practical:
   Is the practical work relevant, creative, and well-documented?
    Is there evidence of creativity and effort in the practical work?
    Does the practical methodology and documentation meet the descriptions in the 'Research and Project Method (/ology) - the practical' evaluation criteria?

Result, Discussion and Conclusion:
    Does the thesis present a professional image and contribute to the field of computer science?
    Are the research questions answered by the results and discussion?
    Does the contribution and impact of the results align with the descriptions in the 'Result, Discussion and Conclusion' evaluation criteria?

Thesis Structure:
    Is the thesis well-structured with clear divisions into chapters, sections, etc.?
    Are tables of contents and figures readable and accessible?
    Does the thesis structure align with the descriptions in the 'Thesis Structure' evaluation criteria?


For each category listed below:
1.  Answer the questions provided above for each corresponding category.
2. provide a score (1-10)
3. justify your score based on the provided score levels descriptions
4. identify and list the strengths based on your answers to the questions and the thesis text.
5. identify and list the weaknesses based on your answers to the questions and the thesis text. **Provide specific examples from the thesis where each weakness is observed.**
6. Provide specific and actionable suggestions for improvement. **For each suggestion, provide an example of how the thesis could be improved.**
7. calculate the "Contribution to Final Rating" for each category using the formula [Score x Weight]
8. Final Score: [Just add all section's contributions to find the overall score out of 60]


Answer ALL of the previous requirements in a well-written, formal and comprehensive summary for each category.

Write your summarization in the following format:

**Abstract**:

**Score**: [Score 1-10]

**Justification**: [Justification based on score level descriptions]

**Strengths**:
1. [Identify and list the strengths related to the Abstract based on your answers to the questions and the thesis text.]
2. ...

**Weaknesses**:
1. [Identify and list the weaknesses related to the Abstract based on your answers to the questions and the thesis text. **Example from thesis:** ...]
2. ...

**Suggestions**:
1. [Provide specific and actionable suggestions for improvement related to the Abstract. **Example Improvement:** ...]
2. ...

**Contribution to Final Rating**: [Score x Weight]

**Primary/Secondary Research**:

**Score**: [Score 1-10]

**Justification**: [Justification based on score level descriptions]

**Strengths**:
1. [Identify and list the strengths related to Primary/Secondary Research based on your answers to the questions and the thesis text.]
2. ...

**Weaknesses**:
1. [Identify and list the weaknesses related to Primary/Secondary Research based on your answers to the questions and the thesis text. **Example from thesis:** ...]
2. ...

**Suggestions**:
1. [Provide specific and actionable suggestions for improvement related to Primary/Secondary Research. **Example Improvement:** ...]
2. ...

**Contribution to Final Rating**: [Score x Weight]

**Research aim, objectives and questions**:

**Score**: [Score 1-10]

**Justification**: [Justification based on score level descriptions]

**Strengths**:
1. [Identify and list the strengths related to Research aim, objectives and questions based on your answers to the questions and the thesis text.]
2. ...

**Weaknesses**:
1. [Identify and list the weaknesses related to Research aim, objectives and questions based on your answers to the questions and the thesis text. **Example from thesis:** ...]
2. ...

**Suggestions**:
1. [Provide specific and actionable suggestions for improvement related to Research aim, objectives and questions. **Example Improvement:** ...]
2. ...

**Contribution to Final Rating**: [Score x Weight]

**Research and Project Method (/ology) - the theory**:

**Score**: [Score 1-10]

**Justification**: [Justification based on score level descriptions]

**Strengths**:
1. [Identify and list the strengths related to Research and Project Method (/ology) - the theory based on your answers to the questions and the thesis text.]
2. ...

**Weaknesses**:
1. [Identify and list the weaknesses related to Research and Project Method (/ology) - the theory based on your answers to the questions and the thesis text. **Example from thesis:** ...]
2. ...

**Suggestions**:
1. [Provide specific and actionable suggestions for improvement related to Research and Project Method (/ology) - the theory. **Example Improvement:** ...]
2. ...

**Contribution to Final Rating**: [Score x Weight]

**Research and Project Method (/ology) - the practical**:

**Score**: [Score 1-10]

**Justification**: [Justification based on score level descriptions]

**Strengths**:
1. [Identify and list the strengths related to Research and Project Method (/ology) - the practical based on your answers to the questions and the thesis text.]
2. ...

**Weaknesses**:
1. [Identify and list the weaknesses related to Research and Project Method (/ology) - the practical based on your answers to the questions and the thesis text. **Example from thesis:** ...]
2. ...

**Suggestions**:
1. [Provide specific and actionable suggestions for improvement related to Research and Project Method (/ology) - the practical. **Example Improvement:** ...]
2. ...

**Contribution to Final Rating**: [Score x Weight]

**Result, Discussion and Conclusion**:

**Score**: [Score 1-10]

**Justification**: [Justification based on score level descriptions]

**Strengths**:
1. [Identify and list the strengths related to Result, Discussion and Conclusion based on your answers to the questions and the thesis text.]
2. ...

**Weaknesses**:
1. [Identify and list the weaknesses related to Result, Discussion and Conclusion based on your answers to the questions and the thesis text. **Example from thesis:** ...]
2. ...

**Suggestions**:
1. [Provide specific and actionable suggestions for improvement related to Result, Discussion and Conclusion. **Example Improvement:** ...]
2. ...

**Contribution to Final Rating**: [Score x Weight]

**Thesis Structure**:

**Score**: [Score 1-10]

**Justification**: [Justification based on score level descriptions]

**Strengths**:
1. [Identify and list the strengths related to Thesis Structure based on your answers to the questions and the thesis text.]
2. ...

**Weaknesses**:
1. [Identify and list the weaknesses related to Thesis Structure based on your answers to the questions and the thesis text. **Example from thesis:** ...]
2. ...

**Suggestions**:
1. [Provide specific and actionable suggestions for improvement related to Thesis Structure. **Example Improvement:** ...]
2. ...

**Contribution to Final Rating**: [Score x Weight]

**Overall Evaluation**:
**Final Score**:  [Sum of all Contribution to Final Rating scores out of 60]
**General Feedback**:
[Summarize the overall performance of the thesis, highlighting key strengths and areas for improvement.]

"""

def evaluate_thesis(thesis_name, criteria, md_text, description="", context="",
                    model='gemini-2.0-flash-exp', temperature=0.0):
    """Ask Gemini to evaluate a thesis and return the response text.

    Uses the module-level ``client``.

    Args:
        thesis_name: Title of the thesis.
        criteria: Evaluation criteria passed into the prompt.
        md_text: Thesis content.
        description: Optional thesis description.
        context: Optional retrieved context.
        model: Gemini model name; defaults to the model this function
            always used.
        temperature: Sampling temperature; defaults to 0.0 as before.

    Returns:
        The evaluation text. An empty string is returned when the response
        has no text, so callers that render or cache the result never
        receive ``None``.
    """
    evaluation_prompt = generate_evaluation_prompt_direct(
        thesis_name, criteria, md_text, description, context
    )

    gemini_config = GenerateContentConfig(temperature=temperature)
    response = client.models.generate_content(
        model=model,
        contents=evaluation_prompt,
        config=gemini_config,
    )

    # NOTE(review): ``response.text`` is expected to be None when the model
    # returns no text parts (for example a blocked response) - confirm
    # against the SDK documentation.
    return response.text or ""

def evaluate_uploaded_thesis(uploaded_file, uploaded_description, vdb, drive_uploader,input_processor, use_rag, uploaded_criteria_files):
    """Run the full evaluation pipeline for one uploaded thesis and render it.

    While a spinner is shown, this reads the optional description, converts
    the thesis to Markdown, gathers RAG context when ``use_rag`` is set, and
    calls the model. The result is written to the page and returned so the
    caller can cache it. ``drive_uploader`` is accepted but not used here.
    """
    with st.spinner("Evaluating..."):
        description_text = ""
        if uploaded_description:
            try:
                description_text = input_processor.extract_description_text(uploaded_description)
            except Exception as exc:
                # A broken description is reported but does not stop the evaluation.
                st.error(str(exc))

        thesis_markdown = input_processor.convert_thesis_to_md(uploaded_file)
        title = uploaded_file.name.replace(".pdf", "")

        rag_context = handle_rag_context(uploaded_criteria_files, vdb) if use_rag else ""

        result = evaluate_thesis(
            title,
            evaluation_criteria,
            thesis_markdown,
            description_text,
            rag_context,
        )

        st.write(f"### Evaluation Result for: {uploaded_file.name}")
        st.markdown(result)

        return result

# --- Configuration ---


#  Access the variables
# Read settings from the local env file before any os.getenv call below.
load_dotenv("keys.env.txt")

# Each value is None when the corresponding variable is not set.
pinecone_api_key = os.getenv("PINECONE_API_KEY")
pinecone_environment = os.getenv("PINECONE_ENVIRONMENT")
pinecone_index_name = os.getenv("PINECONE_INDEX_NAME")
GOOGLE_DRIVE_FOLDER_ID = os.getenv("GOOGLE_DRIVE_FOLDER_ID")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Constructing the processor also creates its scratch directory ("temp" by default).
input_processor = ThesisInputProcessor()

# Both files are read relative to the current working directory.
GOOGLE_API_CREDENTIALS = GoogleDriveUploader.load_api_credentials("google_api_cridentials.json")
evaluation_criteria = input_processor.load_evaluation_criteria("Rubric.json")


# --- Initialize Components ---
# These run at module level, i.e. every time this script is executed.
client = genai.Client(api_key=GOOGLE_API_KEY)
drive_uploader = GoogleDriveUploader(GOOGLE_API_CREDENTIALS, GOOGLE_DRIVE_FOLDER_ID)
vdb = PineconeVectorDB(pinecone_api_key, pinecone_environment, pinecone_index_name)



# --- Streamlit App ---
st.title("Thesis Evaluation Assistant")
st.write(
    "\n"
    "Upload your thesis (PDF format) and optionally a description (Word document), and I'll evaluate it based on predefined criteria using a direct API call.\n"
)

# Evaluations produced in this session; created once, then appended to.
if 'interaction_history' not in st.session_state:
    st.session_state['interaction_history'] = []

# --- RAG toggle in the sidebar, followed by a line echoing its state ---
use_rag = st.sidebar.toggle(
    "Use RAG",
    value=False,
    help="Enable or disable the RAG functionality.",
)
if use_rag:
    use_rag_state = "enabled"
else:
    use_rag_state = "disabled"
st.sidebar.write(f"Use RAG is {use_rag_state}.")

# --- Add Previous Interactions to SideBar ---
with st.sidebar:
    st.divider()
    st.header("Previous Evaluations")
    history = st.session_state['interaction_history']
    if not history:
        st.write("No previous evaluations yet.")
    # Newest entry first: `row` numbers the rendered rows (and their widget
    # keys), `position` is the entry's index in the stored list.
    for row, position in enumerate(range(len(history) - 1, -1, -1)):
        entry = history[position]
        view_col, delete_col = st.columns([0.8, 0.2])
        with view_col:
            if view_col.button(f"View: {entry['filename']}", key=f"interaction_{row}"):
                st.session_state['selected_interaction'] = entry['evaluation']
                st.session_state['selected_filename'] = entry['filename']
        with delete_col:
            if delete_col.button("X", key=f"clear_{row}", use_container_width=True):
                history.pop(position)
                # Restart the script so the list is redrawn without the removed entry.
                st.rerun()

# --- File Upload and Processing ---
st.header("Upload Files")

# The criteria uploader is only rendered while RAG is switched on.
uploaded_criteria_files = (
    st.file_uploader("Upload your criteria files (PDF format)", type=["pdf"], accept_multiple_files=True)
    if use_rag
    else None
)

uploaded_description = st.file_uploader("Upload thesis description (Word format, optional)", type=["docx"])
uploaded_files = st.file_uploader("Upload your thesis files (PDF format)", type=["pdf"], accept_multiple_files=True)

for uploaded_file in uploaded_files or []:
    # Reuse an evaluation already produced in this session for the same file name.
    cached_evaluation = None
    for past in st.session_state['interaction_history']:
        if past['filename'] == uploaded_file.name:
            cached_evaluation = past['evaluation']
            break

    if cached_evaluation:
        st.write(f"### Evaluation Result for: {uploaded_file.name} (from cache)")
        st.markdown(cached_evaluation)
        evaluation_to_upload = cached_evaluation
    else:
        evaluation_to_upload = evaluate_uploaded_thesis(
            uploaded_file,
            uploaded_description,
            vdb,
            drive_uploader,
            input_processor,
            use_rag,
            uploaded_criteria_files,
        )
        st.session_state['interaction_history'].append({
            'filename': uploaded_file.name,
            'evaluation': evaluation_to_upload,
        })

    # One Drive button per file, keyed by file name in both the cached and the fresh case.
    if st.button("Upload Evaluation to Google Drive", key=f"upload_{uploaded_file.name}"):
        drive_uploader.upload_evaluation(uploaded_file.name, evaluation_to_upload)

# --- Display Selected Interaction ---
# Show the evaluation picked from the sidebar, if any.
selected_evaluation = st.session_state.get('selected_interaction')
if selected_evaluation:
    selected_filename = st.session_state['selected_filename']
    st.header(f"Previous Evaluation for: {selected_filename}")
    st.markdown(selected_evaluation)

    # This key carries its own "selected" prefix because the per-file buttons
    # in the upload section already use "upload_<filename>". Reusing that key
    # while the same file is both uploaded and selected registers two widgets
    # under one key, which Streamlit rejects.
    if st.button("Upload Evaluation to Google Drive", key=f"upload_selected_{selected_filename}"):
        drive_uploader.upload_evaluation(selected_filename, selected_evaluation)
    if st.button("Clear Evaluation", key="clear_selected_evaluation"):
        st.session_state['selected_interaction'] = None
        st.session_state['selected_filename'] = None
        st.rerun()

Writing app.py


In [None]:
!streamlit run app.py &>/dev/null&

In [None]:
!ngrok authtoken "2rXKEgHaIEJUlHFu1BadyYgQ3nw_61LEhKiQ4gMDUHV9XiUYf"

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
from pyngrok import ngrok

# Open a tunnel to the Streamlit port 8501.
public_url = ngrok.connect(
    addr='8501',
    proto='http',
    bind_tls=True,
)
# Last expression of the cell, so the notebook displays the tunnel.
public_url

<NgrokTunnel: "https://a6ef-34-91-182-200.ngrok-free.app" -> "http://localhost:8501">