<a href="https://colab.research.google.com/github/DeveloperPratim/sses_final/blob/main/LastCommitSSES.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive (Colab only). load_models() below reads
# its model folders from /content/drive/MyDrive/sses, so this must run first.
from google.colab import drive
drive.mount('/content/drive')

In [None]:

import json
import math
import re

import torch
from sentence_transformers import SentenceTransformer, util  # For semantic similarity
from transformers import AutoTokenizer, AutoModelForCausalLM

# Module-level handles shared by the helpers below. They all start out empty
# and are filled in by load_models().
model_qwen = tokenizer_qwen = sentence_transformer = None
# Set to True by load_models() so the models are loaded only once.
models_loaded = False


def load_models():
    """
    Populate the module-level model/tokenizer globals.

    The first call reads the Qwen causal-LM (with its tokenizer) and the
    MiniLM SentenceTransformer from their Google Drive folders, places both
    on the CPU and sets ``models_loaded``. Any later call only prints a
    notice and returns without reloading.
    """
    global model_qwen, tokenizer_qwen, sentence_transformer, models_loaded

    # Guard clause: nothing to do once a previous call has completed.
    if models_loaded:
        print("Models are already loaded.")
        return

    print("Loading models...")
    cpu = torch.device("cpu")

    # Qwen tokenizer + model, read from the local Drive folder only.
    qwen_dir = "/content/drive/MyDrive/sses/Qwen2.5-1.5B-Instruct"
    tokenizer_qwen = AutoTokenizer.from_pretrained(qwen_dir, local_files_only=True)
    model_qwen = AutoModelForCausalLM.from_pretrained(qwen_dir, local_files_only=True).to(cpu)

    # MiniLM sentence encoder used for semantic similarity.
    minilm_dir = "/content/drive/MyDrive/sses/all-MiniLM-L6-v2"
    sentence_transformer = SentenceTransformer(minilm_dir, device="cpu")

    models_loaded = True
    print("Models loaded successfully.")


def generate_response(question, student_answer, marks):
    """
    Ask the Qwen model to grade a student's answer.

    :param question: The input question.
    :param student_answer: The input student's answer.
    :param marks: Full marks for the question; quoted in the prompt as the maximum score.
    :return: The text generated by the model. The prompt is not part of the
        returned string, so text supplied by the student can never be
        mistaken for the model's own output by downstream parsing.
    :raises RuntimeError: If ``load_models()`` has not been called yet.
    """
    if not models_loaded:
        raise RuntimeError("Models are not loaded. Call `load_models()` first.")

    # Prepare the input text
    input_text = f"""
    Question: {question}
    Answer: {student_answer}
    Evaluate and provide these two fields **score** and **feedback** based on this answer where full marks is {marks} and can be given for the most precise and best answer, and 0 for wrong or irrelevant answers. Respond only with these two fields in JSON format.
    """

    # Tokenize the input (single, unpadded sequence on the CPU)
    input_ids = tokenizer_qwen.encode(input_text, return_tensors="pt").to(torch.device("cpu"))
    input_token_count = input_ids.shape[-1]

    # Let the model use whatever is left of its context window, but always
    # allow at least 50 new tokens.
    # NOTE(review): when the prompt leaves fewer than 50 free positions this
    # floor requests more tokens than max_position_embeddings has room for.
    max_allowed_tokens = model_qwen.config.max_position_embeddings
    max_new_tokens = max(50, max_allowed_tokens - input_token_count)

    output_ids = model_qwen.generate(
        input_ids,
        # Every position is a real token, so the mask is all ones; passing it
        # explicitly avoids generate() having to guess it from the pad token.
        attention_mask=torch.ones_like(input_ids),
        max_new_tokens=max_new_tokens,
        num_return_sequences=1,
        no_repeat_ngram_size=2,  # Avoid repetition
        # `early_stopping` is intentionally not passed: it only affects
        # beam-based generation and no beams are requested here.
    )

    # generate() returns prompt + continuation for a causal LM; drop the
    # prompt tokens so only the model's own answer is decoded.
    generated_ids = output_ids[0][input_token_count:]
    return tokenizer_qwen.decode(generated_ids, skip_special_tokens=True)


# "score": <number>  -- the number may be quoted; only a well-formed decimal
# (digits, optionally followed by '.' and more digits) is captured so that
# float() can never be handed something like "4.5." or "...".
_SCORE_PATTERN = re.compile(r'"score"\s*:\s*"?(\d+(?:\.\d+)?)')
# "feedback": "<text>"  -- backslash escapes (including \") are kept inside the
# captured text instead of ending the match early.
_FEEDBACK_PATTERN = re.compile(r'"feedback"\s*:\s*"((?:[^"\\]|\\.)+)"', re.DOTALL)


def extract_score_and_feedback(generated_text):
    """
    Extract score and feedback from the generated text using regular expressions.

    The text does not have to be valid JSON as a whole; the first
    ``"score": <number>`` and the first ``"feedback": "<text>"`` occurrences
    are used.

    :param generated_text: The raw output from the model.
    :return: A dictionary with "score" (float, or None when no numeric score
        was found) and "feedback" (str, or None when no non-empty feedback
        string was found).
    """
    score_match = _SCORE_PATTERN.search(generated_text)
    feedback_match = _FEEDBACK_PATTERN.search(generated_text)

    score = float(score_match.group(1)) if score_match else None

    feedback = None
    if feedback_match:
        raw_feedback = feedback_match.group(1)
        try:
            # Decode JSON escapes (\" \n \uXXXX ...); strict=False tolerates
            # literal newlines inside the string.
            feedback = json.loads(f'"{raw_feedback}"', strict=False)
        except json.JSONDecodeError:
            # Not valid JSON string content (e.g. a stray backslash): keep
            # the text exactly as the model wrote it.
            feedback = raw_feedback

    return {"score": score, "feedback": feedback}


def calculate_semantic_similarity(student_answer, expected_answer):
    """
    Return the cosine similarity between the embeddings of two answers.

    :param student_answer: The student's answer.
    :param expected_answer: The expected correct answer.
    :return: The semantic similarity score.
    """
    if not models_loaded:
        raise RuntimeError("Models are not loaded. Call `load_models()` first.")

    # One batched encode call; row 0 is the student's answer, row 1 the reference.
    answer_pair = [student_answer, expected_answer]
    encoded = sentence_transformer.encode(answer_pair, convert_to_tensor=True)
    student_vec, expected_vec = encoded[0], encoded[1]

    # The cosine similarity comes back as a tensor; .item() unwraps its single value.
    return util.pytorch_cos_sim(student_vec, expected_vec).item()


def GetData(question, student_answer, expected_answer, marks):
    """
    Function to generate evaluation data based on the student's answer.

    NOTE(review): a second function with the same name is defined further
    down in this file and replaces this one when the file is executed top
    to bottom, so this version is effectively unused.

    :param question: The question asked in the exam.
    :param student_answer: The student's answer to the question.
    :param expected_answer: The expected correct answer for the question.
    :param marks: The full marks assigned to the question.

    :return: A dictionary containing the analysis result (inputs, model
        score and feedback, similarity and the blended final marks).
    :raises RuntimeError: If ``load_models()`` has not been called yet.
    :raises ValueError: If no numeric score could be extracted from the
        model response.
    """
    if not models_loaded:
        raise RuntimeError("Models are not loaded. Call `load_models()` first.")

    # Generate response using Qwen model
    generated_text = generate_response(question, student_answer, marks)
    # Extract score and feedback
    result = extract_score_and_feedback(generated_text)
    score = result['score']
    feedback = result['feedback']

    # The extractor returns None when the model did not produce a score;
    # fail with a clear message instead of a TypeError in the arithmetic below.
    if score is None:
        raise ValueError("Could not extract a numeric score from the model response.")

    # Calculate semantic similarity between student's answer and expected answer
    similarity = calculate_semantic_similarity(student_answer, expected_answer)

    # Blend the model score with the similarity.
    # NOTE(review): score is on the 0..marks scale requested in the prompt
    # while similarity is a cosine value, so the similarity term contributes
    # at most `similarity_weight` marks -- confirm this is intended.
    similarity_weight = 0.5  # You can adjust this weight based on your preference
    final_marks_obtained = (score * 0.5) + (similarity * similarity_weight)

    # Keep the result inside [0, marks]: never above full marks, and never
    # negative when the similarity is below zero.
    final_marks_obtained = max(0, min(final_marks_obtained, marks))

    # Return the result as a dictionary
    result_data = {
        "question": question,
        "student_answer": student_answer,
        "expected_answer": expected_answer,
        "marks": marks,
        "score": score,
        "feedback": feedback,
        "similarity": similarity,
        "final_marks_obtained": final_marks_obtained
    }

    return result_data


def _count_sentences(text):
    """Count the non-empty pieces of *text* separated by '.', '!' or '?'."""
    return sum(1 for piece in re.split(r'[.!?]+', text) if piece.strip())


def _blend_marks(score, similarity, marks):
    """Combine model score and similarity into final marks within [0, marks]."""
    # Logarithmic scaling; the +1 keeps log() defined when the value is 0.
    log_score = math.log(score + 1)
    log_similarity = math.log(similarity + 1)

    # Equal-weight sum of both signals, scaled by the full marks.
    blended = (log_score * 0.5 + log_similarity * 0.5) * marks

    # Anything above 90% of full marks is reduced by 20%.
    if blended > marks * 0.9:
        blended *= 0.8
    blended = max(blended, 0)

    # Average with the model's own score.
    blended = (blended + score) / 2

    # The steps above can overshoot for larger `marks`; never award more
    # than the full marks (or less than zero).
    return max(0.0, min(blended, float(marks)))


def GetData(question, student_answer, expected_answer, marks):
    """
    Function to generate evaluation data based on the student's answer.

    :param question: The question asked in the exam.
    :param student_answer: The student's answer to the question.
    :param expected_answer: The expected correct answer for the question.
    :param marks: The full marks assigned to the question.

    :return: A dictionary containing the analysis result: the inputs, the
        model's score (limited to the 0..marks range) and feedback, the
        similarity, the final marks (never above ``marks``), the matching
        percentages, and word/sentence counts of the student's answer.
    :raises RuntimeError: If ``load_models()`` has not been called yet.
    :raises ValueError: If no numeric score could be extracted from the
        model response.
    """
    if not models_loaded:
        raise RuntimeError("Models are not loaded. Call `load_models()` first.")

    # Generate response using Qwen model
    generated_text = generate_response(question, student_answer, marks)
    # Extract score and feedback
    result = extract_score_and_feedback(generated_text)
    feedback = result['feedback']

    # The extractor returns None when the model did not produce a score;
    # fail with a clear message instead of a TypeError inside math.log().
    if result['score'] is None:
        raise ValueError("Could not extract a numeric score from the model response.")
    # The prompt asks for a score between 0 and `marks`; hold the model to it.
    score = max(0.0, min(result['score'], float(marks)))

    # Calculate semantic similarity between student's answer and expected answer
    similarity = calculate_semantic_similarity(student_answer, expected_answer)
    similarity_percentage = round(similarity * 100, 2)  # Convert similarity to percentage

    # A cosine similarity can be negative; treat that as "no similarity" so
    # log(similarity + 1) is always defined. The raw value is still reported.
    final_marks_obtained = _blend_marks(score, max(similarity, 0.0), marks)

    # Calculate percentages
    final_marks_percentage = round((final_marks_obtained / marks) * 100, 2) if marks > 0 else 0
    score_percentage = round((score / marks) * 100, 2) if marks > 0 else 0

    # Analyze the student's answer
    student_word_count = len(student_answer.split())  # Count words in the student's answer
    # Count text segments rather than terminators, so an answer without a
    # final full stop still counts as a sentence.
    student_sentence_count = _count_sentences(student_answer)

    # Return the result as a dictionary
    result_data = {
        "question": question,
        "student_answer": student_answer,
        "expected_answer": expected_answer,
        "marks": marks,
        "score": score,
        "score_percentage": score_percentage,
        "feedback": feedback,
        "similarity": similarity,
        "similarity_percentage": similarity_percentage,
        "final_marks_obtained": final_marks_obtained,
        "final_marks_percentage": final_marks_percentage,
        "student_word_count": student_word_count,
        "student_sentence_count": student_sentence_count
    }

    return result_data

# Load the models a single time up front; the helpers above raise
# RuntimeError until this has run.
load_models()  # Load models once

In [None]:
# Sample exam item used to exercise the grader.
question = "What is the purpose of a file system?"
student_answer = "A file system manages files on storage devices and provides an interface for storing, retrieving, and organizing files."
expected_answer = "A file system organizes and controls how data is stored and retrieved on a computer system."
marks = 5  # Full marks for the question

# Grade the sample answer and show the resulting dictionary.
evaluation_result = GetData(
    question=question,
    student_answer=student_answer,
    expected_answer=expected_answer,
    marks=marks,
)
print(evaluation_result)