<a href="https://colab.research.google.com/github/0x-alex-s/autograder/blob/main/autograder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Optional environment check
from psutil import *
print("Number of CPU: ", cpu_count())
!cat /proc/cpuinfo

In [None]:
# Wrap output for reading more conveniently
from IPython.display import HTML, display

def set_css(info=None):
  """Display a CSS rule that makes ``<pre>`` blocks wrap long lines.

  Args:
    info: Execution info that IPython hands to ``pre_run_cell`` callbacks.
      It is accepted so the callback matches the documented
      ``pre_run_cell(info)`` signature; it is not used.
  """
  display(HTML('''
  <style>
    pre {
        white-space: pre-wrap;
    }
  </style>
  '''))


# Run set_css before each cell is executed.
get_ipython().events.register('pre_run_cell', set_css)

In [None]:
# Install dependencies not available in Colab environment and download SpaCy large dataset
!pip install python-dotenv langchain_mistralai

import spacy.cli
spacy.cli.download("en_core_web_lg")

In [None]:
import os
import warnings
from google.colab import drive
from dotenv import load_dotenv

# Mount Google Drive; the .env file with the API key is read from it.
drive.mount('/content/drive')

# load_dotenv reports whether it set any variable, so keep the result to make
# a missing or empty .env file visible in the warning below.
env_loaded = load_dotenv('/content/drive/MyDrive/Colab Notebooks/.env')
mistral_key = os.getenv('MISTRAL_KEY')

# Warn instead of raising: the cell stays best-effort, but a missing key is
# reported here rather than surfacing later when the LLM client is used.
if not mistral_key:
    warnings.warn(
        "MISTRAL_KEY is not set "
        f"(.env loaded: {env_loaded}); LLM feedback calls may fail."
    )

In [None]:
import torch
import torch.nn.functional as F
import numpy as np
from transformers import AutoTokenizer, AutoModel
from collections import Counter
from string import punctuation
from sklearn.metrics.pairwise import cosine_similarity
import spacy
from dataclasses import dataclass
from typing import List, Dict, Tuple, Optional
from scipy.stats import percentileofscore
from sentence_transformers import SentenceTransformer

class Grader:
    """Score a free-text answer against a model answer.

    Two signals are combined into a weighted final score:

    * similarity - cosine similarity between the embeddings of the student
      answer and the model answer;
    * coherence - mean cosine similarity between the embeddings of
      consecutive sentences of the student answer.
    """

    # Weights applied when no rubric is given, or for any component the rubric
    # leaves out. NOTE(review): these defaults sum to 0.6, not 1.0 - confirm
    # that this is intended.
    DEFAULT_WEIGHTS = {'similarity': 0.4, 'coherence': 0.2}

    def __init__(self, model_name = "sentence-transformers/all-mpnet-base-v2"):
        """Load the tokenizer, the embedding model and the spaCy pipeline.

        Args:
            model_name: Identifier passed to ``AutoTokenizer.from_pretrained``
                and ``AutoModel.from_pretrained``.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.model = AutoModel.from_pretrained(model_name)
        self.nlp = spacy.load('en_core_web_lg')

    def _get_embedding(self, text: str) -> np.ndarray:
        """Return the attention-mask-weighted mean of the token embeddings.

        Args:
            text: Text to embed; it is truncated to at most 512 tokens.

        Returns:
            A NumPy array obtained by averaging ``last_hidden_state`` over its
            second axis, counting only positions whose attention mask is set.
        """
        inputs = self.tokenizer(text, padding=True, truncation=True,
                                return_tensors="pt", max_length=512)

        # Inference only: no gradients are needed.
        with torch.no_grad():
            outputs = self.model(**inputs)

        token_embeddings = outputs.last_hidden_state
        # Broadcast the mask over the embedding axis so masked-out positions
        # contribute nothing to the sum.
        mask = inputs['attention_mask'].unsqueeze(-1).expand(token_embeddings.size()).float()
        summed = torch.sum(token_embeddings * mask, 1)
        # Clamp the token count to avoid a division by zero.
        counts = torch.clamp(mask.sum(1), min=1e-9)

        return (summed / counts).cpu().numpy()

    def _calculate_coherence(self, answer: str) -> float:
        """Return the mean similarity between consecutive sentences.

        Args:
            answer: Text that is split into sentences with the spaCy pipeline.

        Returns:
            ``1.0`` when the text has fewer than two sentences, otherwise the
            mean cosine similarity of each pair of neighbouring sentences.
        """
        sentences = list(self.nlp(answer).sents)

        if len(sentences) < 2:
            return 1.0

        # Embed every sentence exactly once; each interior sentence takes part
        # in two neighbouring pairs.
        embeddings = [self._get_embedding(sentence.text) for sentence in sentences]
        pair_scores = [
            cosine_similarity(left, right)[0][0]
            for left, right in zip(embeddings, embeddings[1:])
        ]

        return float(np.mean(pair_scores))

    @classmethod
    def _resolve_weights(cls, rubric: Optional[dict]) -> dict:
        """Return the rubric weights merged over ``DEFAULT_WEIGHTS``.

        A rubric that is missing, has no ``'weights'`` entry, or specifies only
        some components falls back to the defaults for whatever is absent.
        A fresh dict is returned, so the class defaults are never mutated.
        """
        weights = dict(cls.DEFAULT_WEIGHTS)
        if rubric:
            weights.update(rubric.get('weights') or {})
        return weights

    def assess_answer(self,
                     student_answer: str,
                     model_answer: str,
                     rubric: Optional[dict] = None):
        """Score ``student_answer`` against ``model_answer``.

        Args:
            student_answer: The answer to grade.
            model_answer: The reference answer to compare against.
            rubric: Optional dict whose ``'weights'`` entry maps
                ``'similarity'`` and/or ``'coherence'`` to their weights.
                Missing components use ``DEFAULT_WEIGHTS``.

        Returns:
            A dict with ``similarity_score``, ``coherence_score`` and
            ``final_score`` (the weighted sum of the two), all plain floats.
        """
        student_embedding = self._get_embedding(student_answer)
        model_embedding = self._get_embedding(model_answer)

        similarity_score = float(
            cosine_similarity(student_embedding, model_embedding)[0][0]
        )
        coherence_score = float(self._calculate_coherence(student_answer))

        weights = self._resolve_weights(rubric)
        final_score = float(
            weights['similarity'] * similarity_score
            + weights['coherence'] * coherence_score
        )

        return {
            "similarity_score": similarity_score,
            "coherence_score": coherence_score,
            "final_score": final_score,
        }

In [None]:
# Demo: grade one sample answer with the embedding-based Grader.
assessor = Grader()

# Reference answer that the student's response is compared against.
benchmark_answer = """
    A solution to track items in real-time and share this information with customers.
    Optimize routes so we can reduce unnecessary driving and fuel consumption.
    With RFID-technology for tracking instead of barcodes one can achieve faster and
    more efficient real-time tracking that often require a person to scan every code by hand.
    """

# Student response to be graded.
student_answer = """
I would propose making a software who can predict the traffic and make the most efficient
delivery route based on available information from cameras and such. And for the real-time
tracking problem, I would suggest that the customer can see where the vehicle delivering the
package is real-time. Giving every package a trackable chip of some sort would be very
expensive, but of course if the customer is willing to pay for, that could be a solution as well.
    """

# Weight similarity to the benchmark at 0.7 and coherence at 0.3.
rubric = {'weights': {'similarity': 0.7, 'coherence': 0.3}}

result = assessor.assess_answer(
    student_answer=student_answer,
    model_answer=benchmark_answer,
    rubric=rubric,
)

# Report each component and the weighted total, rounded to two decimals.
print(f"Similarity Score: {result['similarity_score']:.2f}")
print(f"Coherence Score: {result['coherence_score']:.2f}")
print(f"Final Score: {result['final_score']:.2f}")

Similarity Score: 0.46
Coherence Score: 0.40
Final Score: 0.44


In [None]:
import json
from langchain_mistralai import ChatMistralAI
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain.schema.output_parser import StrOutputParser
from tenacity import retry, stop_after_attempt, wait_exponential

class LLMEnhancedAssessor:
    """Pair a numeric base assessment with LLM-written tutor feedback.

    The base assessor produces similarity/coherence scores; those scores and
    both answers are then sent to a Mistral chat model, which writes the
    feedback text.
    """

    # Weights used by ``generate_feedback`` when the caller passes no rubric.
    DEFAULT_WEIGHTS = {'similarity': 0.7, 'coherence': 0.3}

    def __init__(self, base_assessor, api_key, model = "mistral-large-latest"):
        """Store the base assessor and create the chat model client.

        Args:
            base_assessor: Object exposing
                ``assess_answer(student_answer, model_answer, rubric)`` that
                returns a dict with ``similarity_score``, ``coherence_score``
                and ``final_score``.
            api_key: Mistral API key handed to ``ChatMistralAI``.
            model: Name of the Mistral model to use.
        """
        self.base_assessor = base_assessor
        self.model = model
        self.llm = ChatMistralAI(
            model_name=model,
            mistral_api_key=api_key,
            temperature=0
        )
        # The prompt never changes, so build it once instead of on every call
        # (and on every retry) of _generate_llm_analysis.
        self._analysis_prompt = self._build_analysis_prompt()

    @staticmethod
    def _build_analysis_prompt():
        """Return the chat prompt template used to request tutor feedback."""
        return ChatPromptTemplate.from_messages([
            ("system", "You are an experienced teaching assistant. Your task is to provide constructive and detailed feedback."),
            ("user", """
            Analyze student's answer based on the following inputs.

            Benchmark Answer: {benchmark_answer}
            Student Answer: {student_answer}

            Answer scores:
            - Similarity to benchmark answer: {similarity_score}
            - Coherence: {coherence_score}

            Respond in a conversational manner from second person point of view. Start with a detailed conceptual analysis,
            then highlight specific areas of strength and areas for improvement, followed by personalized learning suggestions.
            Do not split into sections. Use bullet lists as necessary. No greeting. No introductory paragraph. No formatting.
            """)
        ])

    @retry(stop=stop_after_attempt(3), wait=wait_exponential(multiplier=1, min=4, max=10))
    async def _generate_llm_analysis(self, student_answer, benchmark_answer, base_assessment):
        """Ask the LLM for feedback on the student's answer.

        The call is retried (up to 3 attempts, with exponential backoff
        configured between 4 and 10) when it raises.

        Args:
            student_answer: The answer being graded.
            benchmark_answer: The reference answer.
            base_assessment: Dict providing ``similarity_score`` and
                ``coherence_score``.

        Returns:
            The message object returned by the chain's ``ainvoke``.
        """
        # Compose with the current self.llm so a replaced client is honoured.
        chain = self._analysis_prompt | self.llm

        return await chain.ainvoke({
            "benchmark_answer": benchmark_answer,
            "student_answer": student_answer,
            "similarity_score": base_assessment['similarity_score'],
            "coherence_score": base_assessment['coherence_score']
        })

    async def generate_feedback(self, student_answer, benchmark_answer, rubric=None):
        """Score the answer and generate tutor feedback for it.

        Args:
            student_answer: The answer being graded.
            benchmark_answer: The reference answer.
            rubric: Optional rubric dict forwarded to the base assessor. When
                omitted, a rubric built from ``DEFAULT_WEIGHTS`` is used.

        Returns:
            A dict with ``score`` (the base assessor's ``final_score``) and
            ``tutor_feedback`` (the ``content`` of the LLM response).
        """
        if rubric is None:
            # Fresh dict per call so the class-level defaults are never shared
            # with (or mutated by) the base assessor.
            rubric = {'weights': dict(self.DEFAULT_WEIGHTS)}

        base_result = self.base_assessor.assess_answer(
            student_answer, benchmark_answer, rubric
        )

        llm_analysis = await self._generate_llm_analysis(
            student_answer,
            benchmark_answer,
            base_result,
        )

        return {
            "score": base_result['final_score'],
            "tutor_feedback": llm_analysis.content,
        }

In [None]:
# Demo: score a sample answer and request LLM-written tutor feedback for it.
# A Grader instance supplies the numeric scores to the LLM-enhanced assessor.
base_assessor = Grader()

# NOTE(review): this rubric is not passed to generate_feedback() in the call
# below, so it has no effect on the score printed by this cell.
rubric = {
        'weights': {
            'similarity': 0.7,
            'coherence': 0.3
        }
    }

# mistral_key is read from the environment in an earlier cell.
enhanced_assessor = LLMEnhancedAssessor(
        base_assessor=base_assessor,
        api_key=mistral_key
    )

# Reference answer that the student's response is compared against.
benchmark_answer = """
    SDG 13 climate action (reduce emissions) and SDG 8 (decent work and economic growth). May be SDG 11 as well (smart cities and communities). G12 (responsible consumption and production), G9: industry, innovations and infrastructure.  SDG 17 which is “partnerships to reach a goal”.
    """

# Student response to be graded (kept verbatim, including its typos).
student_answer = """
My solutions will positively impact SDG goal number 11 and 13. Number 11 because my
solution require Iots, and I don’tsee anything wrong in sharing the information we are
gathering for the traffic software with rest of the city if it can help the environment.Number
13 because the software makes the transport more efficient, less Co2 is released from the
transport vehicles, helpingwith stopping global warming.
    """

# Top-level await: generate_feedback is a coroutine, and notebook cells can
# await it directly; a plain script would need asyncio.run() instead.
result = await enhanced_assessor.generate_feedback(
        student_answer=student_answer,
        benchmark_answer=benchmark_answer,
    )

# Print the weighted score (two decimals) followed by the feedback text.
print(f"Score: {result['score']:.2f}")
print(result['tutor_feedback'])

Score: 0.49
Your answer partially aligns with the benchmark answer, as you've identified SDG 13 (Climate Action) and SDG 11 (Sustainable Cities and Communities) as areas your solution would impact. However, the benchmark answer also includes SDG 8, 9, 12, and 17, which you haven't mentioned. Let's break down your response to identify areas of strength and areas for improvement.

Conceptually, you've understood that your solution contributes to making cities more sustainable and combating climate change. This is evident in your explanation of how your traffic software can help reduce CO2 emissions, which is a key aspect of SDG 13. You've also understood the role of IoT in creating smart cities, which aligns with SDG 11. However, you've missed out on the broader implications of your solution:

- **Strengths:**
  - You've clearly explained how your solution directly impacts SDG 13 by reducing emissions.
  - You've identified that your use of IoT relates to SDG 11.
  - You've expressed you