In [None]:
# Mount Google Drive at /content/drive; the workbook paths configured below live under it.
from google.colab import drive
drive.mount('/content/drive')

# Imports

In [None]:
import pandas as pd
import openpyxl
import time
import json

from openpyxl import Workbook
from openpyxl import load_workbook
from datetime import datetime


# Globals


In [None]:
# Model file handed to LlamaCpp as model_path.
model_path = "model.gguf"
# File handed to LlamaCpp as grammar_path.
# NOTE(review): this is the same file as model_path; presumably a separate grammar
# file was intended — confirm the correct path.
gramma_path = "model.gguf"

# Input workbook with the topics to process, and output workbook for generated questions.
topics_file_path = "/content/drive/MyDrive/ChemTopics.xlsx"
result_file_path = "/content/drive/MyDrive/ChemResults_.xlsx"

# Generation attempts per topic row: questions_per_topic applies when the row has
# no subtopic, questions_per_subtopic otherwise.
questions_per_topic = 10
questions_per_subtopic = 10

# Helpers

## Colors

In [None]:
class colors:
    """ANSI escape sequences used to color console / notebook output."""
    RED = '\033[91m'
    GREEN = '\033[92m'
    YELLOW = '\033[93m'
    BLUE = '\033[94m'
    PURPLE = '\033[95m'
    CYAN = '\033[96m'
    END = '\033[0m'  # resets the formatting


def print_in_color(text, color):
    """Print ``text`` wrapped in the ANSI sequence for ``color``.

    Args:
        text: Value to print; converted with ``str()``.
        color: Case-insensitive name of a color defined on ``colors``
            (``"red"``, ``"green"``, ``"yellow"``, ``"blue"``, ``"purple"``,
            ``"cyan"``). Unknown names and non-string values print the text
            without a color prefix.

    The reset sequence ``colors.END`` is always appended.
    """
    # Build the lookup from the shared palette instead of duplicating the codes;
    # END is the reset sequence, not a selectable color.
    palette = {
        name: code
        for name, code in vars(colors).items()
        if name.isupper() and name != 'END'
    }
    key = color.upper() if isinstance(color, str) else None
    color_code = palette.get(key, '')

    print(color_code + str(text) + colors.END)

## Parser for model-generated test questions

In [None]:
def remove_last_char(text):
  """Drop a single trailing comma or period from ``text``, if present.

  Falsy input (empty string, ``None``) is handed back unchanged.
  """
  ends_with_punctuation = bool(text) and text[-1] in ",."
  return text[:-1] if ends_with_punctuation else text

def remove_colon_space_start(sentence):
    """Normalize the beginning of an answer-choice text.

    If ``sentence`` starts with ``": "`` that prefix is removed and the rest is
    returned as is. Otherwise the first character is upper-cased and the
    remainder keeps its original case.

    ``str.capitalize`` is deliberately not used: it lower-cases everything after
    the first character, which would turn e.g. "NaCl" into "Nacl".
    """
    if sentence.startswith(": "):
        return sentence[2:]
    # Slicing keeps this safe for the empty string.
    return sentence[:1].upper() + sentence[1:]

class TestQuestion:
    """A multiple-choice question: its text, answer choices, correct letter and explanation."""

    def __init__(self, question, choices, correct_answer_letter, explanation):
        # Each argument is stored unchanged under an attribute of the same name.
        (self.question,
         self.choices,
         self.correct_answer_letter,
         self.explanation) = (question, choices, correct_answer_letter, explanation)

class TestQuestionParser:
    """Parse a plain-text multiple-choice question into a ``TestQuestion``.

    Lines are matched case-insensitively and with spaces ignored. Recognized
    lines are ``Question: ...``, ``A) ...`` through ``D) ...``,
    ``Correct answer letter: ...`` and ``Explanation: ...`` /
    ``Correct answer explanation: ...``.
    """

    # Normalized prefixes that mark an answer-choice line.
    _CHOICE_PREFIXES = ('A)', 'B)', 'C)', 'D)')
    _EXPLANATION_PREFIXES = ('EXPLANATION:', 'CORRECTANSWEREXPLANATION:')

    def __init__(self, question_str):
        # Double quotes are removed up front so they never end up in parsed fields.
        self.question_str = question_str.replace("\"", "")

    def parse(self):
        """Return the parsed ``TestQuestion``, or ``None`` if parsing fails.

        A failure (missing question, choices, answer letter or explanation, or
        any unexpected error) is reported via ``print_in_color`` rather than
        raised.
        """
        try:
            question = None
            choices = []
            correct_answer_letter = None
            explanation = None

            for line in self.question_str.split('\n'):
                # Matching key: upper-cased, with every space removed.
                key = line.upper().strip().replace(" ", "")

                if key.startswith('QUESTION'):
                    # Split on the first colon only so colons inside the question
                    # text are kept; a "Question" line without a colon is ignored
                    # instead of aborting the whole parse.
                    if ':' in line:
                        question = remove_last_char(line.split(':', 1)[1].strip())
                elif key.startswith('CORRECTANSWERLETTER:'):
                    # Only the text up to a second colon is taken as the letter.
                    correct_answer_letter = remove_last_char(line.split(':')[1].strip())
                elif key.startswith(self._EXPLANATION_PREFIXES):
                    explanation = remove_last_char(line.split(':', 1)[1].strip())
                elif key.startswith(self._CHOICE_PREFIXES):
                    # Choices are collected in the order they appear.
                    choice_text = line.split(')', 1)[1]
                    choices.append(
                        remove_colon_space_start(remove_last_char(choice_text.strip()))
                    )

            if not question:
                raise ValueError("Incomplete data in question string")

            if not choices:
                raise ValueError("Incomplete data in choices string")

            if not correct_answer_letter:
                raise ValueError("Incomplete data in correct_answer_letter string")

            if not explanation:
                raise ValueError("Incomplete data in explanation string")

            return TestQuestion(question, choices, correct_answer_letter, explanation)
        except Exception as e:
            print_in_color("Error occurred while parsing: " + str(e), "red")
            return None


## Excel reader and writer

In [None]:
class Topic:
    """One row of the topics workbook: subject, theme, topic and subtopic."""

    def __init__(self, subject, theme, topic, subtopic):
        # Each argument is stored unchanged under an attribute of the same name.
        (self.subject,
         self.theme,
         self.topic,
         self.subtopic) = (subject, theme, topic, subtopic)

def parse_excel_rows(file_path):
    """Read the workbook at ``file_path`` and return a ``Topic`` per pending row.

    Rows whose ``Done`` cell compares equal to ``True`` are skipped. The
    ``Subject``, ``Theme``, ``Topic`` and ``Subtopic`` cells are converted with
    ``str()`` before being stored on the ``Topic``.
    """
    sheet = pd.read_excel(file_path)
    # Keep the explicit comparison with True: a plain truthiness test would also
    # skip rows whose Done cell is NaN.
    pending = (
        record for _, record in sheet.iterrows()
        if not (record['Done'] == True)
    )
    return [
        Topic(
            str(record['Subject']),
            str(record['Theme']),
            str(record['Topic']),
            str(record['Subtopic']),
        )
        for record in pending
    ]

def write_to_excel(topic, question, elapsed_time):
    """Append one generated question as a row to the workbook at ``result_file_path``.

    NOTE(review): a second ``write_to_excel`` with a different signature is
    defined further down in this cell and replaces this one once the cell runs.

    Args:
        topic: Object providing ``subject``, ``theme``, ``topic`` and ``subtopic``.
        question: Object providing ``question``, ``correct_answer_letter``,
            ``explanation`` and ``choices``.
        elapsed_time: Value written to the "Elapsed Time" column.
    """
    try:
        book = openpyxl.load_workbook(result_file_path, read_only=False)
    except FileNotFoundError:
        # No results file yet: start a new workbook with the header row.
        book = openpyxl.Workbook()
        book.active.append([
            "Subject", "Theme", "Topic", "Subtopic", "Question",
            "Correct Answer", "Explanation", "Elapsed Time", "Date",
            "Choice A", "Choice B", "Choice C", "Choice D",
        ])

    sheet = book.active
    today = datetime.now().strftime("%Y-%m-%d")

    record = [
        topic.subject, topic.theme, topic.topic, topic.subtopic,
        question.question, question.correct_answer_letter, question.explanation,
        elapsed_time, today,
    ]
    # At most four answer choices are written, matching the four choice columns.
    record += question.choices[:4]

    sheet.append(record)
    book.save(result_file_path)

    print(f"Data successfully written to Excel file: {result_file_path}")

import openpyxl
from datetime import datetime

def write_to_excel(result_file_path, topic, question, a, b, c, d,
                   correct_answer, correct_answer_explanation, elapsed_time):
    """Append one generated question as a row to the workbook at ``result_file_path``.

    The workbook is created with a header row when the file does not exist yet,
    and is saved after the row has been appended.

    Args:
        result_file_path: Path of the results workbook.
        topic: Object providing ``subject``, ``theme``, ``topic`` and ``subtopic``.
        question: Question text.
        a, b, c, d: Texts of the four answer choices.
        correct_answer: Value written to the "Correct Answer" column.
        correct_answer_explanation: Value written to the "Explanation" column.
        elapsed_time: Value written to the "Elapsed Time" column.
    """
    try:
        book = openpyxl.load_workbook(result_file_path)
    except FileNotFoundError:
        # No results file yet: start a new workbook with the header row.
        book = openpyxl.Workbook()
        book.active.append([
            "Subject", "Theme", "Topic", "Subtopic", "Question",
            "Correct Answer", "Explanation", "Elapsed Time", "Date",
            "Choice A", "Choice B", "Choice C", "Choice D",
        ])
    sheet = book.active

    today = datetime.now().strftime("%Y-%m-%d")

    # Column order matches the header row: metadata first, then the choices.
    record = [
        topic.subject, topic.theme, topic.topic, topic.subtopic,
        question, correct_answer, correct_answer_explanation,
        elapsed_time, today,
        a, b, c, d,
    ]

    sheet.append(record)
    book.save(result_file_path)

    print(f"Data successfully written to Excel file: {result_file_path}")

# Model


In [None]:
# Install llama-cpp-python quietly, passing -DLLAMA_CUBLAS=on to CMake via CMAKE_ARGS.
# NOTE(review): presumably this enables the CUDA (cuBLAS) build — confirm the flag is
# still the right one for the llama-cpp-python version being installed.
!CMAKE_ARGS="-DLLAMA_CUBLAS=on" pip install llama-cpp-python -q

In [None]:
# Install LangChain quietly.
# NOTE(review): the imports in the next cell come from langchain_community and
# langchain_core — confirm this install provides both.
!pip install langchain -q

In [None]:
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

In [None]:
#!sudo fuser -v /dev/nvidia*

In [None]:
#!sudo kill -400 PID


In [None]:
# Settings forwarded to LlamaCpp below.
# NOTE(review): -1 presumably means "offload all layers to the GPU" — confirm
# against the llama-cpp-python documentation.
n_gpu_layers = -1
n_batch = 256

# Create the LlamaCpp LLM object that the chain sends prompts to.
# grammar_path receives gramma_path, which currently holds the same value as
# model_path — NOTE(review): confirm that is intended.
llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    grammar_path=gramma_path,
    f16_kv=True,
    temperature=0.3,
    max_tokens = 512
)

In [None]:
# Instruction prompt sent to the model; `{context}` is its single placeholder and
# is filled in when the chain is invoked.
template = """ <s>[INST]
Formulate a single multiple-choice test question with correct answer letter and correct answer explanation for an eighth grader on the topic of {context}.
 [/INST]
"""

In [None]:
# Build the LangChain prompt object from the template string.
prompt = PromptTemplate.from_template(template)

In [None]:
# Chain the prompt into the model: the formatted prompt becomes the model input.
llm_chain = prompt | llm

# Main

In [None]:
# Load the topics still to be processed (rows marked done are skipped by the reader).
topics = parse_excel_rows(topics_file_path)

In [None]:
# Generate questions for every pending topic and append each parsed result to the
# results workbook.

# The chain is the same for every request, so build it once instead of per iteration.
llm_chain = prompt | llm

for topic in topics:
    # The reader stores str(cell), so a missing subtopic arrives as "nan". A blank
    # value is accepted too, which keeps this cell idempotent when it is re-run
    # after the subtopic has already been blanked.
    has_subtopic = topic.subtopic not in ("", "nan")
    if not has_subtopic:
        topic.subtopic = ""
    question_count = questions_per_subtopic if has_subtopic else questions_per_topic

    # Build the query only after normalizing the subtopic so the literal text
    # "nan" never reaches the prompt; strip() drops the trailing space left by an
    # empty subtopic.
    query = f"{topic.subject} subject in {topic.theme} about {topic.topic} {topic.subtopic}".strip()

    for _ in range(question_count):
        start_time = time.time()
        # The key must match the template's placeholder name, "context".
        response = llm_chain.invoke({"context": query})
        elapsed_time = time.time() - start_time

        try:
            test_object = json.loads(response)

            print("Question:", test_object["question"])
            print("Option A:", test_object["A"])
            print("Option B:", test_object["B"])
            print("Option C:", test_object["C"])
            print("Option D:", test_object["D"])
            print("Correct Answer:", test_object["correct_answer_letter"])
            print("Explanation:", test_object["correct_answer_explanation"])

            write_to_excel(
                result_file_path,
                topic,
                test_object["question"],
                test_object["A"],
                test_object["B"],
                test_object["C"],
                test_object["D"],
                test_object["correct_answer_letter"],
                test_object["correct_answer_explanation"],
                elapsed_time,
            )

        # A malformed response only skips this attempt; the loop carries on.
        except json.JSONDecodeError as e:
            print("Error parsing JSON:", e)
        except KeyError as e:
            print("Error accessing dictionary key:", e)
