<a href="https://colab.research.google.com/github/Nerothemadlad/gradingAssistant/blob/main/gradingAssistantVer2_0.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Allow the user to upload the CSV file stored on the local computer (which can be downloaded from Google Forms) to the virtual machine associated with this Jupyter Notebook

In [None]:
from google.colab import files, data_table

# Turn on Colab's data-table formatter for displayed dataframes
data_table.enable_dataframe_formatter()
# Ask the user for the file(s); the following cell reads the result
# by file name to obtain the uploaded bytes
uploaded = files.upload()

Extract the file name from the received CSV file and use it to load the CSV file as a pandas dataframe

In [None]:
import pandas as pd
import io

# The first uploaded entry is used: its key is the file name and its value
# holds the raw bytes that pandas parses as CSV
filename = list(uploaded)[0]
df = pd.read_csv(io.BytesIO(uploaded[filename]))
df

### Exclude columns at the beginning or the end of the table that are unrelated to the grading process or require manual evaluation

To exclude columns that are not at the beginning or the end without having to code, use *Microsoft Excel* or *Google Sheets*

In [None]:
from typing import List, Optional
# Collect key information about the table
columns = df.columns
rows_number = len(df) - 1 # number of examinee rows; also the position of the last row, which holds the teacher's answers

def select_columns(columns: list) -> List[int]:
  """
  Select the to be excluded columns, which are unrelated to the grading process
  or require manual evaluation

  The user is asked how many columns to drop at the beginning and at the end
  of the table. The question is repeated until both answers are whole numbers
  and the two groups together fit into the table.

  columns: The list contains all columns' names (any sized sequence works;
           the notebook passes a pandas Index)

  Returns the positions of the excluded columns in ascending order

  """
  total_columns = len(columns)

  while True:
    print(
        "Only choose columns unrelated to grading process or required manual evaluation"
    )
    columns_at_the_beginning = input(
        "How many columns at the beginning do you wish to exclude (enter 0 for none)? "
    ).strip()
    columns_at_the_end = input(
        "How many columns at the end do you wish to exclude (enter 0 for none)? "
    ).strip()

    # `isdecimal` only accepts characters that `int` can convert
    # (`isnumeric` also accepts e.g. "²", which makes `int` raise)
    if not (columns_at_the_beginning.isdecimal() and columns_at_the_end.isdecimal()):
      print("Numbers only, please!")
      continue

    columns_at_the_beginning = int(columns_at_the_beginning)
    columns_at_the_end = int(columns_at_the_end)

    # Without this check the two ranges overlap or contain negative indices
    if columns_at_the_beginning + columns_at_the_end > total_columns:
      print(f"The table only has {total_columns} columns, please try again!")
      continue

    leading = list(range(columns_at_the_beginning))
    trailing = list(range(total_columns - columns_at_the_end, total_columns))
    return leading + trailing

# Ask the user which columns to leave out and show the selected positions
unused_columns_indices = select_columns(columns)

print(unused_columns_indices)

Exclude the unused columns

In [None]:
# The column labels that remain after the exclusion are the exam questions
questions = columns.delete(unused_columns_indices)
questions_number = len(questions)
# Keep only the question columns (all rows, including the teacher's row)
examinee_answers_table = df[questions]

### Evaluate examinees' answers using the correct answers submitted by the teacher after the exam (located on the last row because Google Forms sorts responses by submission time)

In [None]:
import numpy as np
import re

# Extract the correct answers from the row at position `rows_number`
# (the teacher's submission, expected to be the last row of the table)
right_answers = list(examinee_answers_table.iloc[rows_number])

def evaluate(answer: str, correct_answer: str) -> bool:
  """
  Evaluate each individual answer in the sheet

  An answer is right when the words of the correct answer appear in it as a
  consecutive run of whole words. The first word of the correct answer is also
  accepted in its capitalized form.

  answer: The examinee answer
  correct_answer: The answer submitted by the teacher

  Returns True when the answer is right, False otherwise

  """

  # Cells that are not text (NaN for an empty cell, plain numbers) are compared
  # directly. NaN == NaN is False, so an empty correct answer never adds scores
  if not (isinstance(answer, str) and isinstance(correct_answer, str)):
    return bool(answer == correct_answer)

  # Extract the words of both answers. `findall` (unlike `split`) yields no
  # empty strings when the text starts or ends with punctuation, so a correct
  # answer such as "likes." still matches "likes"
  keywords = re.findall(r'\w+', answer)
  correct_keywords = re.findall(r'\w+', correct_answer)

  # A correct answer without any word (e.g. "?") can only be matched literally
  if not correct_keywords:
    return answer.strip() == correct_answer.strip()

  # Create a capitalized version of correct answer (high tolerance)
  correct_keywords_capitalized = [correct_keywords[0].capitalize()] + correct_keywords[1:]
  accepted = (correct_keywords, correct_keywords_capitalized)

  # Go words by words to avoid error in conjugation
  # (e.g: 'like' in 'likes' return True but in reality is still False).
  # The range is empty when the answer is shorter than the correct answer
  window = len(correct_keywords)
  return any(
      keywords[start:start + window] in accepted
      for start in range(len(keywords) - window + 1)
  )
# Evaluate every row (the teacher's row included) against the correct answers
# and collect the outcomes (True/False) in a table (nested list).
# The answers are paired with the correct answers by position using `zip`,
# which avoids integer keys on a Series that is indexed by question labels
results = [
  [evaluate(answer, right_answer) for answer, right_answer in zip(row, right_answers)]
  for _, row in examinee_answers_table.iterrows()
]

# Convert the the `True/False` table to a matrix of 0 and 1 using NumPy array
# to be used for grading with a score per answer scale vector later
results = np.array(results).astype(int)
print(f'The results of each examinee (1: right, 0: wrong):\n{results}')

### Start the grading process
Create a score per correct answer scale for grading later.

In [None]:
def scores_input() -> None:
  """
  Ask user to enter the scores for every correct answer

  For each problem the user enters the number of questions it contains and the
  total score of the problem; the score is shared equally among its questions.
  Entering "e" finishes the input. The whole input is repeated until the
  number of collected scores equals the global `questions_number`.

  The result is stored in the global list SCORE_PER_CORRECT_ANSWER
  (one score per question); nothing is returned.

  """

  # Create a scores scale variable on global scope
  global SCORE_PER_CORRECT_ANSWER

  # A loop (instead of calling the function again) restarts the input
  # without growing the call stack
  while True:
    SCORE_PER_CORRECT_ANSWER = []
    i = 1

    while True:

      print('Enter "e" to exit after finishing.')
      total_problems = input(f'Number of questions in problem {i}: ').strip()

      if total_problems == 'e':
        break

      # `isdecimal` only accepts characters that `int` can convert
      if not total_problems.isdecimal():
        print('Numbers only, please!')
        continue

      total_problems = int(total_problems)

      # Zero questions would divide the score by zero below
      if total_problems == 0:
        print('A problem must contain at least one question!')
        continue

      total_scores = input('Score for this problem: ').strip()

      if not total_scores.isdecimal():
        print('Numbers only, please!')
        continue

      total_scores = int(total_scores)
      # `+=` is used instead of .append() method because it only extends the original list with elements of the new list
      # instead of adding the new list as an element to the original list and make a nested list (table)
      SCORE_PER_CORRECT_ANSWER += ([total_scores/total_problems] * total_problems)
      i += 1

    if len(SCORE_PER_CORRECT_ANSWER) == questions_number:
      return

    print("""\nThe number of scores doesn't match the number of questions!
          \nPlease enter the scores again!\n""")

# Collect the scores interactively; the function fills the global list
scores_input()
# Convert the scale to a NumPy 1d array representing a vector
# to be used for grading in conjunction with the "results" matrix
SCORE_PER_CORRECT_ANSWER = np.array(SCORE_PER_CORRECT_ANSWER)
print(f'\nScore per correct answer scale:\n{SCORE_PER_CORRECT_ANSWER}')

If you're not interested in the technical details of how the code works, skip ahead.

>***The first line is only a fail-safe that explicitly adds a new axis to the 1d array and makes it a 2d "column vector" that the "results" matrix is multiplied by. It should have no real effect on the process because a 1d NumPy array is normally treated as a 2d "column vector". But it will sometimes be automatically broadcast to a 2d "row vector", which can cause the multiplication to fail.***

The second line calculates the score for each answer using matrix (`results`) by vector (`SCORE_PER_CORRECT_ANSWER`) multiplication. You can also use other libraries like `Numba`, `Cython` or `operator` to perform the multiplication if you find `NumPy`'s automatic broadcasting concept confusing.

In [None]:
# NOTE(review): the result of this indexing expression is not assigned to
# anything, so SCORE_PER_CORRECT_ANSWER itself stays a 1d array
SCORE_PER_CORRECT_ANSWER[:, np.newaxis]
# Element-wise product: the 1d scale is broadcast over every row of `results`,
# replacing each 1/0 with the score earned for that question
scores = results * SCORE_PER_CORRECT_ANSWER
print(f'The score for every question:\n{scores}')

Count the questions in each individual part/section of the exam (e.g. the listening, reading and grammar parts of a language exam).

In [None]:
def part_questions_counter() -> None:
  """
  Ask user to enter questions number of each section

  Entering "e" finishes the input. The whole input is repeated until the
  numbers of all parts add up to the global `questions_number`.

  The result is stored in the global list PARTS_QUESTIONS
  (one number per part); nothing is returned.

  """

  global PARTS_QUESTIONS

  # A loop (instead of calling the function again) restarts the input
  # without growing the call stack
  while True:
    PARTS_QUESTIONS = []
    i = 1

    while True:

      print('Enter "e" to exit after finishing.')
      part_questions = input(f'Number of questions in part {i}: ').strip()

      if part_questions == 'e':
        break

      # `isdecimal` only accepts characters that `int` can convert
      if not part_questions.isdecimal():
        print('Numbers only, please!')
        continue

      part_questions = int(part_questions)

      # An empty part would have a maximum score of zero
      if part_questions == 0:
        print('A part must contain at least one question!')
        continue

      PARTS_QUESTIONS.append(part_questions)
      i += 1

    if sum(PARTS_QUESTIONS) == questions_number:
      return

    print("""\nNumber of questions in all parts doesn't match the number of questions!
      \nPlease enter the number of questions in each part again!\n""")

# Explain the single-part shortcut, collect the size of every part
# and show the collected sizes
print(f"If the exam doesn't have multiple sections, enter {questions_number} and then 'e' to exit:")
part_questions_counter()
print(f'Number of questions in each part:\n{PARTS_QUESTIONS}')

Calculate the scores for every part in the exam and save these values to a dictionary object.

In [None]:
from itertools import islice

# Labels ('Part 1', 'Part 2', ...) used as dictionary keys, one per exam part
parts = tuple(
  f'Part {number}' for number in range(1, len(PARTS_QUESTIONS) + 1)
)
def ask_for_name_id_column() -> int:
  """
  Ask user for the number of the column that holds the names/IDs

  Column numbers start at 1. The question is repeated until a whole number
  of at least 1 is entered.

  Returns the entered column number

  """
  while True:
    name_id_column = input('Names/IDs are at column number: ').strip()

    # `isdecimal` only accepts characters that `int` can convert
    if not name_id_column.isdecimal():
      print('Numbers only, please!')
      continue

    column_number = int(name_id_column)

    # 0 would become position -1 after the caller subtracts 1,
    # which silently selects the last column
    if column_number < 1:
      print('Column numbers start at 1!')
      continue

    return column_number

name_id_column = ask_for_name_id_column()

# Names/IDs of the first `rows_number` rows (the examinees); the entered
# column number is 1-based, hence the `- 1` to get the position
examinees = tuple(df.iloc[:rows_number, name_id_column - 1])
examinees_scores = []

def sum_score_for_all_parts(score: list) -> list:
  """
  Add up the scores of one examinee part by part

  The scores are consumed in order: the first part takes as many values as
  its number of questions in PARTS_QUESTIONS, the next part continues where
  the previous one stopped.

  score: the list containing scores for every answer of an examinee

  Returns a list with one total per part

  """
  # A single shared iterator lets every slice continue after the previous one
  remaining_scores = iter(score)
  return [sum(islice(remaining_scores, size)) for size in PARTS_QUESTIONS]

# Calculate the maximum scores for every part: the row at position
# `rows_number` belongs to the teacher, whose answers were graded
# against themselves
maximum_scores = scores[rows_number]
scores = scores[:rows_number]
maximum_score_per_part = sum_score_for_all_parts(maximum_scores)

# Zip the score to the respective part (one dictionary per examinee)
scores_per_examinee = [
  dict(zip(parts, sum_score_for_all_parts(score))) for score in scores
]

# Dictionary keys must be unique: examinees sharing the same name/ID would
# overwrite each other, so repeated names get a numbered suffix, e.g. "An (2)"
used_labels = set()
examinee_labels = []
for examinee in examinees:
  label = examinee
  copy_number = 1
  while label in used_labels:
    copy_number += 1
    label = f'{examinee} ({copy_number})'
  used_labels.add(label)
  examinee_labels.append(label)

# Zip the scores to the respective examinee
examinees_scores = dict(zip(examinee_labels, scores_per_examinee))
examinees_scores

Convert the dictionary object to a pandas dataframe and scale the scores for each part (per user choice)

In [None]:
# The dictionary keys (examinees) become the row labels, the keys of the
# inner dictionaries (parts) become the columns
scores_table = pd.DataFrame.from_dict(examinees_scores, orient='index')
def scale_the_scores() -> Optional[float]:
  """
  Ask user for the maximum score of the scale used for every part

  Whole numbers and decimals (e.g. 7.5) are accepted. The question is repeated
  until a finite, non-negative number or "e" is entered.

  Returns the entered maximum as a float, or None when the user enters "e"
  to keep the scores unscaled

  """
  while True:
    print('To not scale the scores, enter "e" to exit.')
    user_input = input('The maximum score of the scale: ').strip()

    if user_input == 'e':
      return None

    # Let `float` decide what a number is, so decimals are accepted too
    try:
      maximum = float(user_input)
    except ValueError:
      maximum = None

    # The chained comparison also rejects "nan", "inf" and negative numbers
    if maximum is None or not 0 <= maximum < float('inf'):
      print('Numbers only, please!')
      continue

    return maximum

scale = scale_the_scores()
# A falsy answer (None or 0) leaves the table as it is
if scale:
  # Rescale each part so that its maximum score maps onto `scale`,
  # rounded to one decimal place
  for part_position, part_maximum in enumerate(maximum_score_per_part):
    part_column = scores_table.iloc[:, part_position]
    scores_table.iloc[:, part_position] = (part_column / part_maximum * scale).round(1)

scores_table

### Export the data as an Excel file and save it to the local machine

In [None]:
# Remove only the last extension so that dots inside the file name are kept
# (e.g. "Quiz 1.2.csv" becomes "Quiz 1.2_scores.xlsx")
output_filename = filename.rsplit('.', 1)[0] + '_scores.xlsx'
# Write the table to an Excel file and offer it for download
scores_table.to_excel(output_filename)
files.download(output_filename)