<a href="https://colab.research.google.com/github/CallumPaton/bioAI/blob/main/bioAI_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
# Libraries
import pandas as pd
import numpy as np
from google.colab import drive
import seaborn as sns 
import matplotlib.pyplot as plt # matplotlib for plotting graphs

# %matplotlib inline renders plot inline on your page
%matplotlib inline

In [27]:
# mount drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [28]:
# import question dataframes
algebra = pd.read_csv('/content/drive/MyDrive/bioAI/algebra.csv')
geometry = pd.read_csv('/content/drive/MyDrive/bioAI/geometry.csv')
numbers = pd.read_csv('/content/drive/MyDrive/bioAI/numbers.csv')
probability= pd.read_csv('/content/drive/MyDrive/bioAI/probability.csv')
ratios = pd.read_csv('/content/drive/MyDrive/bioAI/ratios.csv')

In [None]:
combined = pd.concat([algebra, geometry,numbers, probability,ratios], ignore_index =True)
combined

In [34]:
def generate_initial_questions(knowledge_dict):
  '''
  Takes a dictionary of students current knowledge level to return appropriate
  initial question set. 
  '''
  initial_questions = []
  for syllabus_area, understanding_score in knowledge_dict.items():
    # map understanding between 0-1 to a difficulty score from 1-10
    difficulty_level = round(understanding_score*10)
     
    # find closest questions to the calculated difficulty le
    df = combined[combined['Topic']==syllabus_area]
    closest_difficulty = min(df['Difficulty'], key=lambda x: abs(x-difficulty_level))
    closest_question = df.loc[df['Difficulty'] == closest_difficulty].sample(n=1)
    initial_questions.append(closest_question)
  
  return pd.concat(initial_questions, ignore_index =True)




In [37]:
def ask_questions(question_df):
    '''
    Asks the questions in the current question set and saves marks to a vector
    '''
      # create an empty list to store the results
    results = []
    # iterate over each row in the initial_questions_df dataframe
    for index, row in question_df.iterrows():
        # print the question
        print(row['Question'])
        print(f'Topic: {row["Topic"]}')
        print(f'Difficulty: {row["Difficulty"]}/10')
        # get an answer from the user using the input function
        user_answer = input()
        # check if the answer is correct
        answer = row['Answer'].lower().strip('"')
        if user_answer.lower() == answer:
            # if the answer is correct, add 1 to the results list
            results.append(1)
            print('Correct!')
        else:
            # if the answer is incorrect, add 0 to the results list
            results.append(0)
            print('Incorrect.')
            print(f'Answer: {answer}')
        print('') # print a blank line for formatting
    return results


In [7]:
def fitness_function(difficulty, marks):
  return ((11-difficulty)*0.1+(1.1-marks))/2.1

In [32]:
def selection(fitness_scores, topics, adjustment_factor = 8):
  '''
  Take the fitness scores and use them to calculate the proportional selection
  and difficulty adjustment for next generation
  '''
  df = pd.DataFrame({'Topic':topics.tolist()})
  df['Old_Difficulty'] = current_generation.Difficulty.tolist()
  df['Marks'] = results
  df['Fitness Score'] = fitness_scores
  # proportional selection
  proportional_selection = np.transpose(np.array(fitness_scores)/np.sum(fitness_scores))
  df['Selection_Probability'] = proportional_selection

  # difficulty adjustment
  adjusted_difficulties = []
  index = 0

  for score in fitness_scores:
    # if fitness score is less than 0.5, questions get harder, else they get easier
    # adjustment factor determines how extreme the jump is from q to q.
    if score >= 0.5:
      adjustment = (1 - score)*adjustment_factor
      new_difficulty = max(current_generation.Difficulty.tolist()[index] - adjustment, 1)
    else:
      adjustment = score*adjustment_factor
      new_difficulty = min(current_generation.Difficulty.tolist()[index] + adjustment, 10)
    
    index += 1
    adjusted_difficulties.append(new_difficulty)
  df['New_Difficulty'] = adjusted_difficulties
  
  return df

In [31]:
# use selection criterio to 
def generational_replacement(selection_df):
  """
  Uses the selection criterion to select new question set from database
  """
  copy_df = combined.copy()
  topics = selection_df['Topic'].tolist()
  probabilities = selection_df['Selection_Probability'].tolist()
  difficulties = selection_df['New_Difficulty'].tolist()

  # select five topics based on the selection proDbability
  indexes = range(len(topics))

  selected_indexes = np.random.choice(indexes, size=5, p=probabilities, replace=True)
  selected_topics = [topics[i] for i in selected_indexes]

  # create an empty list to store the questions
  questions = []

  # counter
  counter = 0

  # loop over the selected topics and select a question from the combined dataframe
  for topic in selected_topics:
    # get the correct new difficulty
    # filter the combined dataframe based on the selected topic and new difficulty
    filtered_df = copy_df[(copy_df['Topic'] == topic)]

    closest_row_index = (filtered_df['Difficulty'] - difficulties[selected_indexes[counter]]).abs().idxmin()
    closest_row = filtered_df.loc[closest_row_index]
    copy_df = copy_df.drop(index = closest_row_index)

    # randomly select a question from the filtered dataframe
    selected_question = closest_row.to_frame().transpose()
    # add the selected question to the list of questions
    questions.append(selected_question)
    counter += 1

  # concatenate the questions into a new dataframe
  new_question_set = pd.concat(questions, ignore_index=True)  
  return new_question_set


In [24]:
import random
def mutation(selected_questions):
  '''
  Takes the selected questions for the next generation and randomly swaps one
  of the rows with another from the database.
  '''
  row_idx_questions = random.choice(selected_questions.index)
  row_idx_database = random.choice(combined.index)
  replacement = combined.loc[row_idx_database]

  selected_questions.loc[row_idx_questions] = replacement

  return selected_questions

  #randomly choose a row 

In [38]:
# colab formatting
pd.set_option('max_colwidth', 400)
pd.set_option('display.expand_frame_repr', False)

# generate initial question set based on a dictionary containiing the current level of understanding
global_knowledge_level = {'Algebra':0.8,'Geometry':0.8,'Numbers':0.2,'Probability':0.5,'Ratios':0.2}

# Get initial question set based on global knowledge dictionary
initial_questions = generate_initial_questions(global_knowledge_level)

# set initial qustions to current gen
current_generation = initial_questions
print('Initialised Question Set Based on Current Understanding')
print('-------------------------------------------------------')
print(current_generation[['Topic','Difficulty']])
generation_count = 1

while True:

  print(f'-----------------Generation {generation_count}-----------------')
  # ask questions
  results = ask_questions(current_generation)

  print('----CALCULATING FITNESS SCORES----')
  # calculate fitness score
  fitness_scores = [fitness_function(difficulty, marks) for difficulty, marks in zip(current_generation.Difficulty.tolist(), results)]
  
  
  # selection 
  selection_df = selection(fitness_scores,current_generation.Topic)
  
  print(selection_df)

  
  print('------CREATING NEW QUESTION SET------')
  # generational_replacemment 
  # update current_generation
  current_generation = generational_replacement(selection_df)
  print('---------APPLYING MUTATION---------')
  current_generation = mutation(current_generation)
  generation_count += 1 




Initialised Question Set Based on Current Understanding
-------------------------------------------------------
         Topic  Difficulty
0      Algebra           8
1     Geometry           8
2      Numbers           2
3  Probability           5
4       Ratios           2
-----------------Generation 1-----------------
Solve the equation 3x^2-9x+6=0
Topic: Algebra
Difficulty: 8/10
x=2, x=4
Incorrect.
Answer: x = 2, x=1

If a=(-4 -1) and b = (3 -1). What is the vector 2a+b?
Topic: Geometry
Difficulty: 8/10
(-2 -1)
Incorrect.
Answer: (-5 -3)

Write the following number in standard form: 0.0000415
Topic: Numbers
Difficulty: 2/10
4.15x10^-5
Correct!

Faye is testing if a dice is fair. She throws it 120 times. How many times is it expected to land on 1?
Topic: Probability
Difficulty: 5/10
20
Correct!

Divide £600 in the ratio 9:6:5
Topic: Ratios
Difficulty: 2/10
300:200:100
Incorrect.
Answer: 270:180:150

----CALCULATING FITNESS SCORES----
         Topic  Old_Difficulty  Marks  Fitness Scor

KeyboardInterrupt: ignored