# Llobo Squad Project - EduTrackAI!

# Grades

In [34]:
import pandas as pd
import numpy as np
import os

In [52]:
# Grades are read from spreadsheet columns Q..AF of the workbook.
grades = pd.read_excel('EducationBasev1.xlsx', usecols='Q:AF')

# Keep rows up to and including the last row that holds at least one value;
# the rows after it contain no values at all.
last_filled_row = grades.last_valid_index()
grades = grades.loc[:last_filled_row]

# Preview the eight most recent rows.
grades.tail(8)

Unnamed: 0,School year,School period,Arts,Physical Education,Philosofy,Sociology,English,Physics,Natural Sciences,Chemistry,Biology,Geography,History,Mathematics,Portuguese Language,Absences
40,2 Medio,2016-02-01,5.53,3.78,3.24,4.62,4.89,4.08,,3.28,5.18,5.45,4.16,3.14,5.1,4.0
41,2 Medio,2016-04-01,4.43,5.24,3.97,3.49,3.02,3.93,,6.97,4.72,5.56,6.64,5.04,4.95,4.0
42,2 Medio,2016-06-01,3.0,5.74,6.88,3.23,3.18,3.9,,5.03,4.59,5.01,5.23,5.09,5.38,4.0
43,2 Medio,2016-08-01,6.36,5.17,4.64,6.51,4.43,4.37,,5.32,4.6,5.86,5.46,6.98,5.4,6.0
44,3 Medio,2017-02-01,3.73,6.14,4.57,6.88,6.07,3.18,,6.38,4.95,4.96,5.63,5.44,3.54,5.0
45,3 Medio,2017-04-01,5.48,6.56,6.96,3.04,5.56,5.12,,4.88,5.21,4.88,5.35,6.95,3.0,6.0
46,3 Medio,2017-06-01,5.77,3.89,6.37,5.12,4.74,4.75,,5.15,4.75,3.55,4.8,5.08,4.78,6.0
47,3 Medio,2017-08-01,5.22,4.77,6.42,5.03,7.0,3.5,,3.75,4.01,4.92,4.48,5.82,8.23,9.0


In [53]:
# Obtain outlier performances

def get_outliers(grades):
    """Summarise which disciplines stand out over the last eight rows.

    The last 8 rows of ``grades`` are averaged per discipline, where the
    disciplines are every column except the first two and the last one.
    Disciplines whose mean is NaN are ignored. A mean (rounded to two
    decimals) of 8 or more is a positive highlight; 5 or less is a negative
    highlight.

    Args:
        grades: DataFrame of grades, one row per school period.

    Returns:
        str: A single sentence listing the positive and negative highlights,
        with a fixed placeholder text whenever a list is empty.
    """
    window = grades.tail(8)  # current and previous year only

    # Rounded per-discipline averages, skipping disciplines without grades.
    averages = {}
    for subject in window.columns[2:-1]:
        average = window[subject].mean()
        if np.isnan(average):
            continue
        averages[subject] = round(average, 2)

    highs = [subject for subject, average in averages.items() if average >= 8]
    lows = [subject for subject, average in averages.items() if average <= 5]

    positive_text = ", ".join(highs) + "." if highs else 'No positive performance highlights.'
    negative_text = ", ".join(lows) + "." if lows else 'No negative performance highlights.'

    return f"Positive performance: {positive_text} Negative performance: {negative_text}"

# Show the highlight summary computed from the loaded grades.
print(get_outliers(grades))

Positive performance: No positive performance highlights. Negative performance: Arts, Sociology, English, Physics, Biology.


In [54]:
# Obtain the student's overall performance

def get_performance(grades):
    """Classify the student's overall performance for the current year.

    The overall mean is the average of the per-discipline means over the last
    4 rows of ``grades`` (the current year). The number of failing
    disciplines is the number of disciplines whose mean over the last 8 rows,
    rounded to two decimals, is 5 or less. Disciplines are every column
    except the first two and the last one; disciplines with no grades in the
    window (NaN mean) are skipped in both calculations.

    Args:
        grades: DataFrame of grades, one row per school period.

    Returns:
        str: One of 'Very-high general performance',
        'Medium-high general performance', 'Medium general performance',
        'Low general performance' or 'Very-low performance'. When no
        discipline has any grade the mean is NaN and the last label is
        returned.
    """
    disciplines = list(grades.columns[2:-1])

    # Current-year overall mean (mean of the per-discipline means).
    current_means = grades.tail(4)[disciplines].mean().dropna()
    grades_mean = round(current_means.mean(), 2)

    # Count failing disciplines directly. The previous implementation used
    # len(get_outliers(grades)[1]); get_outliers returns a string, so that
    # expression was the length of a single character (always 1) and the
    # failing-discipline guards below never took effect.
    recent_means = grades.tail(8)[disciplines].mean().dropna().round(2)
    num_reprovadas = int((recent_means <= 5).sum())

    if grades_mean >= 9 and num_reprovadas <= 1:
        return 'Very-high general performance'
    elif grades_mean >= 7.5 and num_reprovadas <= 2:
        return 'Medium-high general performance'
    elif grades_mean >= 6:
        return 'Medium general performance'
    elif grades_mean >= 5:
        return 'Low general performance'
    else:
        return 'Very-low performance'

# Show the performance label computed from the loaded grades.
print(get_performance(grades))

Low general performance


In [56]:
# Obtain relative frequency

def get_relative_frequency(grades):
    """Compute the attendance ratio from the period with the most absences.

    The last 4 values of the 'Absences' column are paired, in order, with the
    lesson counts 50, 100, 150 and 200. The period with the highest absence
    count is selected (the first one on ties) and the result is
    ``1 - absences / lessons`` for that period, rounded to two decimals.

    NOTE(review): the lesson counts look like cumulative lessons per period;
    confirm against the school calendar.

    Args:
        grades: DataFrame with an 'Absences' column.

    Returns:
        float: Relative frequency rounded to two decimals.
    """
    absences = grades['Absences'].tail(4).reset_index(drop=True)  # current year only

    # Pair each period's absences with the lesson count for that period.
    periods = absences.to_frame(name='Absences').assign(Lessons=[50, 100, 150, 200])

    worst_period = periods['Absences'].idxmax()
    absence_ratio = periods.at[worst_period, 'Absences'] / periods.at[worst_period, 'Lessons']

    return round(1 - absence_ratio, 2)

# Obtain frequency warning

def get_frequency_warning(grades):
    """Return a warning label when the relative frequency is 0.7 or lower.

    Args:
        grades: DataFrame passed through to ``get_relative_frequency``.

    Returns:
        str: 'Frequency warning' or 'No frequency warning'.
    """
    low_attendance = get_relative_frequency(grades) <= 0.7
    return 'Frequency warning' if low_attendance else 'No frequency warning'

# Show the attendance ratio and the warning label derived from it.
print(get_relative_frequency(grades))
print(get_frequency_warning(grades))

0.96


# Multi-Agent System Setting

In [57]:
# Function pack for data handling

# Function to check and correct major functions returns.

def check_size(data_list):
    """Normalise a list result: unwrap single items, map empty to None.

    Args:
        data_list: A sized, indexable collection.

    Returns:
        The only element when there is exactly one, the collection itself
        when there are several, and None when it is empty.
    """
    count = len(data_list)
    if count == 0:
        return None
    if count == 1:
        return data_list[0]
    return data_list

# Function to retrieve the values of a column between the initial and final indexes, stopping at the first NaN.

def get_column_data(data, column_name, initial_index=0, final_index=None):
    """Return the values of one column up to (not including) its first NaN.

    Rows are selected with a label-based ``.loc`` slice from
    ``initial_index`` to ``final_index`` (inclusive); ``final_index``
    defaults to ``len(data)``.

    Args:
        data: Source DataFrame.
        column_name: Column to read.
        initial_index: First row label of the range.
        final_index: Last row label of the range, or None for ``len(data)``.

    Returns:
        The result of ``check_size`` applied to the collected values: a
        single value, a list of values, or None when nothing was collected.
    """
    final_index = len(data) if final_index is None else final_index
    filtered_data = data.loc[initial_index:final_index, column_name]

    missing = filtered_data.isna().to_numpy()
    if missing.any():
        # Cut by position rather than by "label - 1": this does not assume a
        # contiguous integer index, and an empty selection no longer reaches
        # idxmax(), which raises ValueError on empty input.
        filtered_data = filtered_data.iloc[:missing.argmax()]

    return check_size(filtered_data.tolist())

# Function to retrieve data from 2 columns, and attach data to a single dictionary.
def get_column_dict_data(data, first_column_name, second_column_name, initial_index=0, final_index=None):
    """Build a dictionary mapping one column's values to another's.

    Rows are taken with a label-based ``.loc`` slice from ``initial_index``
    to ``final_index`` (``len(data)`` when None); rows with NaN in either
    column are dropped before pairing.

    Args:
        data: Source DataFrame.
        first_column_name: Column providing the keys.
        second_column_name: Column providing the values.
        initial_index: First row label of the range.
        final_index: Last row label of the range, or None for ``len(data)``.

    Returns:
        dict: ``{first_column_value: second_column_value}``; later rows
        overwrite earlier ones that share a key.

    Raises:
        ValueError: If the two selected columns differ in size.
    """
    if final_index is None:
        final_index = len(data)

    wanted = [first_column_name, second_column_name]
    rows = data.loc[initial_index:final_index, wanted].dropna()

    keys = rows[first_column_name]
    values = rows[second_column_name]
    if keys.size != values.size:
        raise ValueError("Columns have mismatched sizes within the selected range.")

    return {key: value for key, value in zip(keys, values)}

def get_column_tuple_data(data, first_column_name, second_column_name, initial_index=0, final_index=None):
    """Pair the values of two columns row by row.

    Rows are taken with a label-based ``.loc`` slice from ``initial_index``
    to ``final_index`` (``len(data)`` when None); rows with NaN in either
    column are dropped before pairing.

    Args:
        data: Source DataFrame.
        first_column_name: Column providing the first tuple element.
        second_column_name: Column providing the second tuple element.
        initial_index: First row label of the range.
        final_index: Last row label of the range, or None for ``len(data)``.

    Returns:
        list[tuple]: One ``(first, second)`` tuple per remaining row, in row
        order.

    Raises:
        ValueError: If the two selected columns differ in size.
    """
    last_label = len(data) if final_index is None else final_index

    pairs = data.loc[initial_index:last_label, [first_column_name, second_column_name]]
    pairs = pairs.dropna()

    left = pairs[first_column_name]
    right = pairs[second_column_name]
    if left.size != right.size:
        raise ValueError("Columns have mismatched sizes within the selected range.")

    return [(first, second) for first, second in zip(left, right)]

### Agents

In [58]:
# Environment setting

import openai
from google.colab import userdata

# Read the SambaNova API key from the Colab user-data store.
# NOTE(review): the agent functions in the cells below call userdata.get(...)
# themselves, so this variable appears to be unused in the visible cells.
api_key = userdata.get("SAMBANOVA_API_KEY")

In [59]:
# Comments descriptor agent

def comments_descriptor_agent(this_year_comments):
    """Ask the model for a plain summary of the given teacher comments.

    Sends a fixed system prompt plus the comments to the
    'Meta-Llama-3.1-8B-Instruct' model through the SambaNova endpoint, using
    the OpenAI client and the "SAMBANOVA_API_KEY" value from ``userdata``.

    Args:
        this_year_comments: Text containing the comments to summarise.

    Returns:
        The message content of the first choice in the model response.
    """
    system_prompt = "You are an assistant who specializes in making teacher comment summaries based on a set of teacher comments. Write a text listing the information from the comments. Only generate a summary of the comments, do not give suggestions for improvements or any other notes."
    # Section label spelling fixed (was "Commments").
    user_prompt = f"### Comments: {this_year_comments}"

    client = openai.OpenAI(
        api_key=userdata.get("SAMBANOVA_API_KEY"),
        base_url="https://api.sambanova.ai/v1",
    )

    response = client.chat.completions.create(
        model='Meta-Llama-3.1-8B-Instruct',
        messages=[{"role":"system","content":system_prompt},{"role":"user","content":user_prompt}],
        temperature =  0.4,
        max_tokens = 1200,
    )

    recent_comments_synthesis = response.choices[0].message.content

    return recent_comments_synthesis

In [74]:
# Behavioural profile agent

def behavioural_profile_agent(name, latest_description, this_year_comments, diagnostics):
    """Ask the model for a behavioural profile of the student.

    Sends a fixed system prompt plus the student's name, description,
    comments and diagnostics to the 'Meta-Llama-3.1-8B-Instruct' model
    through the SambaNova endpoint, using the OpenAI client and the
    "SAMBANOVA_API_KEY" value from ``userdata``.

    Args:
        name: Student name.
        latest_description: Description text placed under "### Description".
        this_year_comments: Comments text placed under "### Comments".
        diagnostics: Diagnostics placed under "### Diagnostics".

    Returns:
        The message content of the first choice in the model response.
    """
    system_prompt = "You are an assistant specialized in creating behavioral profiles of students based on a description, diagnoses (if any) and comments from teachers. Write a text with well-structured and related information, providing valuable insights into student behavior. Structure the text containing: main characteristics, relationship with the diagnosis and general observations. Just define the student's behavior, don't give suggestions for improvements."
    # A blank-line separator now sits between the Description and Comments
    # sections; it was missing, so the description text ran straight into the
    # "### Comments" header, unlike every other section in these prompts.
    user_prompt = f"### Name: {name} \n\n ### Description: {latest_description} \n\n ### Comments: {this_year_comments} \n\n ### Diagnostics: {diagnostics}"

    client = openai.OpenAI(
        api_key=userdata.get("SAMBANOVA_API_KEY"),
        base_url="https://api.sambanova.ai/v1",
    )

    response = client.chat.completions.create(
        model='Meta-Llama-3.1-8B-Instruct',
        messages=[{"role":"system","content":system_prompt},{"role":"user","content":user_prompt}],
        temperature =  0.4,
        max_tokens = 1200,
    )

    behavioural_profile = response.choices[0].message.content

    return behavioural_profile

In [61]:
# General description agent

def general_description_agent(name, performance, outliers, behavioural_profile, this_year_comments):
    """Ask the model for a short general description of the student.

    Sends a fixed system prompt plus the student's name, academic
    performance, subject highlights, behavioural profile and comments to the
    'Meta-Llama-3.1-8B-Instruct' model through the SambaNova endpoint, using
    the OpenAI client and the "SAMBANOVA_API_KEY" value from ``userdata``.

    Returns:
        The message content of the first choice in the model response.
    """
    client = openai.OpenAI(
        base_url="https://api.sambanova.ai/v1",
        api_key=userdata.get("SAMBANOVA_API_KEY"),
    )

    system_prompt = "You are a specialized assistant generating the general description of students based on the student's academic performance, the student's best subjects, their behavioral profile and comments made by teachers over time. Briefly describe the student based on academic performance and the comments described. The description must be a summarized paragraph and cover the most important information, and may take into account the months of each comment (to show changes in the student's behavior throughout the year) and the reports to relate the behaviors to the cognitive problems presented, if applicable. the case. Just describe the situation, do not give directions or suggestions to teachers or parents."
    user_prompt = f"### Name: {name} \n\n ### Academic performance: {performance} \n\n ### Best subjects: {outliers} \n\n ### Behavioral profile: {behavioural_profile} \n\n ### Comments: {this_year_comments}"

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    completion = client.chat.completions.create(
        model='Meta-Llama-3.1-8B-Instruct',
        messages=conversation,
        temperature=0.6,
        max_tokens=1200,
    )

    # Only the text of the first choice is used.
    return completion.choices[0].message.content

In [73]:
# Historical description agent

def historical_description_agent(name, all_comments_text):
    """Ask the model for a behavioural history built from all comments.

    Sends a fixed system prompt plus the student's name and the full comment
    text to the 'Meta-Llama-3.1-8B-Instruct' model through the SambaNova
    endpoint, using the OpenAI client and the "SAMBANOVA_API_KEY" value from
    ``userdata``.

    Returns:
        The message content of the first choice in the model response.
    """
    client = openai.OpenAI(
        base_url="https://api.sambanova.ai/v1",
        api_key=userdata.get("SAMBANOVA_API_KEY"),
    )

    system_prompt = "You are an assistant who specializes in creating students' behavioral history based on feedback over time. Analyze the comments described and write a few paragraphs based on the descriptions provided. The text must contain only general behavioral information, considering the dates of the comments to show the main changes in the student's behavior over time. Do not list comments. At the end, present a short paragraph containing general observations."
    user_prompt = f"### Name: {name} \n\n ### Comments: {all_comments_text}"

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    completion = client.chat.completions.create(
        model='Meta-Llama-3.1-8B-Instruct',
        messages=conversation,
        temperature=0.5,
        max_tokens=3000,
    )

    # Only the text of the first choice is used.
    return completion.choices[0].message.content

In [62]:
# Skills agent

def skills_agent(name, performance, this_year_comments):
    """Ask the model to list the student's strengths and weaknesses.

    Sends a fixed system prompt plus the student's name, academic performance
    and teacher feedback to the 'Meta-Llama-3.1-8B-Instruct' model through
    the SambaNova endpoint, using the OpenAI client and the
    "SAMBANOVA_API_KEY" value from ``userdata``.

    Returns:
        The message content of the first choice in the model response.
    """
    client = openai.OpenAI(
        base_url="https://api.sambanova.ai/v1",
        api_key=userdata.get("SAMBANOVA_API_KEY"),
    )

    system_prompt = "You are an assistant specialized in identifying student skills, competencies and areas for improvement based on teacher feedback, academic performance and history over time. List 5 up strengths and 5 up weaknesses for the student based on the performance and comments described. At the end, create a short paragraph that briefly justifies the strengths and weaknesses. Strengths and weaknesses should only be named, not explained. Don't invent information, just use the data provided."
    user_prompt = f"### Name: {name} \n\n ### Academic performance: {performance} \n\n ### Teacher feedback: {this_year_comments}"

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    completion = client.chat.completions.create(
        model='Meta-Llama-3.1-8B-Instruct',
        messages=conversation,
        temperature=0.5,
        max_tokens=1500,
    )

    # Only the text of the first choice is used.
    return completion.choices[0].message.content

In [63]:
# Orientation agent

def orientation_agent(name, skills, performance, outliers, historical_description, general_profile):
    """Ask the model for suggestions aimed at teachers and parents.

    Sends a fixed system prompt plus the student's name, skills, academic
    performance, subject highlights, behavioural history and general
    description to the 'Meta-Llama-3.1-8B-Instruct' model through the
    SambaNova endpoint, using the OpenAI client and the "SAMBANOVA_API_KEY"
    value from ``userdata``.

    Returns:
        The message content of the first choice in the model response.
    """
    client = openai.OpenAI(
        base_url="https://api.sambanova.ai/v1",
        api_key=userdata.get("SAMBANOVA_API_KEY"),
    )

    system_prompt = "You are an assistant specialized in providing guidance to teachers and students' parents, based on the student's skills, their academic performance, their highlights in subjects, their behavioral history and the student's general description. In order to improve the student's academic performance, list 3 suggestions for the teacher to help the student. Then list 3 suggestions for parents to help students based on the student's description. Avoid generic suggestions, focus on specific suggestions for the student's case. Give suggestions that help the student with their difficulties. Just list the suggestions punctually, without describing them."
    user_prompt = f"### Name: {name} \n\n ### Skills: {skills} \n\n ### Academic performance: {performance} \n\n ### Highlights in subjects: {outliers} \n\n ### Behavioral history: {historical_description} \n\n ### General description: {general_profile}"

    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]

    completion = client.chat.completions.create(
        model='Meta-Llama-3.1-8B-Instruct',
        messages=conversation,
        temperature=0.5,
        max_tokens=1500,
    )

    # Only the text of the first choice is used.
    suggestions = completion.choices[0].message.content
    return suggestions

# Main

In [64]:
# Student information is read from spreadsheet columns A..P of the workbook.
student_data = pd.read_excel('EducationBasev1.xlsx', usecols='A:P')

# Keep rows up to and including the last row that holds at least one value.
last_student_row = student_data.last_valid_index()
student_data = student_data.loc[:last_student_row]

student_data.head()

Unnamed: 0,Registration,Name,Date of birth,Age,Sex,Teachers comments date,Teachers comments,Disease report,General description,Skills,Behavioral analysis,Guidance,History tracking,General performance,Low frequency alert,Relative frequency
0,231109.0,Luiz Zamprogno,2000-08-09,24.0,M,2006-03-07,Is familiarizing with numbers and basic operat...,TDAH,Luiz Zamprogno is a student who presents an av...,**Strengths:**\n\n1. Curiosity in experimental...,**Analysis of Luiz Zamprogno's Behavior**\n\n*...,**Suggestions for the teacher to help Luiz:**\...,**Evolution of Luiz Zamprogno's Behavior Over ...,Low general performance,No frequency warning,0.96
1,,,NaT,,,2006-03-27,Showed interest in artistic activities.,,,,,,,,,
2,,,NaT,,,2006-05-23,Showed progress in reading and understanding s...,,,,,,,,,
3,,,NaT,,,2006-06-22,Demonstrates interest in stories and curiositi...,,,,,,,,,
4,,,NaT,,,2006-10-15,Is consolidating skills in addition and subtra...,,,,,,,,,


In [69]:
# Important feature extraction from whole environment

current_year = 2017

# Basic information
name = get_column_data(student_data, 'Name')
comments = get_column_tuple_data(student_data, 'Teachers comments date', 'Teachers comments')
diagnostics = get_column_data(student_data, 'Disease report')

# Grades algorithm
outliers = get_outliers(grades)
performance = get_performance(grades)
relative_frequency = get_relative_frequency(grades)
frequency_warning = get_frequency_warning(grades)

# Agent information (placeholders, filled in by the agent task chain)
behavioural_profile = None
historical_profile = None
general_profile = None

# One "YYYY-MM-DD - comment" line per teacher comment, kept with its date so
# the same lines can be reused for the yearly and the full-history texts.
comment_lines = [(when.date(), f"{when.date()} - {text}\n") for when, text in comments]

# Comments filtration: only the comments dated in current_year
this_year_comments = "".join(line for day, line in comment_lines if day.year == current_year)

# Latest description filtration
# `all_descriptions` was not defined anywhere in this notebook, so this cell
# raised NameError on a fresh kernel. It is now built from the non-empty
# values of the 'General description' column.
# NOTE(review): confirm this is the intended source of the descriptions.
all_descriptions = student_data['General description'].dropna().tolist()
latest_description = all_descriptions[-1] if all_descriptions else ""

# All comments filtration
all_comments_text = "".join(line for _, line in comment_lines)

In [75]:
# Agent task chain

# Step 1: profiles that depend only on the extracted data.
behavioural_profile = behavioural_profile_agent(
    name=name,
    latest_description=latest_description,
    this_year_comments=this_year_comments,
    diagnostics=diagnostics,
)
historical_description = historical_description_agent(name=name, all_comments_text=all_comments_text)

# Step 2: the general description uses the behavioural agent's response.
general_profile = general_description_agent(
    name=name,
    performance=performance,
    outliers=outliers,
    behavioural_profile=behavioural_profile,
    this_year_comments=this_year_comments,
)
skills_description = skills_agent(name=name, performance=performance, this_year_comments=this_year_comments)

# Step 3: guidance uses the general, skills and historical agents' responses.
orientation = orientation_agent(
    name=name,
    skills=skills_description,
    performance=performance,
    outliers=outliers,
    historical_description=historical_description,
    general_profile=general_profile,
)

In [76]:
%%capture

# Excel file filling

# Values written into the student's first row, keyed by sheet column name.
# 'Behavioral analysis' was previously misspelled 'Behavorial analysis',
# which created a new column and left the real one unfilled.
generated_fields = {
    'General description': general_profile,
    'Skills': skills_description,
    'Behavioral analysis': behavioural_profile,
    'Guidance': orientation,
    'History tracking': historical_description,
    'General performance': performance,
    # Stored as text with a decimal comma.
    'Relative frequency': str(relative_frequency).replace(".", ","),
    'Low frequency alert': frequency_warning,
}

for column, value in generated_fields.items():
    # A column read as numeric (e.g. all-empty or float values) cannot take a
    # string without a dtype change, so cast to object before writing text.
    if column in student_data.columns:
        student_data[column] = student_data[column].astype(object)
    student_data.loc[0, column] = value


# Student columns and grade columns side by side, aligned on the row index.
full_data = pd.concat([student_data, grades], axis=1)

full_data.to_excel('BasedeDadosEducação1.xlsx', index=False)

In [77]:
# Preview the combined student + grades table that was written to Excel.
full_data.head()

Unnamed: 0,Registration,Name,Date of birth,Age,Sex,Teachers comments date,Teachers comments,Disease report,General description,Skills,...,English,Physics,Natural Sciences,Chemistry,Biology,Geography,History,Mathematics,Portuguese Language,Absences
0,231109.0,Luiz Zamprogno,2000-08-09,24.0,M,2006-03-07,Is familiarizing with numbers and basic operat...,TDAH,Luiz Zamprogno is a student with a low general...,"Based on the provided data, here are 5 strengt...",...,6.07,,6.39,,5.32,4.64,6.43,4.11,4.03,4.0
1,,,NaT,,,2006-03-27,Showed interest in artistic activities.,,,,...,4.58,,3.78,,3.96,3.88,4.35,4.1,5.99,6.0
2,,,NaT,,,2006-05-23,Showed progress in reading and understanding s...,,,,...,3.76,,3.06,,6.03,5.63,3.46,4.96,4.38,6.0
3,,,NaT,,,2006-06-22,Demonstrates interest in stories and curiositi...,,,,...,4.58,,3.81,,5.98,6.88,3.13,6.51,4.2,11.0
4,,,NaT,,,2006-10-15,Is consolidating skills in addition and subtra...,,,,...,6.43,,5.59,,3.55,5.99,3.22,6.6,4.14,5.0
