In [3]:
from google.colab import drive

drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import pandas as pd

# Load the data sets.
students_mat = pd.read_csv('/content/drive/My Drive/student/student-mat.csv', delimiter = ';')
students_por = pd.read_csv('/content/drive/My Drive/student/student-por.csv', delimiter = ';')

#Check to see that they loaded in properly.
students_mat.head()
students_por.head()

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,4,0,11,11
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,2,9,11,11
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,6,12,13,12
3,GP,F,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,0,14,14,14
4,GP,F,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,0,11,13,13


 This script loads data from two CSV files into pandas DataFrames and then displays the first few rows of each DataFrame to verify that the data has been loaded correctly.

In [5]:
# Merging the datasets together to account for only unique values between the datasets.

merged_data = pd.concat([students_mat, students_por], ignore_index = True)

# Implementing a list of common identifiers to filter our unique students.
common_identifires = ['school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu', 'Mjob', 'Fjob', 'reason', 'nursery', 'internet']
unique_students = merged_data.drop_duplicates(subset = common_identifires).reset_index(drop = True)

unique_students

Unnamed: 0,school,sex,age,address,famsize,Pstatus,Medu,Fedu,Mjob,Fjob,...,famrel,freetime,goout,Dalc,Walc,health,absences,G1,G2,G3
0,GP,F,18,U,GT3,A,4,4,at_home,teacher,...,4,3,4,1,1,3,6,5,6,6
1,GP,F,17,U,GT3,T,1,1,at_home,other,...,5,3,3,1,1,3,4,5,5,6
2,GP,F,15,U,LE3,T,1,1,at_home,other,...,4,3,2,2,3,3,10,7,8,10
3,GP,F,15,U,GT3,T,4,2,health,services,...,3,2,2,1,1,5,2,15,14,15
4,GP,F,16,U,GT3,T,3,3,other,other,...,4,3,2,1,2,5,4,6,10,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
657,MS,F,19,R,GT3,A,1,1,at_home,at_home,...,3,5,4,1,4,1,0,8,0,0
658,MS,F,18,R,GT3,T,2,2,services,other,...,4,2,1,1,1,4,5,14,14,15
659,MS,F,18,R,LE3,A,1,2,at_home,other,...,4,3,4,1,4,5,0,16,15,15
660,MS,F,19,R,GT3,T,1,1,at_home,other,...,4,3,3,1,1,3,4,7,8,9


This script merges two datasets into one DataFrame and then filters out duplicate student records based on common identifiers, resulting in a DataFrame (unique_students) containing only unique student entries based on the specified identifiers.

In [6]:
## Implementing a couple linear searches.
def linear_search_by_absences(data, target_absences):
    position = 0
    while position < len(data):
        if data.iloc[position]['absences'] == target_absences:
            return position
        position += 1
    return -1


linear_search_by_absences(unique_students, 10)

2

This script executes a linear_search_by_absences(unique_students, 10), it will search for the first occurrence of a student with 10 absences in the unique_students DataFrame and return the index of that student if found, otherwise, it will return -1.

In [7]:
def linear_search_by_age(data, target_age):
    position = 0
    while position < len(data):
        if data.iloc[position]['age'] == target_age:
            return position
        position += 1
    return -1



linear_search_by_age(unique_students, 17)

1

When you execute linear_search_by_age(unique_students, 17), it will search for the first occurrence of a student aged 17 in the unique_students DataFrame and return the index of that student if found, otherwise, it will return -1.

In [None]:
## Implementing a couple binary searches. (age)

import pandas as pd

def binary_search_by_age(data, target_age):
    # Sort the DataFrame by 'age' column
    data_sorted = data.sort_values(by='age')

    # Initialize pointers for binary search
    left = 0
    right = len(data_sorted) - 1

    while left <= right:
        mid = (left + right) // 2
        mid_age = data_sorted.iloc[mid]['age']

        if mid_age == target_age:
            return mid
  # Found the target age at index mid
        elif mid_age < target_age:
            left = mid + 1
        else:
            right = mid - 1

    return -1
    # Target age not found

# Example usage:
# Assuming unique_students is your DataFrame
result_index = binary_search_by_age(unique_students, 17)
if result_index != -1:
    print(f"Target age found at index {result_index}.")
else:
    print("Target age not found.")


Target age found at index 330.


 Executing a binary_search_by_age(unique_students, 17), it will search for the first occurrence of a student aged 17 in the unique_students DataFrame using binary search and return the index of that student if found, otherwise, it will return -1.

In [8]:
## Implementing a couple binary searches. (absences)

import pandas as pd

def binary_search_by_absences(data, target_absences):
    # Sort the DataFrame by 'absences' column
    data_sorted = data.sort_values(by='absences')

    # Initialize pointers for binary search
    left = 0
    right = len(data_sorted) - 1

    while left <= right:
        mid = (left + right) // 2
        mid_absences = data_sorted.iloc[mid]['absences']

        if mid_absences == target_absences:
            return mid  # Found the target absences at index mid
        elif mid_absences < target_absences:
            left = mid + 1
        else:
            right = mid - 1

    return -1  # Target absences not found

# Example usage:
# Assuming unique_students is your DataFrame
result_index = binary_search_by_absences(unique_students, 10)
if result_index != -1:
    print(f"Target absences found at index {result_index}.")
else:
    print("Target absences not found.")


Target absences found at index 558.


Executing a binary_search_by_absences(unique_students, 10), will search for the first occurrence of a student with 10 absences in the unique_students DataFrame using binary search and return the index of that student if found, otherwise, it will return -1.

In [None]:

# Implementing an insertion sorting algorithm to sort G3 grades. (Final Grades and G2)

def insertion_sort(data, column):
    for i in range(1, len(data)):
        student_row = data.iloc[i]  # Current student who's age is to be compared

        # Move rows who's age is greater than student_row,
        # to one position in front of their current position
        j = i - 1
        while j >= 0 and student_row[column] < data.iloc[j][column]:
            data.iloc[j + 1] = data.iloc[j]
            j -= 1

        data.iloc[j + 1] = student_row  # Insert key into its correct position

    return data


print("Insertion sorted data by age:", insertion_sort(unique_students, 'age'))
print("Insertion sorted data by G2:", insertion_sort(unique_students, 'G2'))

Insertion sorted data by age:     school sex  age address famsize Pstatus  Medu  Fedu      Mjob      Fjob  \
0       GP   F   15       U     LE3       T     1     1   at_home     other   
1       GP   F   15       U     GT3       T     4     2    health  services   
2       GP   M   15       U     LE3       A     3     2  services     other   
3       GP   M   15       U     GT3       T     3     4     other     other   
4       GP   F   15       U     GT3       T     4     4   teacher    health   
..     ...  ..  ...     ...     ...     ...   ...   ...       ...       ...   
657     MS   M   20       R     GT3       T     1     1     other     other   
658     MS   M   21       R     GT3       T     1     1     other     other   
659     GP   F   21       U     LE3       T     4     4     other     other   
660     GP   M   21       R     LE3       T     1     1   at_home     other   
661     GP   M   22       U     GT3       T     3     1  services  services   

     ... famrel freet

This script provides a simple implementation of insertion sort to sort student data based on specified columns, allowing for better understanding and analysis of the dataset. Insertion sorts are the fastest way to sort information.

In [None]:
# Implementing an selection sorting algorithm to sort grades and G2.
def selection_sort(data, column):
    # Traverse through all array elements
    for i in range(len(data)):

        # Find the minimum element in the unsorted part of the array
        min_index = i
        for j in range(i + 1, len(data)):
            if data.iloc[j][column] < data.iloc[min_index][column]:
                min_index = j

        # Swap the found minimum element with the first element
        data.iloc[i], data.iloc[min_index] = data.iloc[min_index], data.iloc[i]

    return data

# Example usage:

print("Selection sorted array by age:", selection_sort(unique_students, 'age'))
print("Selection sorted array by G2:", selection_sort(unique_students, 'G2'))

Selection sorted array by age:     school sex  age address famsize Pstatus  Medu  Fedu      Mjob      Fjob  \
0       GP   F   15       R     GT3       T     3     4  services   teacher   
1       GP   F   15       U     GT3       T     1     1   at_home     other   
2       GP   M   15       R     GT3       T     3     4   at_home   teacher   
3       GP   F   15       U     GT3       T     4     4  services   at_home   
4       GP   F   15       R     GT3       T     1     1     other     other   
..     ...  ..  ...     ...     ...     ...   ...   ...       ...       ...   
657     MS   M   20       R     GT3       T     1     1     other     other   
658     GP   M   21       R     LE3       T     1     1   at_home     other   
659     MS   M   21       R     GT3       T     1     1     other     other   
660     GP   F   21       U     LE3       T     4     4     other     other   
661     GP   M   22       U     GT3       T     3     1  services  services   

     ... famrel free

This script provides a simple implementation of selection sort to sort student data based on specified columns, allowing for better understanding and analysis of the dataset. Although not as fast as an insertion sort, it would perform well on a small list.

In [11]:
import pandas as pd

# Define the insertion_sort_grades function
def insertion_sort_grades(grades):
    for i in range(1, len(grades)):
        key = grades[i]
        j = i - 1
        while j >= 0 and grades[j][1] > key[1]:
            grades[j + 1] = grades[j]
            j -= 1
        grades[j + 1] = key
    return grades

# Extracting 'G3' grades along with their indices from the DataFrame
grades_g3 = list(unique_students['G3'].items())

# Applying Insertion Sort to sort the grades along with their original indices
sorted_grades = insertion_sort_grades(grades_g3)

# Extracting sorted indices for reordering the DataFrame
sorted_indices_for_g3 = [index for index, grade in sorted_grades]

# Reordering the DataFrame to reflect the sorted 'G3' grades
sorted_grades_g3 = unique_students.iloc[sorted_indices_for_g3]

# Creating a new DataFrame from the reordered data
sorted_grades_g3_df = pd.DataFrame(sorted_grades_g3, columns=['school', 'sex', 'age', 'address', 'famsize', 'Pstatus', 'Medu', 'Fedu', 'Mjob', 'Fjob', 'G3']).reset_index(drop=True)

# Print the sorted DataFrame
print(sorted_grades_g3_df)

    school sex  age address famsize Pstatus  Medu  Fedu      Mjob      Fjob  \
0       GP   M   18       R     GT3       T     2     2  services     other   
1       GP   F   15       R     GT3       T     3     4  services   teacher   
2       GP   F   15       U     GT3       T     1     1   at_home     other   
3       GP   M   15       R     GT3       T     3     4   at_home   teacher   
4       GP   F   15       U     GT3       T     4     4  services   at_home   
..     ...  ..  ...     ...     ...     ...   ...   ...       ...       ...   
657     GP   M   15       U     LE3       A     4     4   teacher   teacher   
658     GP   M   15       U     LE3       T     4     2   teacher     other   
659     GP   F   18       U     GT3       T     2     2   at_home   at_home   
660     MS   F   18       R     LE3       T     4     4     other     other   
661     GP   M   16       U     GT3       T     4     3    health  services   

     G3  
0     0  
1     0  
2     0  
3     0  
4

This script sorts the DataFrame unique_students based on the 'G3' grades in ascending order and creates a new DataFrame (sorted_grades_g3_df) with the sorted data.