# ⏱️🥱 Students that took too long are removed ❌     

In [26]:
import pandas as pd
import numpy as np

# 1. Read in the data
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
df = pd.read_csv('Ozdb_fivepoints_vo3_2019-2020_vo4_2020-2021.csv', delimiter=';')

pivot_df = pd.pivot_table(df, index=[ 'idll_vo3lv', 'timestamp'], columns='name', values='value', aggfunc=lambda x: x).reset_index()
print("We started with ", pivot_df['idll_vo3lv'].nunique(), " students")

# drop the rows with NaN values in timestampload column and the row below it
pivot_df = pivot_df.dropna(subset=['timestampload'], axis=0).reset_index(drop=True)

# group the pivot_df by idll_vo3lv and get the first and last row of each group
first_last_df = pivot_df.groupby('idll_vo3lv').apply(lambda x: x.iloc[[0,-1]])
first_last_df.reset_index(drop=True, inplace=True)

# group the pivot_df by idll_vo3lv and calculate the time difference between consecutive rows
grouped_df = pivot_df.groupby('idll_vo3lv')['timestamp'].apply(lambda x: pd.to_datetime(x).diff().dt.total_seconds())
# filter the resulting dataframe to keep only the rows where the time difference is greater than 180 seconds
result_df = grouped_df[grouped_df > 180].reset_index()

unique_student_ids = result_df['idll_vo3lv'].unique().tolist()
print("We have ", len(unique_student_ids), " Students who took longer than 180 seconds")
pivot_df = pivot_df[~pivot_df['idll_vo3lv'].isin(unique_student_ids)]

print("And ", pivot_df['idll_vo3lv'].nunique(), " Students left")

Dataset = pivot_df
Dataset['patternsm'] = Dataset['patternsm'].astype(str)
Dataset.head(75)



We started with  4551  students


  grouped_df = pivot_df.groupby('idll_vo3lv')['timestamp'].apply(lambda x: pd.to_datetime(x).diff().dt.total_seconds())


We have  193  Students who took longer than 180 seconds
And  4358  Students left


name,idll_vo3lv,timestamp,numbercl,patternsm,statuscl,timestampcl,timestampload,timestampsm
12,2,2020-10-01 09:52:14,1684572773,11111111,1111111011,"1601538723165,1601538724144,1601538725021,1601...",1601538721805,12106
13,2,2020-10-01 09:52:31,684127858,11011111,111111011,"1601538737663,1601538738454,1601538739815,1601...",1601538734066,17061
14,2,2020-10-01 09:52:40,4562,01011100,1111,"1601538755645,1601538756591,1601538757392,1601...",1601538751289,8516
15,2,2020-10-01 09:52:52,387558568,00101111,111100111,"1601538764144,1601538765306,1601538766035,1601...",1601538759972,12023
16,2,2020-10-01 09:52:58,4321,11110000,1111,"1601538773414,1601538774162,1601538775421,1601...",1601538772148,5470
17,2,2020-10-01 09:53:10,8762333,01100111,1111101,"1601538781442,1601538782939,1601538787197,1601...",1601538777799,12570
18,2,2020-10-01 09:53:16,48615,10011101,11111,"1601538791574,1601538792319,1601538793102,1601...",1601538790537,5034
19,2,2020-10-01 09:53:23,732,01100010,111,160153879841816015388003161601538801321,1601538795840,7318
20,2,2020-10-01 09:53:28,451,10011000,111,160153880527516015388061721601538807063,1601538803499,4793
21,2,2020-10-01 09:53:31,86,00000101,11,16015388094281601538810415,1601538808458,2799


In [21]:
def convert_binary_string(input_string):
    binary_list = input_string.split(',')
    result_list = []
    for i, bit in enumerate(binary_list):
        if bit == 'nan':
            result_list.append('0')
        elif bit == "['0'":
            result_list.append('0')
        else:
            try:
                result_list.append(str(i + 1) if int(bit) == 1 else '0')
            except ValueError:
                result_list.append('0')
    result_string = ','.join(result_list)
    return result_string

Dataset['new_column'] = Dataset['patternsm'].apply(convert_binary_string)
Dataset.head(75)



name,idll_vo3lv,timestamp,numbercl,patternsm,statuscl,timestampcl,timestampload,timestampsm,new_column
12,2,2020-10-01 09:52:14,1684572773,11111111,1111111011,"1601538723165,1601538724144,1601538725021,1601...",1601538721805,12106,12345678
13,2,2020-10-01 09:52:31,684127858,11011111,111111011,"1601538737663,1601538738454,1601538739815,1601...",1601538734066,17061,12045678
14,2,2020-10-01 09:52:40,4562,01011100,1111,"1601538755645,1601538756591,1601538757392,1601...",1601538751289,8516,2045600
15,2,2020-10-01 09:52:52,387558568,00101111,111100111,"1601538764144,1601538765306,1601538766035,1601...",1601538759972,12023,305678
16,2,2020-10-01 09:52:58,4321,11110000,1111,"1601538773414,1601538774162,1601538775421,1601...",1601538772148,5470,12340000
17,2,2020-10-01 09:53:10,8762333,01100111,1111101,"1601538781442,1601538782939,1601538787197,1601...",1601538777799,12570,2300678
18,2,2020-10-01 09:53:16,48615,10011101,11111,"1601538791574,1601538792319,1601538793102,1601...",1601538790537,5034,10045608
19,2,2020-10-01 09:53:23,732,01100010,111,160153879841816015388003161601538801321,1601538795840,7318,2300070
20,2,2020-10-01 09:53:28,451,10011000,111,160153880527516015388061721601538807063,1601538803499,4793,10045000
21,2,2020-10-01 09:53:31,86,00000101,11,16015388094281601538810415,1601538808458,2799,608


# Defining the strategies:

1. d.a.c. : Click as many times as possible
2. Rotation: make 1 firgure, rotate it 4 times, next figure
3. Subpar: Does not understand the objective at hand                       M + M↻
4. Additive: Add 1 more line to your figure untill it is completely filled   R
5. substractive: Remove 1 line until you have 1 or 0 left                    R
6. Single: click 1 line until all 8 lines make 8 different figures           M 

we can make algorithms to see the difference between a machine learining program and the algorithm

## ⚔️ 1. Divide and Conquer ⚔️
The student just submitted a lot of **CORRECT** answers very quickly less than 2.5 seconds per figure.


In [22]:
#code here

## 🔁 2. Rotation 🔁
The student chose **1** figure, rotated that 4 times and moved on to the next figure to do the same.

In [23]:
#code here


## 3. 🤪 Subpar: Does not understand the objective at hand 🤪
A lot of the same figure, a lot of clicks with no real results, very long time between figures etc.

In [24]:
def identify_strategy(actions):
    # Check if the value is a float or NaN
    if isinstance(actions, (float, int)):
        return "Invalid"  # Handle NaN or unexpected numeric values
    elif actions is None:
        return "Missing"  # Handle None or missing values
    else:
        unique_values = len(set(actions))
        if unique_values <= 2:
            return "Subpar"
        else:
            return "Advanced"
 #Apply the identify_strategy function to the 'numbercl' column and create a new 'strategy' column
pivot_df['strategy'] = pivot_df['numbercl'].apply(lambda x: identify_strategy(x))
pivot_df.head(50)

name,idll_vo3lv,timestamp,numbercl,patternsm,statuscl,timestampcl,timestampload,timestampsm,new_column,strategy
12,2,2020-10-01 09:52:14,1684572773,11111111,1111111011,"1601538723165,1601538724144,1601538725021,1601...",1601538721805,12106,12345678,Advanced
13,2,2020-10-01 09:52:31,684127858,11011111,111111011,"1601538737663,1601538738454,1601538739815,1601...",1601538734066,17061,12045678,Advanced
14,2,2020-10-01 09:52:40,4562,01011100,1111,"1601538755645,1601538756591,1601538757392,1601...",1601538751289,8516,2045600,Advanced
15,2,2020-10-01 09:52:52,387558568,00101111,111100111,"1601538764144,1601538765306,1601538766035,1601...",1601538759972,12023,305678,Advanced
16,2,2020-10-01 09:52:58,4321,11110000,1111,"1601538773414,1601538774162,1601538775421,1601...",1601538772148,5470,12340000,Advanced
17,2,2020-10-01 09:53:10,8762333,01100111,1111101,"1601538781442,1601538782939,1601538787197,1601...",1601538777799,12570,2300678,Advanced
18,2,2020-10-01 09:53:16,48615,10011101,11111,"1601538791574,1601538792319,1601538793102,1601...",1601538790537,5034,10045608,Advanced
19,2,2020-10-01 09:53:23,732,01100010,111,160153879841816015388003161601538801321,1601538795840,7318,2300070,Advanced
20,2,2020-10-01 09:53:28,451,10011000,111,160153880527516015388061721601538807063,1601538803499,4793,10045000,Advanced
21,2,2020-10-01 09:53:31,86,00000101,11,16015388094281601538810415,1601538808458,2799,608,Advanced


## ➕ 4. Additive: Add 1 more line to your figure untill it is completely filled ➕
start with one line and add moe and more untill your figure is completely filled


In [25]:
# get all the patterns a student made into an array

#after every "for" imagine that "each" follows
Students_who_are_adders = []
patterns = []
students = []

student_id_list = Dataset['idll_vo3lv'].unique().tolist() #This is the list af all the student IDs
student_pattern_list = [] #This will have all the patterns of all the students and be filled with student_patterns

for idll_vo3lv in student_id_list:
    student_patterns = []
    for pattern in Dataset['patternsm']:
        student_patterns.append(pattern)
    student_pattern_list.append(student_patterns)
student_pattern_list

KeyboardInterrupt: 

In [None]:
def adder(pattern, previous_pattern):
    """
    This function takes a pattern and its predecessor as input and returns True if the pattern is additive, False otherwise.
    """
    



    if pattern.size== previous_pattern.size + 1:
        #if pattern contains the same numbers in the same spots as previous_pattern, plus one more number in the end
        return True
    return False


In [None]:
# for each student, (index in array 1) check if the pattern they made is the same as the one before +1
checkcount = 0
previous_pattern = 0,0,0,0,0,0,0,0
for student_patterns in student_pattern_list:
    for pattern in student_patterns:
        if checkcount == 5:
            students_who_are_adders.append(idll_vo3lv)
        if adder(pattern, previous_pattern):
            checkcount+=1
        else:
            checkcount = 0
        previous_pattern = pattern


AttributeError: 'str' object has no attribute 'size'

In [None]:
input_string = '1,0,0,1,1,0,0,0'


'1,0,0,4,5,0,0,0'

## ➖ 5. substractive: Remove 1 line until you have 1 or 0 left ➖
Start with a complec figure and remove lines untill there is 1 or none left.

In [29]:
# Function to remove lines iteratively
def subtractive_strategy(lines):
    if pd.isna(lines):
        return []

    line_list = list(map(int, lines.split(',')))
    result = []

    # Iterative removal of lines
    for i in range(len(line_list)):
        current_lines = line_list[:i] + line_list[i+1:]
        result.append(','.join(map(str, current_lines)))

    return result

# Apply subtractive strategy to each row in the 'patternsm' column
Dataset['subtractive_strategy'] = Dataset['patternsm'].apply(subtractive_strategy)

# Display the resulting DataFrame
print(Dataset[['idll_vo3lv', 'patternsm', 'subtractive_strategy']])



ValueError: invalid literal for int() with base 10: 'nan'

## 🦆-d 6. Single: click 1 line until all 8 lines make 8 different figures 1️ 🐭
Different figures with one line each


In [None]:
import pandas as pd
import numpy as np

# Function to parse 'patternsm' column
def parse_patternsm(x):
    return list(map(int, x.split(','))) if pd.notna(x) else []

# Function to check if all figures are unique in a list
def has_unique_figures(lst):
    return len(set(lst)) == len(lst)

# Function to check if all elements in the arrays are the same
def all_elements_same(arr):
    return all(x == arr[0] for x in arr)

# Function to identify strategy
def identify_strategy(row):
    pattern_list = parse_patternsm(row['patternsm'])
    number_array = np.array(list(map(int, row['numbercl'].split(',')))) if pd.notna(row['numbercl']) else []
    status_array = np.array(list(map(int, row['statuscl'].split(',')))) if pd.notna(row['statuscl']) else []
    timestamp_array = np.array(list(map(int, row['timestampcl'].split(',')))) if pd.notna(row['timestampcl']) else []

    if has_unique_figures(pattern_list):
        if all_elements_same(number_array) and all_elements_same(status_array) and all_elements_same(timestamp_array):
            return 'Single: click 1 line until all 8 lines make 8 different figures'
        else:
            return 'Unique Figures'
    else:
        return 'Not Unique Figures'

# Apply the strategy identification function to create a 'Strategy' column
pivot_df['Strategy'] = pivot_df.apply(identify_strategy, axis=1)

# Displaying the DataFrame with the new 'Strategy' column
print(pivot_df[['idll_vo3lv', 'patternsm', 'Strategy']])

cluster

In [None]:
import pandas as pd
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Assuming pivot_df is your DataFrame
# Replace 'numbercl' with the correct column name
features_for_clustering = ['numbercl']

# Extract relevant features and fill NaN values with 0
X = pivot_df[features_for_clustering].fillna(0)

# Choose the number of clusters (you may need to experiment)
k_clusters = 3

# Apply K-Means clustering
kmeans = KMeans(n_clusters=k_clusters, random_state=42)
pivot_df['cluster'] = kmeans.fit_predict(X)

# Visualize clusters
plt.scatter(X, [0] * len(X), c=pivot_df['cluster'], cmap='viridis')
plt.xlabel('Number of Lines Clicked')
plt.title('Clustering of FPT Strategies')
plt.show()