# Item Bank Calibration Analysis

## Imports & Libraries

In [1]:
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
import plotly.express as px
import ast

In [2]:
# Load the raw per-student assessment export; the path is relative to the notebook's working directory.
item_bank = pd.read_csv('./Datasets/EST II Biology - March 2022 Working Copy.csv')

In [3]:
# Preview the first rows of the raw data.
# NOTE(review): the rendered output includes student names and e-mail addresses;
# clear or redact it before sharing or committing the notebook.
item_bank.head()

Unnamed: 0,FIRSTNAME,SURNAME,USERNAME,STUDENT_TOTAL_SCORE,ASSESSMENT_MAX_SCORE,ASSESSMENT_STARTED,ASSESSMENT_ENDED,ITEM_1_SCORE,ITEM_1_MAX_SCORE,ITEM_2_SCORE,...,ITEM_76_SCORE,ITEM_76_MAX_SCORE,ITEM_77_SCORE,ITEM_77_MAX_SCORE,ITEM_78_SCORE,ITEM_78_MAX_SCORE,ITEM_79_SCORE,ITEM_79_MAX_SCORE,ITEM_80_SCORE,ITEM_80_MAX_SCORE
0,Begad,Yasser Abdelrehim Hassan,dupes@live.com,37,80,2022-03-26T08:02:38Z,2022-03-26T09:04:47Z,0.0,1,1.0,...,0.0,1,0.0,1,1.0,1,1.0,1,0.0,1
1,Sofian,Mohamed Fahmi Mohamed Mohamed Fayed,sofiianfayed@gmail.com,20,80,2022-03-26T08:04:04Z,2022-03-26T09:04:46Z,1.0,1,0.0,...,1.0,1,0.0,1,0.0,1,0.0,1,0.0,1
2,Youssef,Walid Ragab Mohamed Ragab,walidbhit@hotmail.com,34,80,2022-03-26T08:07:18Z,2022-03-26T09:08:40Z,1.0,1,1.0,...,1.0,1,0.0,1,0.0,1,0.0,1,1.0,1
3,Youssef,Sherif Youssef Owida Hassan,youssefowida04@gmail.com,33,80,2022-03-26T08:12:27Z,2022-03-26T09:13:28Z,0.0,1,1.0,...,1.0,1,0.0,1,1.0,1,0.0,1,1.0,1
4,Rouaa,Hamdi Abdalla Ali,hamdyroaa1@gmail.com,28,80,2022-03-26T08:01:58Z,2022-03-26T09:04:23Z,1.0,1,0.0,...,0.0,1,0.0,1,0.0,1,0.0,1,1.0,1


In [4]:
# Summarise the row count, column count and dtypes of the raw frame.
item_bank.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1828 entries, 0 to 1827
Columns: 167 entries, FIRSTNAME to ITEM_80_MAX_SCORE
dtypes: float64(77), int64(85), object(5)
memory usage: 2.3+ MB


## EDA

    Dropping the per-item maximum-score columns (ITEM_n_MAX_SCORE)

In [5]:
# Remove every ITEM_<n>_MAX_SCORE column and keep all other columns.
# DataFrame.drop returns a new frame, so the column assignments made in later
# cells do not write into a column selection of the original frame (which can
# raise SettingWithCopyWarning in pandas).
item_bank = item_bank.drop(
    columns=item_bank.filter(regex=r'ITEM_[1-9][0-9]?_MAX_SCORE').columns
)

    Cleaning the assessment start/end timestamps (keeping only the time of day)

In [6]:
# For both timestamp columns: discard the first 11 characters of each string
# (the date part and the "T" separator in the values displayed above), then
# remove the "Z", leaving only the time of day.
for time_column in ("ASSESSMENT_STARTED", "ASSESSMENT_ENDED"):
    item_bank[time_column] = item_bank[time_column].str[11:].str.replace("Z", "")

In [7]:
# Lower-case every column name, order the rows by total score (ascending),
# and discard the assessment_max_score column.
item_bank = (
    item_bank
    .rename(columns=str.lower)
    .sort_values(by='student_total_score')
    .drop(columns=['assessment_max_score'])
)

In [8]:
def transform_to_percentage(max_score: float = 70):
    """Add a ``student_total_score_percentage`` column to the global ``item_bank``.

    The new column is ``student_total_score * 100 / max_score``.

    Parameters
    ----------
    max_score : float, default 70
        Score that corresponds to 100 %. The default reproduces the value
        this notebook has always used, so existing results are unchanged.

    Raises
    ------
    ValueError
        If ``max_score`` is not strictly positive.

    NOTE(review): the raw export reports ASSESSMENT_MAX_SCORE = 80 in the rows
    displayed earlier, while the default denominator here is 70. With 70, any
    total score above 70 yields a percentage above 100. Confirm which maximum
    is intended.
    """
    if max_score <= 0:
        raise ValueError("max_score must be a positive number")

    # Vectorised column arithmetic; same result as applying a per-row lambda.
    item_bank['student_total_score_percentage'] = (
        item_bank['student_total_score'] * 100 / max_score
    )

# Run the conversion so that item_bank gains the student_total_score_percentage
# column, which the ability-band assignment in the next cell reads.
transform_to_percentage()

In [9]:
def assign_student_ability(student_total_score_percentage: float):
    """Map a total-score percentage to an ability band label.

    Bands:
        percentage <= 20        -> 'Ability 1'
        20 < percentage < 50    -> 'Ability 2'
        50 <= percentage < 70   -> 'Ability 3'
        70 <= percentage < 85   -> 'Ability 4'
        85 <= percentage < 95   -> 'Ability 5'
        percentage >= 95        -> 'Ability 6'

    Parameters
    ----------
    student_total_score_percentage : float
        Score percentage (the values in this notebook are floats such as
        11.428571, not integers).

    Returns
    -------
    str or None
        The band label, or ``None`` when the percentage is NaN. A NaN fails
        every comparison below and would otherwise be reported as the top
        band, 'Ability 6'.
    """
    # NaN is the only value that compares unequal to itself.
    if student_total_score_percentage != student_total_score_percentage:
        return None

    # Each guard only runs when all previous ones failed, so the lower bound
    # of every band is already implied.
    if student_total_score_percentage <= 20:
        return 'Ability 1'
    if student_total_score_percentage < 50:
        return 'Ability 2'
    if student_total_score_percentage < 70:
        return 'Ability 3'
    if student_total_score_percentage < 85:
        return 'Ability 4'
    if student_total_score_percentage < 95:
        return 'Ability 5'
    return 'Ability 6'

# Label every student with the ability band that matches their score percentage.
item_bank['student_ability'] = (
    item_bank['student_total_score_percentage'].apply(assign_student_ability)
)


In [10]:
# Inspect the first rows after cleaning; the frame is sorted by
# student_total_score, so these are the lowest-scoring students.
# NOTE(review): this output also shows student names and e-mail addresses.
item_bank.head()

Unnamed: 0,firstname,surname,username,student_total_score,assessment_started,assessment_ended,item_1_score,item_2_score,item_3_score,item_4_score,...,item_73_score,item_74_score,item_75_score,item_76_score,item_77_score,item_78_score,item_79_score,item_80_score,student_total_score_percentage,student_ability
1082,Nanci,Ahmed Mohamed Sameh,nancisameh@gmail.com,7,08:01:15,08:46:00,1.0,,,,...,,,,,,,,,10.0,Ability 1
591,Mohamed,Fady Abdou El Basyouni,manobeatbox44@gmail.com,7,08:03:29,08:35:08,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,10.0,Ability 1
1499,Bavly,Awad Faragallah Rizkalla,bavlyawadfarag@gmail.com,8,08:01:16,08:33:44,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,11.428571,Ability 1
363,Malek,Sobhe Abdul Diam,maleksobhe91@gmail.com,9,08:01:50,08:31:02,0.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,12.857143,Ability 1
326,Hady,Wadie Samy,hadywadie2021@gmail.com,9,08:00:35,09:02:01,1.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,12.857143,Ability 1


In [13]:
items_mean_score_list = []
items_mean_name_list = []

def check_items_mean(item):
    """Record the mean score of one item column of ``item_bank``.

    Appends the column name to ``items_mean_name_list`` and the column mean
    to ``items_mean_score_list``, keeping both lists aligned by position.

    Parameters
    ----------
    item : str
        Name of the item score column, e.g. ``'item_1_score'``.
    """
    items_mean_name_list.append(item)
    # Series.mean() is the same figure describe() reports as 'mean', without
    # computing the other summary statistics.
    items_mean_score_list.append(item_bank[item].mean())

# Visit every item_<n>_score column present in the frame, in column order.
# A fixed range(1, 80) stops at item 79 and leaves item_80_score out.
for item_column in item_bank.filter(regex=r'^item_\d+_score$').columns:
    check_items_mean(item_column)

print(items_mean_score_list, items_mean_name_list)

[0.5216201423097975, 0.44986301369863013, 0.43852908891328213, 0.3351618211738892, 0.7016967706622879, 0.2314004376367615, 0.23958333333333334, 0.2591982427237781, 0.535675082327113, 0.45673603504928806, 0.34318555008210183, 0.43969298245614036, 0.31599123767798465, 0.30010952902519167, 0.6363636363636364, 0.2839168490153173, 0.5, 0.4103126714207351, 0.3033461327482172, 0.40777230432402845, 0.2804610318331504, 0.2602965403624382, 0.23940561364887178, 0.11281489594742607, 0.7396061269146609, 0.6284931506849315, 0.2501368363437329, 0.3167123287671233, 0.3322386425834702, 0.3930921052631579, 0.4723590585659551, 0.36383561643835616, 0.3925438596491228, 0.45668859649122806, 0.36308871851040525, 0.3183561643835616, 0.22252747252747251, 0.2678375411635565, 0.2149122807017544, 0.4731359649122807, 0.29489291598023065, 0.1630971993410214, 0.17233809001097694, 0.18221734357848518, 0.2630422844590884, 0.2508232711306257, 0.2068019747668678, 0.42073505211190343, 0.33881578947368424, 0.2906593406593

In [17]:
# Build a two-column table pairing each item column name with its mean score.
# The mean score is what this notebook labels "Difficulty".
df = pd.DataFrame({
    'Item Name': items_mean_name_list,
    'Difficulty': items_mean_score_list,
})

df.head(25)

Unnamed: 0,Item Name,Difficulty
0,item_1_score,0.52162
1,item_2_score,0.449863
2,item_3_score,0.438529
3,item_4_score,0.335162
4,item_5_score,0.701697
5,item_6_score,0.2314
6,item_7_score,0.239583
7,item_8_score,0.259198
8,item_9_score,0.535675
9,item_10_score,0.456736
