In [3]:
import pandas as pd
from src import files_import
from src import cleaning

## sleep

In [4]:
def build_mapping():
    """
    Build the lookup table that translates each bracketed code into the
    value written right after the brackets.

    The table is parsed from a hard-coded string of "[code]value" items,
    e.g. the item "[2]3.5" produces the entry "2" -> "3.5".

    Returns:
        dict[str, str]: code -> value; both sides are kept as strings.
    """
    data = "[0]0, [1]3, [2]3.5, [3]4, [4]4.5, [5]5, [6]5.5, [7]6, [8]6.5, [9]7, [10]7.5, [11]8, [12]8.5, [13]9, [14]9.5, [15]10, [16]10.5, [17]11, [18]11.5, [19]12"
    lookup = {}

    for token in data.split(", "):
        if token.strip() == "":  # ignore blank items
            continue
        # "[k]v" splits into "[k" (before the closing bracket) and "v" (after it).
        bracketed, _, outside = token.partition("]")
        code = bracketed[bracketed.find("[") + 1 :]
        lookup[code] = outside.strip()

    return lookup


# Module-level lookup table; create_sleep_dictionary reads it to translate the 'hour' codes.
mapping = build_mapping()

In [7]:
def create_sleep_dictionary():
    """
    Summarise the Sleep responses as one record per student.

    Loads the Sleep folder through cleaning.clean_data_from_jsons_folder and,
    for every student entry, averages the first `record_count` responses:
    the 'hour' answer is a code that is translated through the module-level
    `mapping` and read as a float, the 'rate' answer is read as an int.

    Returns:
        list[dict]: one dict per student with the keys 'name',
        'record_count', 'avg_hour' and 'avg_rate'.
    """
    sleep_folder_path = "data/StudentLife/dataset/EMA/response/Sleep"
    students = cleaning.clean_data_from_jsons_folder(sleep_folder_path)

    summaries = []
    for entry in students:
        # Keep only the id part of the file name (the "u00"-style piece).
        student_id = entry['file_name'].split('_')[1].split('.')[0]
        # The record count is carried over unchanged.
        n_records = entry['record_count']

        hours_total = 0
        rate_total = 0
        for idx in range(n_records):
            response = entry['data'][idx]
            hours_total = hours_total + float(mapping[response['hour']])
            rate_total = rate_total + int(response['rate'])

        # One summary record per student.
        summaries.append(dict(
            name=student_id,
            record_count=n_records,
            avg_hour=hours_total / n_records,
            avg_rate=rate_total / n_records,
        ))

    return summaries

# Build the per-student sleep summary, then print its length followed by the full list.
result = create_sleep_dictionary()
print(len(result))
print(result)

49
[{'name': 'u00', 'record_count': 55, 'avg_hour': 6.054545454545455, 'avg_rate': 1.981818181818182}, {'name': 'u01', 'record_count': 27, 'avg_hour': 6.111111111111111, 'avg_rate': 1.5925925925925926}, {'name': 'u02', 'record_count': 26, 'avg_hour': 6.461538461538462, 'avg_rate': 2.0}, {'name': 'u03', 'record_count': 28, 'avg_hour': 6.535714285714286, 'avg_rate': 1.7142857142857142}, {'name': 'u04', 'record_count': 34, 'avg_hour': 5.735294117647059, 'avg_rate': 2.0588235294117645}, {'name': 'u05', 'record_count': 6, 'avg_hour': 6.25, 'avg_rate': 1.5}, {'name': 'u07', 'record_count': 24, 'avg_hour': 6.020833333333333, 'avg_rate': 2.0}, {'name': 'u08', 'record_count': 41, 'avg_hour': 5.878048780487805, 'avg_rate': 1.9024390243902438}, {'name': 'u09', 'record_count': 3, 'avg_hour': 6.5, 'avg_rate': 1.6666666666666667}, {'name': 'u10', 'record_count': 47, 'avg_hour': 6.3936170212765955, 'avg_rate': 2.106382978723404}, {'name': 'u12', 'record_count': 26, 'avg_hour': 6.423076923076923, 'avg

## social life

In [8]:
# Folder that holds the Social response files
social_folder_path = "data/StudentLife/dataset/EMA/response/Social"

# Load the folder through the project's cleaning helper and print the first entry for inspection.
basic_cleaning_social_data = cleaning.clean_data_from_jsons_folder(social_folder_path)
print(basic_cleaning_social_data[0])

{'file_name': 'Social_u00.json', 'record_count': 44, 'keys': ['null', 'resp_time'], 'data': [{'location': 'Unknown', 'number': '5', 'resp_time': 1364264483}, {'location': '43.70759062,-72.28510298', 'number': '1', 'resp_time': 1364338301}, {'location': '43.70697406,-72.28758978', 'number': '1', 'resp_time': 1364328798}, {'location': '43.70708859,-72.28753397', 'number': '3', 'resp_time': 1365023797}, {'location': '43.70863158,-72.2842741', 'number': '4', 'resp_time': 1364425743}, {'location': '43.70517792,-72.28685304', 'number': '3', 'resp_time': 1364437520}, {'location': '43.70546998,-72.28794136', 'number': '4', 'resp_time': 1364573023}, {'location': '43.70674964,-72.2875834', 'number': '4', 'resp_time': 1364534048}, {'location': '43.70674964,-72.2875834', 'number': '4', 'resp_time': 1364536801}, {'location': '43.70664323,-72.28735129', 'number': '4', 'resp_time': 1364623648}, {'location': '43.75941375,-72.32875076', 'number': '2', 'resp_time': 1364710284}, {'location': '43.75907887

In [9]:

# פונקציה ליצירת מילון מרכזי עבור הסטודנטים
def create_final_social_dictionary(cleaned_data):
    """
    Reduce the cleaned Social records to one compact entry per student.

    Args:
        cleaned_data: iterable of per-student dicts with the keys
            'file_name', 'record_count' and 'data' (a list of response dicts).

    Returns:
        list[dict]: one dict per student with
            'name'         - id taken from the file name
                             (e.g. 'Social_u00.json' -> 'u00'),
            'record_count' - copied unchanged,
            'data'         - the responses, each cut down to its 'number'
                             field; a response without that field becomes {}.
    """
    kept_fields = ['number']
    summaries = []

    for entry in cleaned_data:
        # Keep only the id part of the file name.
        student_id = entry['file_name'].split('_')[1].split('.')[0]
        count = entry['record_count']

        # Drop every response field except the ones listed in kept_fields.
        trimmed = []
        for response in entry['data']:
            trimmed.append(
                {field: response[field] for field in kept_fields if field in response}
            )

        summaries.append(dict(name=student_id, record_count=count, data=trimmed))

    return summaries


# Reduce the cleaned Social data to name / record_count / 'number' answers per student.
final_social_data = create_final_social_dictionary(basic_cleaning_social_data)

# Sanity check: print the number of students, then the full structure.
print(len(final_social_data))
print(final_social_data)

49
[{'name': 'u00', 'record_count': 44, 'data': [{'number': '5'}, {'number': '1'}, {'number': '1'}, {'number': '3'}, {'number': '4'}, {'number': '3'}, {'number': '4'}, {'number': '4'}, {'number': '4'}, {'number': '4'}, {'number': '2'}, {'number': '2'}, {'number': '3'}, {'number': '2'}, {'number': '4'}, {'number': '5'}, {'number': '4'}, {'number': '5'}, {'number': '4'}, {'number': '3'}, {'number': '4'}, {'number': '3'}, {'number': '4'}, {'number': '3'}, {'number': '2'}, {'number': '3'}, {'number': '4'}, {'number': '4'}, {'number': '3'}, {'number': '4'}, {'number': '3'}, {'number': '4'}, {'number': '2'}, {'number': '1'}, {'number': '2'}, {'number': '2'}, {'number': '2'}, {'number': '2'}, {'number': '2'}, {'number': '3'}, {'number': '2'}, {'number': '1'}, {'number': '1'}, {'number': '2'}]}, {'name': 'u01', 'record_count': 18, 'data': [{'number': '4'}, {'number': '4'}, {'number': '2'}, {'number': '2'}, {'number': '4'}, {'number': '3'}, {'number': '4'}, {'number': '4'}, {'number': '3'}, {'n

## grades

In [27]:
# Written with forward slashes only: inside a normal string literal the
# backslash forms "\d" and "\e" are invalid escape sequences (Python warns
# about them), and a backslash-separated path resolves on Windows only.
file_path = "data/StudentLife/dataset/education/grades.csv"
grades_data = pd.read_csv(file_path)
print(grades_data)

    uid   gpa all   gpa 13s     cs 65
0   u01     2.863  1.777778  3.000000
1   u02     3.505  4.000000  4.000000
2   u04     3.029  3.500000  4.000000
3   u05     3.679  3.777778  4.000000
4   u07     3.474  4.000000  4.000000
5   u08     3.705  3.333333  4.000000
6   u09     3.806  3.777778  3.666667
7   u10     3.667  3.777778  4.000000
8   u12     3.245  2.583333  2.333333
9   u14     3.293  3.888889  3.666667
10  u15     2.815  3.333333  3.666667
11  u16     3.373  4.000000  4.000000
12  u17     3.476  3.333333  4.000000
13  u18     3.474  3.333333  3.333333
14  u19     3.947  3.777778  3.333333
15  u22     3.889  3.916667  4.000000
16  u24     2.987  3.222222  3.333333
17  u25     2.765  3.333333  3.666667
18  u27     3.719  3.333333  3.000000
19  u30     3.930  3.916667  4.000000
20  u32     3.826  3.888889  4.000000
21  u33     2.815  2.777778  4.000000
22  u41     3.652  3.777778  4.000000
23  u43     3.790  4.000000  4.000000
24  u46     3.646  1.111111  4.000000
25  u49     

  file_path = "data/StudentLife\dataset\education\grades.csv"


In [31]:
# Pull out the 'uid' column of the grades table and print it for inspection.
name = grades_data['uid']
print(name)

0     u01
1     u02
2     u04
3     u05
4     u07
5     u08
6     u09
7     u10
8     u12
9     u14
10    u15
11    u16
12    u17
13    u18
14    u19
15    u22
16    u24
17    u25
18    u27
19    u30
20    u32
21    u33
22    u41
23    u43
24    u46
25    u49
26    u52
27    u54
28    u57
29    u59
Name: uid, dtype: object


In [32]:
def create_grades_dictionary(grades_data):
    """
    Convert the grades table into a list of per-student dicts.

    Rows are read by position (.iloc), so the result does not depend on the
    DataFrame's index labels: a filtered or re-indexed frame is handled the
    same way as one with the default 0..n-1 index.

    Args:
        grades_data (pd.DataFrame): table with the columns 'uid',
            ' gpa all', ' gpa 13s' and ' cs 65'. The leading spaces are part
            of the column names this function looks up.

    Returns:
        list[dict]: one dict per row, in row order, with the keys
        'name', 'gpaAll', 'gpa 13s' and 'cs 65'.

    Raises:
        KeyError: if the table has rows but one of the columns is missing.
    """
    final_grades_data = []
    for row in range(len(grades_data)):
        # Positional access: plain `series[row]` is a label lookup and fails
        # (or picks the wrong row) when the index is not 0..n-1.
        final_grades_data.append({
            'name': grades_data['uid'].iloc[row],
            'gpaAll': grades_data[' gpa all'].iloc[row],
            'gpa 13s': grades_data[' gpa 13s'].iloc[row],
            'cs 65': grades_data[' cs 65'].iloc[row],
        })
    return final_grades_data

# Convert the grades DataFrame into a list of per-student dicts.
final_grades_data = create_grades_dictionary(grades_data)

# Sanity check: print the number of students, then the full list.
print(len(final_grades_data))
print(final_grades_data)

30
[{'name': 'u01', 'gpaAll': np.float64(2.863), 'gpa 13s': np.float64(1.777777778), 'cs 65': np.float64(3.0)}, {'name': 'u02', 'gpaAll': np.float64(3.505), 'gpa 13s': np.float64(4.0), 'cs 65': np.float64(4.0)}, {'name': 'u04', 'gpaAll': np.float64(3.029), 'gpa 13s': np.float64(3.5), 'cs 65': np.float64(4.0)}, {'name': 'u05', 'gpaAll': np.float64(3.679), 'gpa 13s': np.float64(3.777777778), 'cs 65': np.float64(4.0)}, {'name': 'u07', 'gpaAll': np.float64(3.474), 'gpa 13s': np.float64(4.0), 'cs 65': np.float64(4.0)}, {'name': 'u08', 'gpaAll': np.float64(3.705), 'gpa 13s': np.float64(3.333333333), 'cs 65': np.float64(4.0)}, {'name': 'u09', 'gpaAll': np.float64(3.806), 'gpa 13s': np.float64(3.777777778), 'cs 65': np.float64(3.666666667)}, {'name': 'u10', 'gpaAll': np.float64(3.667), 'gpa 13s': np.float64(3.777777778), 'cs 65': np.float64(4.0)}, {'name': 'u12', 'gpaAll': np.float64(3.245), 'gpa 13s': np.float64(2.583333333), 'cs 65': np.float64(2.333333333)}, {'name': 'u14', 'gpaAll': np.flo

In [33]:
# Presumably meant to collect the joined sleep/grades records; nothing is appended in this cell yet.
sleepToGrades = []
#sleep_data = create_sleep_dictionary()
# NOTE(review): this rebinds grades_data from the DataFrame to a list of dicts, so the cell
# is not re-runnable and any cell that indexes grades_data by column fails afterwards until
# the CSV is reloaded. final_grades_data already holds the same conversion; consider using it.
grades_data = create_grades_dictionary(grades_data)

# NOTE(review): the loop only rebinds `name` to "u00" .. "u58"; no lookup or append happens yet.
for i in range (59):
    name =  f"u{i:02d}"

{'name': 'u01', 'gpaAll': np.float64(2.863), 'gpa 13s': np.float64(1.777777778), 'cs 65': np.float64(3.0)}


'\nfor i in range (59):\n    name =  f"u{i:02d}"\n    if (grades_data[i][name] is None):\n        print(name)\n        '