In [1]:
# Import dependencies
import pandas as pd
from pathlib import Path
import numpy as np

In [2]:
# Upload file, convert to dataframe
attendance_df = pd.read_csv('../CSP_Data/activities_attendance.csv')
attendance_df

Unnamed: 0,Attendance ID,Activity Name,Activity Instance ID,Activity Label,Person ID,Date,Attended (y/n),Minutes Attended,Lead Staff Name,Individual Activity - Notes,Individual Activity - Referral Service,Individual Activity - Referral Agency,Individual Activity - Referral Date
0,,Surfing 101 Middle School,3383538,Aptos Surfing 101 Course,46887178,9/10/2018,Y,360.0,,,,,
1,,Surfing 101 Middle School,3383538,Aptos Surfing 101 Course,46887178,9/17/2018,Y,360.0,,,,,
2,,Surfing 101 Middle School,3383538,Aptos Surfing 101 Course,46887178,9/24/2018,Y,360.0,,,,,
3,,Surfing 101 Middle School,3383538,Aptos Surfing 101 Course,46887178,10/1/2018,Y,360.0,,,,,
4,,Surfing 101 Middle School,3383538,Aptos Surfing 101 Course,46887178,10/15/2018,Y,360.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5556,,Surfers In Leadership Training (SILT),3621424,SILT SUMMER CAMP SESSION 2: Booker T. Washington,47411223,6/26/2019,N,0.0,,,,,
5557,,Surfers In Leadership Training (SILT),3621424,SILT SUMMER CAMP SESSION 2: Booker T. Washington,47411223,6/27/2019,N,0.0,,,,,
5558,,Surfers In Leadership Training (SILT),3621424,SILT SUMMER CAMP SESSION 2: Booker T. Washington,47433659,6/25/2019,Y,420.0,,,,,
5559,,Surfers In Leadership Training (SILT),3621424,SILT SUMMER CAMP SESSION 2: Booker T. Washington,47433659,6/26/2019,Y,420.0,,,,,


In [3]:
# Check data types
attendance_df.dtypes

Attendance ID                             float64
Activity Name                              object
Activity Instance ID                        int64
Activity Label                             object
Person ID                                   int64
Date                                       object
Attended (y/n)                             object
Minutes Attended                          float64
Lead Staff Name                            object
Individual Activity - Notes               float64
Individual Activity - Referral Service    float64
Individual Activity - Referral Agency     float64
Individual Activity - Referral Date       float64
dtype: object

In [4]:
# Drop columns we do not want in schema
attendance_df = attendance_df.drop(columns = ["Attendance ID", "Activity Label", "Lead Staff Name", "Individual Activity - Notes", "Individual Activity - Referral Service", "Individual Activity - Referral Agency", "Individual Activity - Referral Date"])
attendance_df.head()

Unnamed: 0,Activity Name,Activity Instance ID,Person ID,Date,Attended (y/n),Minutes Attended
0,Surfing 101 Middle School,3383538,46887178,9/10/2018,Y,360.0
1,Surfing 101 Middle School,3383538,46887178,9/17/2018,Y,360.0
2,Surfing 101 Middle School,3383538,46887178,9/24/2018,Y,360.0
3,Surfing 101 Middle School,3383538,46887178,10/1/2018,Y,360.0
4,Surfing 101 Middle School,3383538,46887178,10/15/2018,Y,360.0


In [5]:
# Rename columns
attendance_df = attendance_df.rename(columns={
                          'Person ID':'Participant ID',
                            'Attended (y/n)':'Attended'
})
attendance_df

Unnamed: 0,Activity Name,Activity Instance ID,Participant ID,Date,Attended,Minutes Attended
0,Surfing 101 Middle School,3383538,46887178,9/10/2018,Y,360.0
1,Surfing 101 Middle School,3383538,46887178,9/17/2018,Y,360.0
2,Surfing 101 Middle School,3383538,46887178,9/24/2018,Y,360.0
3,Surfing 101 Middle School,3383538,46887178,10/1/2018,Y,360.0
4,Surfing 101 Middle School,3383538,46887178,10/15/2018,Y,360.0
...,...,...,...,...,...,...
5556,Surfers In Leadership Training (SILT),3621424,47411223,6/26/2019,N,0.0
5557,Surfers In Leadership Training (SILT),3621424,47411223,6/27/2019,N,0.0
5558,Surfers In Leadership Training (SILT),3621424,47433659,6/25/2019,Y,420.0
5559,Surfers In Leadership Training (SILT),3621424,47433659,6/26/2019,Y,420.0


In [6]:
# Value counts for each program
attendance_df["Activity Name"].value_counts()

Surfing 101                              4981
Surfing 101 Middle School                 338
Surfers In Leadership Training (SILT)     242
Name: Activity Name, dtype: int64

In [7]:
attendance_df["Activity Instance ID"].value_counts()

3384580    3185
3417045     499
3417059     486
3474515     237
3508595     108
3508583      88
3517784      85
3421146      82
3476979      72
3383538      70
3621424      60
3621419      60
3472807      55
3621418      30
3440566      29
3473499      24
3476373      23
3527022      20
3474149      18
3528219      17
3429906      17
3476388      17
3440661      16
3476343      15
3475642      14
3499217      14
3420417      14
3620598      14
3535966      13
3620940      12
3420508      12
3474121      12
3538029      12
3528375      11
3526992      10
3621415      10
3535885      10
3536052       9
3499320       9
3454339       8
3488181       8
3528332       8
3499380       8
3538008       7
3620656       6
3480978       5
3620600       5
3499367       5
3527068       4
3620979       4
3476438       4
Name: Activity Instance ID, dtype: int64

In [8]:
# Find any repeat participants
repeat_participant = attendance_df.groupby(["Participant ID"])
repeat_participant.agg({"Activity Instance ID": "nunique"})

Unnamed: 0_level_0,Activity Instance ID
Participant ID,Unnamed: 1_level_1
46842888,3
46843252,1
46843305,1
46843348,1
46843349,2
...,...
47793648,1
47814984,1
47835429,1
47878255,2


In [9]:
# Found that there are 427 more students included on this sheet than the student profile sheet 
# Will need to drop attendees who are not included in the students demographic profile sheet
# when perfomring merge between the 2 tables

# This exploratory pull of a weird student shows that it is possible to attend 16 different activities
# this can be explained if a student only attends one off field trips or attends field trips in addition to 
# surfing 101 courses

weird_student = attendance_df[attendance_df["Participant ID"] == 46847012]
weird_student.head()


Unnamed: 0,Activity Name,Activity Instance ID,Participant ID,Date,Attended,Minutes Attended
270,Surfing 101,3384580,46847012,2/8/2019,Y,300.0
271,Surfing 101,3384580,46847012,2/11/2019,Y,180.0
272,Surfing 101,3384580,46847012,2/13/2019,Y,180.0
273,Surfing 101,3384580,46847012,2/15/2019,Y,300.0
274,Surfing 101,3384580,46847012,2/20/2019,Y,180.0


# Create df by Program - SILT / Surfing 101 MS / Surfing 101 HS

In [10]:
# Filter by Program -SILT
is_silt = attendance_df['Activity Name'] == 'Surfers In Leadership Training (SILT)'
is_silt.head()

0    False
1    False
2    False
3    False
4    False
Name: Activity Name, dtype: bool

In [11]:
# Df for silt
silt_df = attendance_df[is_silt]
silt_df.head()

Unnamed: 0,Activity Name,Activity Instance ID,Participant ID,Date,Attended,Minutes Attended
4266,Surfers In Leadership Training (SILT),3421146,46843398,8/1/2018,N,0.0
4267,Surfers In Leadership Training (SILT),3421146,46843398,8/2/2018,N,0.0
4268,Surfers In Leadership Training (SILT),3421146,46853927,8/1/2018,N,0.0
4269,Surfers In Leadership Training (SILT),3421146,46853927,8/2/2018,N,0.0
4270,Surfers In Leadership Training (SILT),3421146,46854059,8/1/2018,Y,480.0


In [12]:
## Filter by Program - Middle School 101
is_surfing_101_MS = attendance_df['Activity Name'] == 'Surfing 101 Middle School '
is_surfing_101_MS.head()

0    True
1    True
2    True
3    True
4    True
Name: Activity Name, dtype: bool

In [13]:
# Df for MS Surfing 101
surf_101_MS_df = attendance_df[is_surfing_101_MS]
surf_101_MS_df.head()

Unnamed: 0,Activity Name,Activity Instance ID,Participant ID,Date,Attended,Minutes Attended
0,Surfing 101 Middle School,3383538,46887178,9/10/2018,Y,360.0
1,Surfing 101 Middle School,3383538,46887178,9/17/2018,Y,360.0
2,Surfing 101 Middle School,3383538,46887178,9/24/2018,Y,360.0
3,Surfing 101 Middle School,3383538,46887178,10/1/2018,Y,360.0
4,Surfing 101 Middle School,3383538,46887178,10/15/2018,Y,360.0


In [14]:
# Filter by HS Surfing 101
surf_101_HS = attendance_df['Activity Name'] == 'Surfing 101'
surf_101_HS.head()

0    False
1    False
2    False
3    False
4    False
Name: Activity Name, dtype: bool

In [15]:
# Df for HS surfing 101
surf_101_HS_df = attendance_df[surf_101_HS]
surf_101_HS_df.head()

Unnamed: 0,Activity Name,Activity Instance ID,Participant ID,Date,Attended,Minutes Attended
70,Surfing 101,3384580,46843252,8/20/2018,Y,180.0
71,Surfing 101,3384580,46843252,8/22/2018,Y,180.0
72,Surfing 101,3384580,46843252,8/24/2018,Y,300.0
73,Surfing 101,3384580,46843252,8/27/2018,Y,180.0
74,Surfing 101,3384580,46843252,8/29/2018,Y,180.0


In [16]:
# Amount of classes attended per students
value_counts_HS = surf_101_HS_df["Participant ID"].value_counts()
value_counts_HS.head()

46848962    106
46875282    104
46876799    103
46869531    102
46863178    102
Name: Participant ID, dtype: int64

In [17]:
# Who attended one class
one_time_HS = value_counts_HS == 1
one_time_HS.head()

46848962    False
46875282    False
46876799    False
46869531    False
46863178    False
Name: Participant ID, dtype: bool

In [18]:
# Isolate students who only attended one class
one_timers_HS = value_counts_HS[one_time_HS]
one_timers_HS.head()

46862359    1
46858392    1
47027338    1
46862041    1
46889250    1
Name: Participant ID, dtype: int64

In [19]:
# Df of Students who only attended one class of Surfing 101 HS
one_timers_HS_df = pd.DataFrame(value_counts_HS[one_time_HS], index=None)
one_timers_HS_df.reset_index(inplace=True) 
one_timers_HS_df = one_timers_HS_df.rename(columns={
                    'index':'Participant ID',
                    'Participant ID':'Classes Attended'
})
one_timers_HS_df.head()

Unnamed: 0,Participant ID,Classes Attended
0,46862359,1
1,46858392,1
2,47027338,1
3,46862041,1
4,46889250,1


In [20]:
# Value counts for Surfing 101 MS 
surf_101_MS_df['Participant ID'].value_counts().head()

46897549    33
46904597    18
46901108    18
46895808    18
46907807    18
Name: Participant ID, dtype: int64

In [21]:
# Values Counts for SILT
silt_df['Participant ID'].value_counts().head()

46876991    12
46864770    12
46878556    10
46848962    10
46864094     9
Name: Participant ID, dtype: int64

#  New DFs based on Activity Instance ID


In [22]:
#Check Dtypes and convert to Datetime
surf_101_MS_df.dtypes


surf_101_MS_df["Date"] = pd.to_datetime(surf_101_MS_df["Date"])


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """


In [23]:
surf_101_MS_df["Activity Instance ID"].value_counts()

3508595    108
3508583     88
3476979     72
3383538     70
Name: Activity Instance ID, dtype: int64

In [24]:
# Sort by Activity Instance ID, 
sem_1 = surf_101_MS_df[surf_101_MS_df["Activity Instance ID"]== 3508595]
sem_1.head()

Unnamed: 0,Activity Name,Activity Instance ID,Participant ID,Date,Attended,Minutes Attended
5046,Surfing 101 Middle School,3508595,46891989,2019-04-01,N,0.0
5047,Surfing 101 Middle School,3508595,46891989,2019-04-08,N,0.0
5048,Surfing 101 Middle School,3508595,46891989,2019-04-15,N,0.0
5049,Surfing 101 Middle School,3508595,46891989,2019-04-22,Y,225.0
5050,Surfing 101 Middle School,3508595,46891989,2019-04-29,Y,225.0


In [25]:
# Create Data Frames that can be filtered for Attendance percentage 
sem_1_df = pd.DataFrame(sem_1)
sem_1_df.head()

Unnamed: 0,Activity Name,Activity Instance ID,Participant ID,Date,Attended,Minutes Attended
5046,Surfing 101 Middle School,3508595,46891989,2019-04-01,N,0.0
5047,Surfing 101 Middle School,3508595,46891989,2019-04-08,N,0.0
5048,Surfing 101 Middle School,3508595,46891989,2019-04-15,N,0.0
5049,Surfing 101 Middle School,3508595,46891989,2019-04-22,Y,225.0
5050,Surfing 101 Middle School,3508595,46891989,2019-04-29,Y,225.0


In [26]:
sem_2 = surf_101_MS_df[surf_101_MS_df["Activity Instance ID"]== 3508583 ]
sem_2["Date"].value_counts()

2019-03-18    11
2019-03-11    11
2019-03-04    11
2019-02-25    11
2019-02-18    11
2019-02-11    11
2019-02-04    11
2019-01-28    11
Name: Date, dtype: int64

In [27]:
sem_3 = surf_101_MS_df[surf_101_MS_df["Activity Instance ID"]== 3476979]
sem_3["Date"].value_counts()

2019-01-14    8
2019-01-07    8
2018-12-17    8
2018-12-10    8
2018-12-03    8
2018-11-26    8
2018-11-19    8
2018-11-12    8
2018-11-05    8
Name: Date, dtype: int64

In [28]:
sem_4 = surf_101_MS_df[surf_101_MS_df["Activity Instance ID"]== 3383538]
sem_4["Date"].value_counts()


2018-10-01    10
2018-09-24    10
2018-09-17    10
2018-09-10    10
2018-10-29    10
2018-10-22    10
2018-10-15    10
Name: Date, dtype: int64

In [29]:
# Create New group the MS Surfing 101 Classes by Activity Instance ID
# Going to Create New DF's so I can sort by Activity Instance ID and see & Attended

# Need to group students by Activity Instance ID 
# Then pull the attended Y or N count and make a percentage


## Separating Code


In [30]:
#Sort Participant ID by Total Attended Y or N so we can count the total number of days attended 
# Machine Learning Model

yes = surf_101_MS_df[surf_101_MS_df["Attended"] == "Y"]
no = surf_101_MS_df[surf_101_MS_df["Attended"] == "N"]
yes.head()

Unnamed: 0,Activity Name,Activity Instance ID,Participant ID,Date,Attended,Minutes Attended
0,Surfing 101 Middle School,3383538,46887178,2018-09-10,Y,360.0
1,Surfing 101 Middle School,3383538,46887178,2018-09-17,Y,360.0
2,Surfing 101 Middle School,3383538,46887178,2018-09-24,Y,360.0
3,Surfing 101 Middle School,3383538,46887178,2018-10-01,Y,360.0
4,Surfing 101 Middle School,3383538,46887178,2018-10-15,Y,360.0


In [31]:
# YES Counts
yes_attended = yes.groupby(["Participant ID"]).count()["Attended"]
no_attended = no.groupby(["Participant ID"]).count()["Attended"].round(0)

yes_attended.head()

Participant ID
46887178    13
46891897     7
46891989    11
46892101     5
46893742     5
Name: Attended, dtype: int64

In [32]:
no_attended.head()

Participant ID
46887178    2
46891989    5
46892101    4
46893742    3
46895808    3
Name: Attended, dtype: int64

In [33]:
#Validating Total Y Count  -- Checks out Participant 46887178 has 13 y and 2 N
surf_101_MS_df[surf_101_MS_df["Participant ID"] == 46887178]

Unnamed: 0,Activity Name,Activity Instance ID,Participant ID,Date,Attended,Minutes Attended
0,Surfing 101 Middle School,3383538,46887178,2018-09-10,Y,360.0
1,Surfing 101 Middle School,3383538,46887178,2018-09-17,Y,360.0
2,Surfing 101 Middle School,3383538,46887178,2018-09-24,Y,360.0
3,Surfing 101 Middle School,3383538,46887178,2018-10-01,Y,360.0
4,Surfing 101 Middle School,3383538,46887178,2018-10-15,Y,360.0
5,Surfing 101 Middle School,3383538,46887178,2018-10-22,Y,360.0
6,Surfing 101 Middle School,3383538,46887178,2018-10-29,Y,240.0
4958,Surfing 101 Middle School,3508583,46887178,2019-01-28,N,0.0
4959,Surfing 101 Middle School,3508583,46887178,2019-02-04,N,0.0
4960,Surfing 101 Middle School,3508583,46887178,2019-02-11,Y,240.0


In [34]:
act_id_df = surf_101_MS_df.drop(columns = ["Date", "Attended", "Minutes Attended"])
act_id_df

Unnamed: 0,Activity Name,Activity Instance ID,Participant ID
0,Surfing 101 Middle School,3383538,46887178
1,Surfing 101 Middle School,3383538,46887178
2,Surfing 101 Middle School,3383538,46887178
3,Surfing 101 Middle School,3383538,46887178
4,Surfing 101 Middle School,3383538,46887178
...,...,...,...
5149,Surfing 101 Middle School,3508595,46908729
5150,Surfing 101 Middle School,3508595,46908729
5151,Surfing 101 Middle School,3508595,46908729
5152,Surfing 101 Middle School,3508595,46908729


In [35]:
# Make New DF to create new Surfing 101 Total Days Attended DF 
surf_101_MS_total_days_attended_df = pd.DataFrame({
    "Activity Name": "Surfing 101 Middle School",
    "Attended_Y": yes_attended,
    "Attended_N": no_attended
})
surf_101_MS_total_days_attended_df

Unnamed: 0_level_0,Activity Name,Attended_Y,Attended_N
Participant ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
46887178,Surfing 101 Middle School,13,2.0
46891897,Surfing 101 Middle School,7,
46891989,Surfing 101 Middle School,11,5.0
46892101,Surfing 101 Middle School,5,4.0
46893742,Surfing 101 Middle School,5,3.0
46894320,Surfing 101 Middle School,16,
46895808,Surfing 101 Middle School,15,3.0
46896561,Surfing 101 Middle School,7,1.0
46897422,Surfing 101 Middle School,9,
46897487,Surfing 101 Middle School,7,


In [36]:
# Replace NaN values with 0 since they are students who did not miss an attendance
surf_101_MS_total_days_attended_df['Attended_N'] = surf_101_MS_total_days_attended_df['Attended_N'].fillna(0)
surf_101_MS_total_days_attended_df.head()

Unnamed: 0_level_0,Activity Name,Attended_Y,Attended_N
Participant ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
46887178,Surfing 101 Middle School,13,2.0
46891897,Surfing 101 Middle School,7,0.0
46891989,Surfing 101 Middle School,11,5.0
46892101,Surfing 101 Middle School,5,4.0
46893742,Surfing 101 Middle School,5,3.0


In [37]:
# Check and Change DTYPE to INT
surf_101_MS_total_days_attended_df.dtypes
surf_101_MS_total_days_attended_df["Attended_N"].astype(int)

Participant ID
46887178    2
46891897    0
46891989    5
46892101    4
46893742    3
46894320    0
46895808    3
46896561    1
46897422    0
46897487    0
46897549    2
46897577    2
46898292    3
46898738    1
46898818    0
46898869    0
46899499    2
46899585    5
46900269    4
46900329    1
46901108    2
46901198    1
46904597    1
46906175    1
46906440    2
46907807    5
46908206    3
46908729    6
Name: Attended_N, dtype: int32

In [38]:
attendance_df.drop_duplicates()

Unnamed: 0,Activity Name,Activity Instance ID,Participant ID,Date,Attended,Minutes Attended
0,Surfing 101 Middle School,3383538,46887178,9/10/2018,Y,360.0
1,Surfing 101 Middle School,3383538,46887178,9/17/2018,Y,360.0
2,Surfing 101 Middle School,3383538,46887178,9/24/2018,Y,360.0
3,Surfing 101 Middle School,3383538,46887178,10/1/2018,Y,360.0
4,Surfing 101 Middle School,3383538,46887178,10/15/2018,Y,360.0
...,...,...,...,...,...,...
5556,Surfers In Leadership Training (SILT),3621424,47411223,6/26/2019,N,0.0
5557,Surfers In Leadership Training (SILT),3621424,47411223,6/27/2019,N,0.0
5558,Surfers In Leadership Training (SILT),3621424,47433659,6/25/2019,Y,420.0
5559,Surfers In Leadership Training (SILT),3621424,47433659,6/26/2019,Y,420.0


# Creating Loop to find repeat students

In [39]:
# Create a for loop to loop through data and find students who have attended more than one activity 
# Originally tried to separate each activity but found that grouping by Activity Instance ID and then counting
# whether the participant attended turned out to be easier and more of what we were looking for
# Using the DF below, we will be able to merge this DF with our 2 other tables- schools_complete, and students_demo
# Students Demo will include a column where we count the total number of unique courses a student attends using a groupby and creating a new column
# When we merge schools complete with 

groupby_att = attendance_df.groupby("Activity Instance ID")
big_list = []

for activity, df in groupby_att:
    participant_ids = set(df["Participant ID"].tolist())
    for participant in participant_ids:
        record = {"Activity Instance ID": activity, 
                "Participant ID": participant,
                 "Attendance": 1}
        big_list.append(record)
    

In [40]:
# This DF could be used to see which students participate in Field Trips and semester long programs
# Need to use a SQL query

new_attendance_df = pd.DataFrame(big_list)

new_attendance_df.head()



Unnamed: 0,Activity Instance ID,Participant ID,Attendance
0,3383538,46906440,1
1,3383538,46897577,1
2,3383538,46887178,1
3,3383538,46897549,1
4,3383538,46897487,1


In [41]:
# # Need to create a new dataframe with the sum included 
# sums_of_students = new_attendance_df.groupby("Participant ID")["Attendance"].sum()

# sums_df = pd.DataFrame({
#     "Activity Instance ID": ,
#     "Attended_Y": yes_attended,
#     "Attended_N": no_attended
# })

In [42]:
sums_of_students = new_attendance_df.groupby("Participant ID")["Attendance"].count()
sums_of_students.head()

Participant ID
46842888    3
46843252    1
46843305    1
46843348    1
46843349    2
Name: Attendance, dtype: int64

In [43]:
# Create a dataframe that can be merged with Student Demographic and show a count of the number of different activities that a student attended
student_activity_count_df = new_attendance_df.groupby("Participant ID")["Attendance"].count().to_frame(name= 'Distinct Program Count').reset_index()


len(student_activity_count_df)





427

In [44]:
# Create a dataframe of the number of students that were in each activity to merge with schools complete df
# This can give us an idea of how many students at each school/activity attend certain courses
number_of_students_df = new_attendance_df.groupby("Activity Instance ID")["Attendance"].count().to_frame(name= 'Attendance Count').reset_index()
number_of_students_df

Unnamed: 0,Activity Instance ID,Attendance Count
0,3383538,10
1,3384580,37
2,3417045,18
3,3417059,30
4,3420417,14
5,3420508,12
6,3421146,41
7,3429906,17
8,3440566,29
9,3440661,16


In [45]:
# This is code to separate by activity and see which students attended which activity instance ID
# We found that activity instance ID was not the best way to sort because some students for Surfing 101 at mission high
# had records of attendance for 100 courses, but some had records of attendance 17- this made it hard to find a percentage 
# for the overall attendance 
# given more time, we would separate by semesters and get attendence percentage by semester 

#groupby_att = attendance_df.groupby("Activity Instance ID")
#big_list = []

# for activity, df in groupby_att:


#     number_of_days = len(set(df["Date"].tolist()))
#     attended = df.groupby("Participant ID")["Attended"].value_counts()
#     print(attended)
#     print(number_of_days)
#     print(activity)
#     #df["Percent Attend"] = ((df["Attended_Y"])/(df["Attended_Y"]+ df["Attended_N"]))
#     #df.drop_duplicates(subset = ["Participant ID"], inplace= True)
#     #print(df)