# Cleaning donor info

In [1]:
import pandas as pd
import csv 

In [28]:
# Load the raw donor info CSV into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs as-is on the original author's machine.
path_name = '/Users/dionnespaltman/Desktop/V4/FAINT_Info_Personality_timepoints.csv'
donor_info = pd.read_csv(filepath_or_buffer=path_name)

# Unique IDs

Check how many unique IDs we have. We have 328. 

In [29]:
# Count the distinct values in the ID column (missing IDs are not counted).
id_column = donor_info['ID']
unique_id_count = id_column.nunique(dropna=True)
print("Number of unique IDs:", unique_id_count)

Number of unique IDs: 328


At a later stage, we will delete the 8 IDs that have only one VVR measurement, which means that these donors didn't start the donation.

In [30]:
# How many rows does each ID have?
rows_per_id = donor_info.groupby('ID').size()
id_counts = rows_per_id.reset_index(name='count')

# How many IDs share each row count?
ids_per_row_count = id_counts.groupby('count').size()
count_distribution = ids_per_row_count.reset_index(name='ID_count')

print(count_distribution)

   count  ID_count
0      1         8
1      6       191
2      7       129


Depending on the location, some participants didn't measure their VVR at stage 3. 

In [31]:
# Tally the rows per Time_point value (value_counts skips missing values),
# then turn the tally into a two-column frame.
stage_counts = (
    donor_info['Time_point']
    .value_counts()
    .rename_axis('Timepoint')
    .reset_index(name='Occurrences')
)

# Order by timepoint for display; the index still reflects the frequency order.
timepoint_counts = stage_counts.sort_values(by='Timepoint')

print(timepoint_counts)

   Timepoint  Occurrences
1        1.0          320
2        2.0          320
6        3.0          128
3        4.0          320
4        5.0          320
0        6.0          321
5        7.0          320


# Dropping columns 

Many columns are not relevant to the current research, so they will be dropped.

In [32]:
# List every column name in the raw frame before anything is dropped.
columns = donor_info.columns.tolist()
print(columns)

['ID', 'Time_point', 'Gender', 'Age', 'Datum', 'Location', 'Condition', 'Weight', 'Length', 'FEAR_sum', 'VVR_sum_tp', 'VVR_psych_tp', 'VVR_phys_tp', 'ERQ_sum', 'ERQ_CR', 'ERQ_ES', 'SSAS_sum', 'ASI_sum', 'MAIA_sum', 'MAIA_noticing_mean', 'MAIA_notdistr_mean', 'MAIA_notworry_mean', 'MAIA_attregul_mean', 'MAIA_emoaware_mean', 'MAIA_selfregu_mean', 'MAIA_bodylist_mean', 'MAIA_trust_mean', 'MAIA_noticing_sum', 'MAIA_notdistr_sum', 'MAIA_notworry_sum', 'MAIA_attregul_sum', 'MAIA_emoaware_sum', 'MAIA_selfregu_sum', 'MAIA_bodylist_sum', 'MAIA_trust_sum', 'Q2.2_1', 'Q2.2_2', 'Q2.2_3', 'Q2.2_4', 'Q2.2_5', 'Q2.2_6', 'Q2.2_7', 'Q2.2_8', 'Q2.2_9', 'Q2.2_10', 'Q3.1_1', 'Q3.1_2', 'Q3.1_3', 'Q3.1_4', 'Q3.1_5', 'Q3.1_6', 'Q3.1_7', 'Q3.1_8', 'Q3.2_1', 'Q3.2_2', 'Q3.2_3', 'Q3.2_4', 'Q3.2_5', 'Q3.2_6', 'Q3.2_7', 'Q3.2_8', 'Q4.1_1', 'Q4.1_2', 'Q4.1_3', 'Q4.1_4', 'Q4.1_5', 'Q4.1_6', 'Q4.1_7', 'Q4.1_8', 'Q4.1_9', 'Q4.1_10', 'Q5.1_1', 'Q5.1_2', 'Q5.1_3', 'Q5.1_4', 'Q5.1_5', 'Q5.1_6', 'Q5.1_7', 'Q5.1_8', 'Q5.1

In [33]:
# Columns to drop
columns_to_drop = [
    'FEAR_sum', 'VVR_sum_tp', 'VVR_psych_tp', 'VVR_phys_tp', 'ERQ_sum', 'ERQ_CR', 'ERQ_ES', 'SSAS_sum', 'ASI_sum', 'MAIA_sum',
    'MAIA_noticing_mean', 'MAIA_notdistr_mean', 'MAIA_notworry_mean', 'MAIA_attregul_mean', 'MAIA_emoaware_mean', 'MAIA_selfregu_mean',
    'MAIA_bodylist_mean', 'MAIA_trust_mean', 'MAIA_noticing_sum', 'MAIA_notdistr_sum', 'MAIA_notworry_sum', 'MAIA_attregul_sum',
    'MAIA_emoaware_sum', 'MAIA_selfregu_sum', 'MAIA_bodylist_sum', 'MAIA_trust_sum', 'Q2.2_1', 'Q2.2_2', 'Q2.2_3', 'Q2.2_4',
    'Q2.2_5', 'Q2.2_6', 'Q2.2_7', 'Q2.2_8', 'Q2.2_9', 'Q2.2_10', 'Q3.1_1', 'Q3.1_2', 'Q3.1_3', 'Q3.1_4', 'Q3.1_5', 'Q3.1_6',
    'Q3.1_7', 'Q3.1_8', 'Q3.2_1', 'Q3.2_2', 'Q3.2_3', 'Q3.2_4', 'Q3.2_5', 'Q3.2_6', 'Q3.2_7', 'Q3.2_8', 'Q4.1_1', 'Q4.1_2',
    'Q4.1_3', 'Q4.1_4', 'Q4.1_5', 'Q4.1_6', 'Q4.1_7', 'Q4.1_8', 'Q4.1_9', 'Q4.1_10', 'Q5.1_1', 'Q5.1_2', 'Q5.1_3', 'Q5.1_4',
    'Q5.1_5', 'Q5.1_6', 'Q5.1_7', 'Q5.1_8', 'Q5.1_9', 'Q5.1_10', 'Q5.1_11', 'Q5.1_12', 'Q5.1_13', 'Q5.1_14', 'Q5.1_15', 'Q5.1_16',
    'Q5.1_17', 'Q5.1_18', 'Q5.1_19', 'Q5.1_20', 'Q5.1_21', 'Q5.1_22', 'Q5.1_23', 'Q5.1_24', 'Q5.1_25', 'Q5.1_26', 'Q5.1_27', 'Q5.1_28',
    'Q5.1_29', 'Q5.1_30', 'Q5.1_31', 'Q5.1_32', 'Q5.1_33', 'Q5.1_34', 'Q5.1_35', 'Q5.1_36', 'Q5.1_37', 'Q14_1', 'Q14_2', 'Q14_3', 'Q14_4',
    'Q14_5', 'Q14_6', 'Q14_12', 'Q16_2', 'Q14_33', 'Q14_44', 'Q14_55', 'Q14_66', 'Q7.1_1', 'Q7.1_2', 'Q7.1_3', 'Q7.1_4', 'Q7.1_5', 'Q7.1_6',
    'Q7.1_7', 'Q7.1_8', 'Q7.1_9', 'Q7.1_10', 'Q7.1_11', 'Q7.1_12', 'Q7.1_13', 'Q7.1_14', 'Q7.1_15', 'Q7.1_16', 'Faintnessn', 'Dizzinessn',
    'Weaknessn', 'Lightheadednessn', 'Fearn', 'Tensionn', 'Stressn', 'Nervousnessn', 'na.rm',
]

# Drop the columns.
# The result is reassigned instead of using inplace=True, and errors='ignore'
# skips columns that are already gone, so re-running this cell no longer raises
# a KeyError. Trade-off: a mistyped name in the list above is skipped silently,
# so check the printed column list in the next cell.
donor_info = donor_info.drop(columns=columns_to_drop, errors='ignore')

In [34]:
# Show which columns remain after the drop.
columns = donor_info.columns.tolist()
print(columns)

['ID', 'Time_point', 'Gender', 'Age', 'Datum', 'Location', 'Condition', 'Weight', 'Length', 'Faintness', 'Dizziness', 'Weakness', 'Lightheadedness', 'Fear', 'Tension', 'Stress', 'Nervousness', 'Physical_sum', 'Psychological_sum']


# Renaming columns 

In [35]:
# Rename 'Datum' to 'Date' and 'Time_point' to 'Stage' in a single call.
new_names = {'Datum': 'Date', 'Time_point': 'Stage'}
donor_info = donor_info.rename(columns=new_names)

In [None]:
# Parse the 'Date' column into pandas datetimes (format inferred by pandas).
donor_info = donor_info.assign(Date=pd.to_datetime(donor_info['Date']))

In [37]:
# Confirm the renamed columns.
columns = donor_info.columns.tolist()
print(columns)

['ID', 'Stage', 'Gender', 'Age', 'Date', 'Location', 'Condition', 'Weight', 'Length', 'Faintness', 'Dizziness', 'Weakness', 'Lightheadedness', 'Fear', 'Tension', 'Stress', 'Nervousness', 'Physical_sum', 'Psychological_sum']


# Deleting the 8 incomplete IDs

As stated previously, we will now delete the 8 IDs that only have VVR measurements from stage 1.

In [39]:
# Number of rows recorded for each ID
id_counts = donor_info.groupby('ID').size().reset_index(name='count')

# IDs that appear on more than one row are the ones we keep
has_multiple_rows = id_counts['count'] > 1
ids_to_keep = id_counts.loc[has_multiple_rows, 'ID']

# Keep only the rows belonging to those IDs; single-row IDs are removed
keep_mask = donor_info['ID'].isin(ids_to_keep)
donor_info = donor_info[keep_mask]

# Saving the cleaned file

Let's double check if the file we will save is correct. 

In [40]:
# Sanity check before saving: (rows, columns) and the first five rows.
n_rows_cols = donor_info.shape
print(n_rows_cols)
display(donor_info.head())

(2049, 19)


Unnamed: 0,ID,Stage,Gender,Age,Date,Location,Condition,Weight,Length,Faintness,Dizziness,Weakness,Lightheadedness,Fear,Tension,Stress,Nervousness,Physical_sum,Psychological_sum
0,5,1.0,2,33.0,2019-10-21,0,1,74.0,171.0,1.0,1.0,1.0,1.0,2.0,2.0,2.0,2.0,4.0,8.0
1,5,2.0,2,33.0,2019-10-21,0,1,74.0,171.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,4.0,6.0
2,5,3.0,2,33.0,2019-10-21,0,1,74.0,171.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,4.0,6.0
3,5,4.0,2,33.0,2019-10-21,0,1,74.0,171.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,4.0,5.0
4,5,5.0,2,33.0,2019-10-21,0,1,74.0,171.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,4.0,5.0


In [41]:
# Recount the distinct IDs after the single-row IDs were removed.
id_column = donor_info['ID']
unique_id_count = id_column.nunique(dropna=True)
print("Number of unique IDs:", unique_id_count)

Number of unique IDs: 320


In [43]:
# Write the cleaned frame to disk as a comma-separated file. The row index is
# written too (to_csv default), so it appears as an unnamed first column.
# NOTE(review): absolute, machine-specific output path.
output_path = '/Users/dionnespaltman/Desktop/V4/VVR_measurements/VVR_measurements.csv'
donor_info.to_csv(output_path, sep=',')