In [1]:
## This is to preprocess the MD_Acute_Bout_Data as per our clients wishes. He asked us to do these steps.

## MD acute bout dataset includes both men with and without muscle dysmorphia (MD; my pilot study). 

## Step 1: Remove folks in the MD acute bout dataset from Group 0 (i.e., non-MD group) and those who answered "No" 
##         to all four eligibility questions. We only want participants from the MD group (i.e., Group 1).
## Step 2: The MD2 database only includes men with MD (which we want), but does still have men who answered "No".
##         These participants who answered "No" and were subsequently ineligible need to be removed. 
## Step 3: Below are the variables of interest we need from at least one (if I note it) but most often both databases,
##         all else can be deleted. May want to do this in each database first, then combine.
'''
 All demographics
 Eligibility questions
 health history
 height and weight (to calculate body mass index)
 All MAQ data (this is the Modifiable Activity Questionnaire and assessed past week physical activity)
 MDI questions 1-16 (this is the muscle dysmorphia inventory)
 REI (will only be for MD2 database folks)
 RS scale (restraint eating)
 BDS (bodybuilding dependence)
 SPAS (social physique anxiety)
 CESD (depression)
 STAI (state-trait anxiety)
 Substance use (labeled su1, su2...su 10 in MD2 database but is labeled by drug name in 
                MD acute bout dataset from marijuana to cigarettes)
 Supplement use (labeled su11_1, su11_2.......su15 in MD2 database but is also labeled by
                 name in MD acute bout dataset from Protein (6 options) to other
'''

# Modules, Libraries, and Imports.
import pandas as pd # Used for data frames and other data frame functions.
import numpy as np # Used for arrays and other array functions as well as some math functions.
pd.set_option('display.max_columns', 300)

# Load in the full dataset
MD2_dataset = pd.read_csv("C:/Users/RRC/Downloads/MD2_Database.csv")

# Make a copy of the full dataset; this is the one we will manipulate while still retaining the full original data
# 229 rows 
MD2_dataset_subset = MD2_dataset
MD2_dataset_subset = MD2_dataset_subset.groupby('screener_id').first().reset_index()

MD2_dataset_subset.head(6)


Unnamed: 0,screener_id,Study_ID,redcap_event_name,redcap_survey_identifier,screener_id_complete,name,phone_number,dob,contact_information_complete,screener1,availibility1,screener2,gender,Subway,AMNewYork,Craigslist,Internet,PostedFlyer,Postcard,Other,age,MDscreener1,MDscreener2,MDscreener3,MDscreener4,CurrentTreatment,SeekingTreatment,Pastdrug,Pastalcohol,CurrentExercise,type_of_exercise___1,type_of_exercise___2,type_of_exercise___3,exercise_frequency,exercise_duration___0,exercise_duration___1,pa_journal,audio_permission,Eligibility,ineligiblity,phone_screener_2_complete,study_id_group,study_id_complete,pt1,pt2,pt3,dob2,pt4___1,pt4___2,pt4___3,pt5,pt6,pt7,pt8,pt9,pt,pt15,pt16,pt10,pt17,pt18,pt11,pt19,pt20,pt12,sesh4,apptdate4,sesh4drop,sesh5,apptdate5,thseshdo,pt21,participant_tracker_complete,MDscreener1sesh1,MDscreener2sesh1,MDscreener3sesh1,MDscreener4sesh1,group,eligibility_rescreen_complete,perceived_muscle_size_timestamp,pms_sesh1,pms_sesh1_2,pms_sesh1_3,pms_sesh1_4,perceived_muscle_size_complete,complete,interview_complete_complete,healthhix_1,healthhix_2,healthhix_3,healthhix_4,healthhix_5,healthhix_6,healthhix_7,healthhix_8,healthhix_9,healthhix_17,healthhix_22,healthhix_23,healthhix_25,healthhix_26,healthhix_27,healthhix_28,health_history_complete,maq_timestamp,maq_date,maq_monday,maq_monday_intensity,maq_monday_duration,maq_monday_other,maq_tuesday,maq_tuesday_intensity,maq_tuesday_duration,maq_tuesday_other,maq_wednesday,maq_wednesday_intensity,maq_wednesday_duration,maq_wednesday_other,maq_thursday,maq_thursday_intensity,maq_thursday_duration,maq_thursday_other,maq_friday,maq_friday_intensity,maq_friday_duration,maq_friday_other,maq_satdurday,maq_satdurday_intensity,maq_satdurday_duration,maq_satdurday_other,maq_sunday,maq_sunday_intensity,maq_sunday_duration,maq_sunday_other,maq_walking,maq_walking_duration,maq1,maq2,maq3,maq3a,maq4,maq5,maq6,maq_season,maq_complete,demographics_timestamp,AgeSesh1,Gendersesh1,Racialgroup,racialgroupother,...,su7,su11___1,su11___2,su11___3,su11___4,su11___5,su11___6,su12___1,su12___2,su12___3,su12___4,su12___5,su12___6,su13___1,su13___2,su13___3,su13___4,su13___5,su13___6,su14___1,su14___2,su14___3,su14___4,su14___5,su14___6,su15,substance_use_complete,auditc_timestamp,audit_1,audit_2,audit_3,auditc_complete,preworkout_timestamp,fs1_session3_pre,fas1_session3_pre,biss1,biss2,biss3,biss4,biss5,biss6,pms_pre1,ee_pre,preworkout_complete,midworkout_timestamp,fs2,fas2,ee_mid,rpe10_1,midworkout_complete,postworkout_timestamp,fs1_session3_post,fas1_session3_post,ee_post,rpe10_post,biss7,biss8,biss9,biss10,biss11,biss12,pms_post1,postworkout_complete,postworkout15minute_delay_timestamp,fs1_session3_delay15,fas1_session3_delay15,biss13,biss14,biss15,biss16,biss17,biss18,pms_post15,postworkout15minute_delay_complete,postworkout30minute_delay_timestamp,fs1_session3_delay30,fas1_session3_delay30,biss19,biss20,biss21,biss22,biss23,biss24,pms_post30,postworkout30minute_delay_complete,mental_health_timestamp,cesd1,cesd2,cesd3,cesd4,cesd5,cesd6,cesd7,cesd8,cesd9,cesd10,stai_y2_21,stai_y2_22,stai_y2_23,stai_y2_24,stai_y2_25,stai_y2_26,stai_y2_27,stai_y2_28,stai_y2_29,stai_y2_30,stai_y2_31,stai_y2_32,stai_y2_33,stai_y2_34,stai_y2_35,stai_y2_36,stai_y2_37,stai_y2_38,stai_y2_39,stai_y2_40,mental_health_complete,pa1_1,lbs,bf,physical_assessment_1_complete,pa1_4,pa1_5,chestr,legr,shoulderra,biceps,physical_assessment_2_complete,pa1_6,pa1_7,physical_assessment_3_complete,contamination_timestamp,contamination1,contamination2,contamination2a,contamination3,contamination_complete,audit_score,cesd_score,stai_score,spasscore,bdsscore,mdiscore,rsscore,mental_health_scores_complete,bisspre_score,bisspost,bisspost15minutescore,biss30minutepostscore,biss_scores_complete
0,502,2001.0,phone_screener_and_arm_1,,0.0,Sam,952-917-9738,1986-06-05,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,31.0,1.0,0.0,1.0,1.0,0.0,,3.0,3.0,1.0,0.0,0.0,1.0,3.0,0.0,1.0,1.0,1.0,1.0,,2.0,2001.0,2.0,2000.0,,,,0.0,0.0,0.0,,,,,3.0,,2.0,2018-05-21,,2.0,2018-05-24,,2.0,2018-05-28,,,,,2.0,2018-05-31,,5.0,2.0,1.0,1.0,0.0,1.0,0.0,2.0,,6.0,7.0,7.0,7.0,2.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2.0,,2018-05-21,2.0,3.0,90.0,,9.0,,,,2.0,3.0,90.0,,9.0,,,,2.0,3.0,90.0,,9.0,,,,9.0,,,,1.0,180.0,0.0,60.0,0.0,,0.0,0.0,0.0,4.0,2.0,2018-05-21 16:22:40,31.0,1.0,5.0,,...,,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,2.0,2018-05-21 16:29:40,3.0,0.0,2.0,2.0,2018-05-28 13:55:42,4.0,2.0,3.0,6.0,4.0,6.0,6.0,6.0,6.0,6.0,2.0,2018-05-28 14:20:05,3.0,5.0,6.0,8.0,2.0,2018-05-28 14:40:56,2.0,5.0,6.0,7.0,7.0,5.0,6.0,7.0,7.0,8.0,7.0,2.0,2018-05-28 14:55:08,4.0,3.0,6.0,6.0,5.0,6.0,5.0,7.0,6.0,2.0,2018-05-28 15:10:12,5.0,1.0,6.0,6.0,6.0,6.0,6.0,6.0,6.0,2.0,2018-05-24 16:09:29,0.0,2.0,1.0,1.0,2.0,0.0,1.0,1.0,1.0,1.0,2.0,2.0,3.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,2.0,2.0,2.0,2.0,3.0,2.0,1.0,1.0,2.0,158.84,72.0,10.0,2.0,64.0,104/56,82.5,65.0,40.0,30.0,2.0,77.0,125/66,2.0,,,,,,,,,,,,,,0.0,31.0,40.0,35.0,36.0,0.0
1,506,2002.0,phone_screener_and_arm_1,,0.0,Ricardo,347-304-8995,,2.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,32.0,1.0,1.0,1.0,1.0,0.0,,3.0,3.0,1.0,0.0,0.0,1.0,3.0,0.0,1.0,2.0,1.0,1.0,,2.0,2002.0,2.0,,,,,0.0,0.0,0.0,,,,,,,2.0,2018-05-21,,2.0,2018-05-25,,2.0,2018-05-29,,2.0,2018-05-31,,2.0,2018-06-01,,5.0,2.0,1.0,1.0,1.0,1.0,0.0,2.0,,6.0,8.0,8.0,8.0,2.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2.0,,2018-05-21,2.0,1.0,120.0,,2.0,2.0,120.0,,3.0,1.0,45.0,,2.0,2.0,120.0,,2.0,2.0,60.0,,9.0,,,,9.0,,,,0.0,,1.0,180.0,0.0,,0.0,0.0,0.0,4.0,2.0,2018-05-21 22:40:26,,1.0,4.0,,...,,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,Preworkout,2.0,2018-05-21 22:48:04,0.0,0.0,0.0,2.0,2018-05-29 22:34:22,5.0,4.0,3.0,4.0,2.0,6.0,7.0,2.0,4.0,7.0,2.0,2018-05-29 22:49:52,0.0,4.0,4.0,2.0,2.0,2018-05-29 23:03:00,1.0,4.0,4.0,2.0,2.0,2.0,2.0,4.0,7.0,3.0,4.0,2.0,2018-05-29 23:17:42,-1.0,4.0,5.0,5.0,5.0,5.0,5.0,5.0,4.0,2.0,2018-05-29 23:32:45,0.0,4.0,5.0,5.0,5.0,5.0,5.0,5.0,4.0,2.0,2018-05-24 22:45:24,1.0,0.0,0.0,1.0,2.0,1.0,2.0,1.0,1.0,0.0,3.0,2.0,3.0,3.0,3.0,3.0,2.0,1.0,2.0,3.0,1.0,2.0,3.0,3.0,2.0,3.0,3.0,3.0,3.0,1.0,2.0,188.53,68.0,18.3,2.0,66.0,133/81,130.0,85.0,55.0,45.0,2.0,64.0,135/82,2.0,,,,,,,,9.0,49.0,46.0,,,20.0,0.0,24.0,20.0,30.0,30.0,0.0
2,508,2003.0,phone_screener_and_arm_1,,0.0,Rob,908-770-3984,,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,26.0,0.0,0.0,1.0,1.0,0.0,,3.0,3.0,1.0,0.0,1.0,0.0,5.0,0.0,1.0,1.0,1.0,1.0,,2.0,2003.0,2.0,,,,,0.0,0.0,0.0,,,,,,,2.0,2018-05-22,,1.0,2018-05-24,,2.0,2018-05-29,,2.0,2018-05-31,,2.0,2018-06-01,,5.0,2.0,1.0,0.0,1.0,1.0,0.0,2.0,,5.0,7.0,6.0,7.0,2.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2.0,,2018-05-22,2.0,2.0,60.0,,9.0,,,,2.0,1.0,35.0,,2.0,1.0,35.0,,2.0,1.0,35.0,,3.0,3.0,45.0,,9.0,,,,0.0,,0.0,75.0,0.0,,0.0,0.0,0.0,4.0,2.0,2018-05-22 12:14:07,26.0,1.0,6.0,Indigenous and European ancestry,...,,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,2.0,2018-05-22 12:20:15,2.0,0.0,1.0,2.0,2018-05-29 12:03:05,0.0,3.0,5.0,4.0,6.0,7.0,5.0,7.0,6.0,5.0,2.0,2018-05-29 12:18:32,1.0,4.0,5.0,5.0,2.0,2018-05-29 12:32:23,3.0,5.0,5.0,4.0,7.0,6.0,5.0,7.0,7.0,7.0,7.0,2.0,2018-05-29 12:46:50,3.0,4.0,7.0,7.0,6.0,7.0,6.0,7.0,6.0,2.0,2018-05-29 13:01:50,3.0,3.0,6.0,5.0,5.0,7.0,5.0,7.0,6.0,2.0,2018-05-24 12:09:13,1.0,2.0,0.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,3.0,2.0,1.0,2.0,2.0,3.0,1.0,2.0,3.0,2.0,2.0,2.0,2.0,164.39,68.0,14.9,2.0,71.0,118/61,70.0,45.0,50.0,35.0,2.0,66.0,115/60,2.0,,,,,,,3.0,8.0,39.0,29.0,50.0,32.0,13.0,0.0,34.0,39.0,40.0,35.0,0.0
3,512,2004.0,phone_screener_and_arm_1,,0.0,Joel,231-384-0086,,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,28.0,1.0,1.0,1.0,0.0,0.0,,3.0,3.0,1.0,0.0,0.0,1.0,3.0,0.0,1.0,2.0,2.0,1.0,,2.0,2004.0,2.0,,,,,0.0,0.0,0.0,,,,,,,2.0,2018-06-19,4.0,,,,,,,,,,,,,1.0,2.0,0.0,0.0,0.0,0.0,1.0,2.0,2018-06-19 16:19:18,5.0,7.0,8.0,7.0,2.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,GAD 12 years ago and no longer seeks treatment...,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,2018-06-19 16:17:35,28.0,1.0,5.0,,...,,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,2.0,2018-06-19 16:24:35,4.0,0.0,1.0,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,0.0,,,,,,,5.0,,,,38.0,29.0,,0.0,,,,,
4,521,2005.0,phone_screener_and_arm_1,,0.0,David,917-789-3152,,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,22.0,0.0,1.0,0.0,1.0,0.0,,3.0,3.0,1.0,0.0,0.0,1.0,3.0,0.0,1.0,2.0,1.0,1.0,,2.0,2005.0,2.0,,,,,0.0,0.0,0.0,,,,,,,2.0,2018-06-01,4.0,,,,,,,,,,,,,,2.0,0.0,0.0,0.0,1.0,1.0,2.0,,7.0,8.0,8.0,8.0,2.0,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,2018-06-01 15:27:02,22.0,1.0,4.0,,...,,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,,,,,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,0.0,,,,,,,,,,0.0,,,,,,,,,,,34.0,11.0,,0.0,,,,,
5,522,2006.0,phone_screener_and_arm_1,,0.0,Giuseppe,347-297-7111,,2.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,26.0,0.0,0.0,1.0,1.0,0.0,,3.0,3.0,1.0,0.0,1.0,0.0,4.0,0.0,1.0,2.0,1.0,1.0,,2.0,2006.0,2.0,,,,,0.0,0.0,0.0,,,,,,,2.0,2018-06-01,,2.0,2018-06-04,,2.0,2018-06-04,,,,,,,,5.0,2.0,0.0,0.0,1.0,1.0,0.0,2.0,,6.0,7.0,7.0,7.0,2.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,2.0,,2018-06-01,2.0,2.0,75.0,,3.0,2.0,90.0,,2.0,3.0,60.0,,2.0,2.0,50.0,,9.0,,,,9.0,,,,9.0,,,,0.0,,0.0,4.0,0.0,,0.0,0.0,0.0,1.0,2.0,2018-06-01 20:42:11,26.0,1.0,7.0,,...,,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,,2.0,2018-06-01 20:48:24,1.0,2.0,2.0,2.0,2018-06-06 20:36:05,3.0,5.0,7.0,4.0,7.0,6.0,5.0,5.0,6.0,6.0,2.0,2018-06-06 20:51:42,4.0,5.0,5.0,3.0,2.0,2018-06-06 21:05:29,3.0,5.0,4.0,4.0,6.0,4.0,6.0,6.0,7.0,6.0,6.0,2.0,2018-06-06 21:21:02,3.0,4.0,6.0,4.0,6.0,6.0,5.0,6.0,6.0,2.0,2018-06-06 21:34:52,3.0,3.0,5.0,5.0,6.0,6.0,5.0,5.0,6.0,2.0,2018-06-04 22:45:37,2.0,3.0,0.0,1.0,1.0,1.0,0.0,1.0,2.0,1.0,2.0,2.0,3.0,4.0,2.0,2.0,3.0,2.0,2.0,2.0,1.0,2.0,3.0,3.0,2.0,3.0,2.0,3.0,3.0,2.0,2.0,152.91,68.0,16.1,2.0,65.0,124/54,122.5,62.5,50.0,35.0,2.0,67.0,112/58,2.0,,,,,,,5.0,12.0,48.0,30.0,43.0,34.0,11.0,0.0,34.0,35.0,33.0,32.0,0.0


In [2]:
## Step 1: Remove folks in the MD acute bout dataset from Group 0 (i.e., non-MD group) and those who answered "No" 
##         to all four eligibility questions. We only want participants from the MD group (i.e., Group 1).

# This code removes rows that have 0 in the group column it should keep the NaN's and 1's
result = MD2_dataset_subset[MD2_dataset_subset['group'] == 0.0]

# result = pd.concat([df1], axis=0)
result.shape


(26, 421)

In [3]:
# Check for duplicate rows based on the 'ID' column
duplicate_mask = result.duplicated(subset = 'screener_id', keep='first')

# The 'keep' parameter determines which duplicates are considered 'first' and which are considered 'last'. 
# 'first' keeps the first occurrence, 'last' keeps the last occurrence. You can adjust this as needed.

# Remove duplicate rows
df_no_duplicates = result[~duplicate_mask]

# The '~' operator is used to filter out the rows marked as duplicates in the 'duplicate_mask'.
MD2_dataset_subset2 = df_no_duplicates

print(MD2_dataset_subset2.shape)

MD2_dataset_subset3 = MD2_dataset_subset2

(26, 421)


In [4]:
'''##### DO NOT RUN THIS CHUNKKKKK ############

## Get rid of timestaps and and completes

## Choosing which variables I want as per client
# All demographics
cols_keep = ['screener_id', 'group', 'Eligibility', 'Study_ID', 'redcap_event_name']
cols_keep2 = ['demographics_timestamp', 'AgeSesh1', 'Gendersesh1', 'Racialgroup', 'racialgroupother',
              'EthnicitySesh1', 'LivingSituation', 'LivingOther', 'LivingOther2', 'Education', 'Employment',
              'EmploymentOther', 'Income', 'SexualOrientation', 'demographics_complete'
              'MDscreener1', 'MDscreener2', 'MDscreener3', 'MDscreener4', 'CurrentTreatment', 'SeekingTreatment',
              'Pastdrug', 'Pastalcohol', 'CurrentExercise', 'type_of_exercise___1', 'type_of_exercise___2',
              'type_of_exercise___3', 'exercise_frequency', 'exercise_duration___0', 'exercise_duration___1']

# Eligibility questions
# health history 

cols_keep3 = ['healthhix_1', 'healthhix_2', 'healthhix_3', 'healthhix_4', 'healthhix_5', 'healthhix_6', 'healthhix_7',
             'healthhix_8', 'healthhix_9', 'healthhix_17', 'healthhix_22', 'healthhix_23', 'healthhix_25', 'healthhix_26',
             'healthhix_27', 'healthhix_28', ]

# height and weight (to calculate body mass index)
# lbs is HEIGHT 
# pa1_1 is WEIGHT
cols_keep4 = ['lbs', 'pa1_1']

# All MAQ data (this is the Modifiable Activity Questionnaire and assessed past week physical activity)
cols_keep5 = ['maq_monday', 'maq_monday_intensity', 'maq_monday_duration', 'maq_monday_other',
              'maq_tuesday', 'maq_tuesday_intensity', 'maq_tuesday_duration', 'maq_tuesday_other',
              'maq_wednesday','maq_wednesday_intensity', 'maq_wednesday_duration', 'maq_wednesday_other',
              'maq_thursday', 'maq_thursday_intensity', 'maq_thursday_duration', 'maq_thursday_other',
              'maq_friday', 'maq_friday_intensity', 'maq_friday_duration', 'maq_friday_other',
              'maq_satdurday', 'maq_satdurday_intensity', 'maq_satdurday_duration', 'maq_satdurday_other',
              'maq_sunday', 'maq_sunday_intensity', 'maq_sunday_duration', 'maq_sunday_other',
              'maq_walking', 'maq_walking_duration', 'maq1', 'maq2', 'maq3', 'maq3a', 'maq4', 'maq5', 'maq6']

# MDI questions 1-16 (this is the muscle dysmorphia inventory)
cols_keep6 = ['mdi1', 'mdi2', 'mdi3', 'mdi4', 'mdi5', 'mdi6', 'mdi7', 'mdi8', 'mdi9', 'mdi10', 'mdi11',
              'mdi12', 'mdi13', 'mdi14', 'mdi15', 'mdi16']


# RS scale (restraint eating)
cols_keep8 = ['rs1', 'rs2', 'rs3', 'rs4', 'rs5', 'rs6', 'rs7', 'rs8', 'rs9', 'rs10']

# BDS (bodybuilding dependence)
cols_keep9 = ['bds1', 'bds2', 'bds3', 'bds4', 'bds5', 'bds6', 'bds7', 'bds8', 'bds9']

# SPAS (social physique anxiety)
cols_keep10 = ['spas_1', 'spas_2', 'spas_3', 'spas_4', 'spas_5', 'spas_6', 'spas_7', 'spas_8', 'spas_9',
             'spas_10', 'spas_11', 'spas_12']

# CESD (depression)
cols_keep11 = ['cesd1', 'cesd2', 'cesd3', 'cesd4', 'cesd5', 'cesd6', 'cesd7', 'cesd8', 'cesd9',
              'cesd10']

# STAI (state-trait anxiety)
cols_keep12 = ['stai_y2_21', 'stai_y2_22','stai_y2_23', 'stai_y2_24', 'stai_y2_25', 'stai_y2_26', 'stai_y2_27',
              'stai_y2_28', 'stai_y2_29', 'stai_y2_30', 'stai_y2_31', 'stai_y2_32', 'stai_y2_33', 'stai_y2_34',
              'stai_y2_35', 'stai_y2_36', 'stai_y2_37', 'stai_y2_38', 'stai_y2_39', 'stai_y2_40']

#  Substance use (labeled su1, su2...su 10 in MD2 database but is labeled by drug name in 
#                 MD acute bout dataset from marijuana to cigarettes)
cols_keep13 = ['su1', 'su2', 'su3', 'su4', 'su5', 'su6', 'su8', 'su9', 'su10', 'su7']

# Supplement use (labeled su11_1, su11_2.......su15 in MD2 database but is also labeled by
#                 name in MD acute bout dataset from Protein (6 options) to other
cols_keep14 = ['su11___1', 'su11___2', 'su11___3', 'su11___4', 'su11___5', 'su11___6', 'su12___1', 'su12___2', 'su12___3',
              'su12___4', 'su12___5', 'su12___6', 'su13___1', 'su13___2', 'su13___3', 'su13___4', 'su13___5', 'su13___6',
              'su14___1', 'su14___2', 'su14___3', 'su14___4', 'su14___5', 'su14___6', 'su15']

#adding all the columns into the new varible subset
MD_Acute_Bout_dataset_subset3 = MD_Acute_Bout_dataset_subset3[cols_keep]
                                                              

MD_Acute_Bout_dataset_subset3.head(30)
MD_Acute_Bout_dataset_subset3.shape 

'''

"##### DO NOT RUN THIS CHUNKKKKK ############\n\n## Get rid of timestaps and and completes\n\n## Choosing which variables I want as per client\n# All demographics\ncols_keep = ['screener_id', 'group', 'Eligibility', 'Study_ID', 'redcap_event_name']\ncols_keep2 = ['demographics_timestamp', 'AgeSesh1', 'Gendersesh1', 'Racialgroup', 'racialgroupother',\n              'EthnicitySesh1', 'LivingSituation', 'LivingOther', 'LivingOther2', 'Education', 'Employment',\n              'EmploymentOther', 'Income', 'SexualOrientation', 'demographics_complete'\n              'MDscreener1', 'MDscreener2', 'MDscreener3', 'MDscreener4', 'CurrentTreatment', 'SeekingTreatment',\n              'Pastdrug', 'Pastalcohol', 'CurrentExercise', 'type_of_exercise___1', 'type_of_exercise___2',\n              'type_of_exercise___3', 'exercise_frequency', 'exercise_duration___0', 'exercise_duration___1']\n\n# Eligibility questions\n# health history \n\ncols_keep3 = ['healthhix_1', 'healthhix_2', 'healthhix_3', 'healt

In [5]:
## Choosing which variables I want as per client
# Look at the dictionary to check for chest legs muscle groups bodfat
#                                                              -- Get rid of underscores
#                                                              -- Make all column names upper
#                                                                 df.columns = df.columns.str.uppr()
# #                                                                df.columns = df.columns.str.replace('_',"")
# Add audits 1 2 and 3 

# All demographics
# Eligibility questions
# health history
# height and weight (to calculate body mass index)             -- Change to correct names 
# All MAQ data (this is the Modifiable Activity Questionnaire and assessed past week physical activity)
# MDI questions 1-16 (this is the muscle dysmorphia inventory) -- Good
# RS scale (restraint eating)                                  -- Good
# BDS (bodybuilding dependence)                                -- Good
# SPAS (social physique anxiety)                               -- Remove underscore
# CESD (depression)                                            -- Good
# STAI (state-trait anxiety)                                   -- Remove the "_y2_20's and 30's"
#  Substance use (labeled su1, su2...su 10 in MD2 database but is labeled by drug name in 
#                 MD acute bout dataset from marijuana to cigarettes)
# Supplement use (labeled su11_1, su11_2.......su15 in MD2 database but is also labeled by
#                 name in MD acute bout dataset from Protein (6 options) to other

cols_keep = ['screener_id', 'group', 'Eligibility', 'MDscreener1', 'MDscreener2', 'MDscreener3', 'MDscreener4', 
             'AgeSesh1', 'Gendersesh1', 'Racialgroup', 'racialgroupother', 'EthnicitySesh1', 'LivingSituation',
             'LivingOther2', 'Education', 'Employment', 'Income', 'SexualOrientation',
             'CurrentTreatment', 'SeekingTreatment', 'Pastdrug', 'Pastalcohol', 'CurrentExercise', 
             'type_of_exercise___1', 'type_of_exercise___2', 'type_of_exercise___3',
             'exercise_frequency', 'exercise_duration___0', 'exercise_duration___1',
             'healthhix_1', 'healthhix_2', 'healthhix_3', 'healthhix_4', 'healthhix_5', 'healthhix_6', 'healthhix_7',
             'healthhix_8', 'healthhix_9', 'healthhix_17', 'healthhix_22', 'healthhix_23', 'healthhix_25', 'healthhix_26',
             'healthhix_27', 'healthhix_28',             
             'pa1_1','lbs', 'bf', 'chestr', 'legr', 'shoulderra', 'biceps','pa1_4', 'pa1_5', 'pa1_6', 'pa1_7',
             'maq_monday', 'maq_monday_intensity', 'maq_monday_duration', 'maq_monday_other',
             'maq_tuesday', 'maq_tuesday_intensity', 'maq_tuesday_duration', 'maq_tuesday_other',
             'maq_wednesday', 'maq_wednesday_intensity', 'maq_wednesday_duration', 'maq_wednesday_other',
             'maq_thursday', 'maq_thursday_intensity', 'maq_thursday_duration', 'maq_thursday_other',
             'maq_friday', 'maq_friday_intensity', 'maq_friday_duration', 'maq_friday_other',
             'maq_satdurday', 'maq_satdurday_intensity', 'maq_satdurday_duration', 'maq_satdurday_other',
             'maq_sunday', 'maq_sunday_intensity', 'maq_sunday_duration', 'maq_sunday_other',
             'maq_walking', 'maq_walking_duration',
             'maq1', 'maq2', 'maq3', 'maq3a', 'maq4', 'maq5', 'maq6',
             'mdi1', 'mdi2', 'mdi3', 'mdi4', 'mdi5', 'mdi6', 'mdi7', 'mdi8', 'mdi9', 'mdi10',
             'mdi11', 'mdi12', 'mdi13', 'mdi14', 'mdi15', 'mdi16', 'mdiscore', 
             'rs1', 'rs2', 'rs3', 'rs4', 'rs5', 'rs6', 'rs7', 'rs8', 'rs9', 'rs10', 'rsscore',
             'bds1', 'bds2', 'bds3', 'bds4', 'bds5', 'bds6', 'bds7', 'bds8', 'bds9', 'bdsscore',
             'spas_1', 'spas_2', 'spas_3', 'spas_4', 'spas_5', 'spas_6', 'spas_7', 'spas_8', 'spas_9', 'spas_10',
             'spas_11', 'spas_12', 'spasscore',
             'cesd1', 'cesd2', 'cesd3', 'cesd4', 'cesd5', 'cesd6', 'cesd7', 'cesd8', 'cesd9', 'cesd10', 'cesd_score',
             'stai_y2_21', 'stai_y2_22','stai_y2_23', 'stai_y2_24', 'stai_y2_25', 'stai_y2_26', 'stai_y2_27', 'stai_y2_28',
             'stai_y2_29', 'stai_y2_30', 'stai_y2_31', 'stai_y2_32', 'stai_y2_33', 'stai_y2_34', 'stai_y2_35', 'stai_y2_36',
             'stai_y2_37', 'stai_y2_38', 'stai_y2_39', 'stai_y2_40', 'stai_score',
             'su1', 'su2', 'su3', 'su4', 'su5', 'su6', 'su7', 'su8', 'su9', 'su10', 
             'audit_1', 'audit_2', 'audit_3', 'audit_score',
             'su11___1', 'su11___2', 'su11___3', 'su11___4', 'su11___5', 'su11___6',
             'su12___1', 'su12___2', 'su12___3', 'su12___4', 'su12___5', 'su12___6',
             'su13___1', 'su13___2', 'su13___3', 'su13___4', 'su13___5', 'su13___6',
             'su14___1', 'su14___2', 'su14___3', 'su14___4', 'su14___5', 'su14___6', 'su15']

# adding all the columns into the new varible subset
MD2_dataset_subset3 = MD2_dataset_subset3[cols_keep]

MD2_dataset_subset3.head(30)



Unnamed: 0,screener_id,group,Eligibility,MDscreener1,MDscreener2,MDscreener3,MDscreener4,AgeSesh1,Gendersesh1,Racialgroup,racialgroupother,EthnicitySesh1,LivingSituation,LivingOther2,Education,Employment,Income,SexualOrientation,CurrentTreatment,SeekingTreatment,Pastdrug,Pastalcohol,CurrentExercise,type_of_exercise___1,type_of_exercise___2,type_of_exercise___3,exercise_frequency,exercise_duration___0,exercise_duration___1,healthhix_1,healthhix_2,healthhix_3,healthhix_4,healthhix_5,healthhix_6,healthhix_7,healthhix_8,healthhix_9,healthhix_17,healthhix_22,healthhix_23,healthhix_25,healthhix_26,healthhix_27,healthhix_28,pa1_1,lbs,bf,chestr,legr,shoulderra,biceps,pa1_4,pa1_5,pa1_6,pa1_7,maq_monday,maq_monday_intensity,maq_monday_duration,maq_monday_other,maq_tuesday,maq_tuesday_intensity,maq_tuesday_duration,maq_tuesday_other,maq_wednesday,maq_wednesday_intensity,maq_wednesday_duration,maq_wednesday_other,maq_thursday,maq_thursday_intensity,maq_thursday_duration,maq_thursday_other,maq_friday,maq_friday_intensity,maq_friday_duration,maq_friday_other,maq_satdurday,maq_satdurday_intensity,maq_satdurday_duration,maq_satdurday_other,maq_sunday,maq_sunday_intensity,maq_sunday_duration,maq_sunday_other,maq_walking,maq_walking_duration,maq1,maq2,maq3,maq3a,maq4,maq5,maq6,mdi1,mdi2,mdi3,mdi4,mdi5,mdi6,mdi7,mdi8,mdi9,mdi10,mdi11,mdi12,mdi13,mdi14,mdi15,mdi16,mdiscore,rs1,rs2,rs3,rs4,rs5,rs6,rs7,rs8,rs9,rs10,rsscore,bds1,bds2,bds3,bds4,bds5,bds6,bds7,bds8,bds9,bdsscore,spas_1,spas_2,spas_3,spas_4,spas_5,spas_6,spas_7,spas_8,spas_9,spas_10,spas_11,spas_12,spasscore,cesd1,cesd2,cesd3,cesd4,cesd5,cesd6,cesd7,cesd8,cesd9,cesd10,cesd_score,stai_y2_21,stai_y2_22,stai_y2_23,stai_y2_24,stai_y2_25,stai_y2_26,stai_y2_27,stai_y2_28,stai_y2_29,stai_y2_30,stai_y2_31,stai_y2_32,stai_y2_33,stai_y2_34,stai_y2_35,stai_y2_36,stai_y2_37,stai_y2_38,stai_y2_39,stai_y2_40,stai_score,su1,su2,su3,su4,su5,su6,su7,su8,su9,su10,audit_1,audit_2,audit_3,audit_score,su11___1,su11___2,su11___3,su11___4,su11___5,su11___6,su12___1,su12___2,su12___3,su12___4,su12___5,su12___6,su13___1,su13___2,su13___3,su13___4,su13___5,su13___6,su14___1,su14___2,su14___3,su14___4,su14___5,su14___6,su15
0,502,0.0,1.0,1.0,0.0,1.0,1.0,31.0,1.0,5.0,,0.0,2.0,,6.0,6.0,0,1.0,0.0,,3.0,3.0,1.0,0.0,0.0,1.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,158.84,72.0,10.0,82.5,65.0,40.0,30.0,64.0,104/56,77.0,125/66,2.0,3.0,90.0,,9.0,,,,2.0,3.0,90.0,,9.0,,,,2.0,3.0,90.0,,9.0,,,,9.0,,,,1.0,180.0,0.0,60.0,0.0,,0.0,0.0,0.0,2.0,2.0,1.0,3.0,0.0,3.0,2.0,0.0,1.0,1.0,3.0,1.0,1.0,1.0,3.0,2.0,,0.0,1.0,0.0,1.0,2.0,1.0,1.0,1.0,2.0,3.0,,5.0,5.0,7.0,7.0,5.0,5.0,5.0,5.0,1.0,,2.0,4.0,4.0,4.0,3.0,3.0,3.0,4.0,4.0,3.0,5.0,3.0,,0.0,2.0,1.0,1.0,2.0,0.0,1.0,1.0,1.0,1.0,,2.0,2.0,3.0,1.0,1.0,2.0,2.0,2.0,2.0,2.0,1.0,3.0,2.0,2.0,2.0,2.0,3.0,2.0,1.0,1.0,,1.0,1.0,0.0,1.0,0.0,0.0,,0.0,1.0,1.0,3.0,0.0,2.0,,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,
1,506,0.0,1.0,1.0,1.0,1.0,1.0,,1.0,4.0,,1.0,1.0,,5.0,1.0,45000,1.0,0.0,,3.0,3.0,1.0,0.0,0.0,1.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,188.53,68.0,18.3,130.0,85.0,55.0,45.0,66.0,133/81,64.0,135/82,2.0,1.0,120.0,,2.0,2.0,120.0,,3.0,1.0,45.0,,2.0,2.0,120.0,,2.0,2.0,60.0,,9.0,,,,9.0,,,,0.0,,1.0,180.0,0.0,,0.0,0.0,0.0,2.0,3.0,1.0,4.0,1.0,3.0,3.0,2.0,1.0,1.0,3.0,4.0,4.0,3.0,3.0,4.0,,0.0,1.0,2.0,3.0,2.0,3.0,3.0,2.0,2.0,2.0,20.0,7.0,7.0,7.0,7.0,7.0,7.0,1.0,7.0,1.0,,4.0,4.0,3.0,4.0,2.0,4.0,4.0,2.0,5.0,5.0,4.0,5.0,46.0,1.0,0.0,0.0,1.0,2.0,1.0,2.0,1.0,1.0,0.0,9.0,3.0,2.0,3.0,3.0,3.0,3.0,2.0,1.0,2.0,3.0,1.0,2.0,3.0,3.0,2.0,3.0,3.0,3.0,3.0,1.0,49.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,Preworkout
2,508,0.0,1.0,0.0,0.0,1.0,1.0,26.0,1.0,6.0,Indigenous and European ancestry,1.0,2.0,,7.0,1.0,120000,2.0,0.0,,3.0,3.0,1.0,0.0,1.0,0.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,164.39,68.0,14.9,70.0,45.0,50.0,35.0,71.0,118/61,66.0,115/60,2.0,2.0,60.0,,9.0,,,,2.0,1.0,35.0,,2.0,1.0,35.0,,2.0,1.0,35.0,,3.0,3.0,45.0,,9.0,,,,0.0,,0.0,75.0,0.0,,0.0,0.0,0.0,3.0,3.0,0.0,3.0,0.0,2.0,3.0,2.0,3.0,1.0,3.0,2.0,2.0,3.0,2.0,1.0,32.0,0.0,1.0,3.0,1.0,1.0,1.0,1.0,2.0,1.0,2.0,13.0,6.0,5.0,5.0,7.0,6.0,4.0,5.0,6.0,6.0,50.0,2.0,3.0,2.0,1.0,3.0,2.0,2.0,3.0,4.0,2.0,3.0,2.0,29.0,1.0,2.0,0.0,1.0,2.0,0.0,1.0,1.0,0.0,0.0,8.0,2.0,2.0,2.0,2.0,1.0,2.0,2.0,1.0,3.0,2.0,1.0,2.0,2.0,3.0,1.0,2.0,3.0,2.0,2.0,2.0,39.0,1.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,1.0,2.0,0.0,1.0,3.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,
5,522,0.0,1.0,0.0,0.0,1.0,1.0,26.0,1.0,7.0,,1.0,7.0,,6.0,2.0,40000,1.0,0.0,,3.0,3.0,1.0,0.0,1.0,0.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,152.91,68.0,16.1,122.5,62.5,50.0,35.0,65.0,124/54,67.0,112/58,2.0,2.0,75.0,,3.0,2.0,90.0,,2.0,3.0,60.0,,2.0,2.0,50.0,,9.0,,,,9.0,,,,9.0,,,,0.0,,0.0,4.0,0.0,,0.0,0.0,0.0,3.0,2.0,1.0,2.0,1.0,4.0,3.0,1.0,3.0,2.0,2.0,2.0,3.0,2.0,3.0,2.0,34.0,2.0,0.0,1.0,0.0,1.0,1.0,2.0,2.0,2.0,0.0,11.0,5.0,6.0,4.0,5.0,4.0,5.0,6.0,5.0,3.0,43.0,3.0,2.0,1.0,2.0,3.0,2.0,2.0,3.0,2.0,3.0,4.0,3.0,30.0,2.0,3.0,0.0,1.0,1.0,1.0,0.0,1.0,2.0,1.0,12.0,2.0,2.0,3.0,4.0,2.0,2.0,3.0,2.0,2.0,2.0,1.0,2.0,3.0,3.0,2.0,3.0,2.0,3.0,3.0,2.0,48.0,0.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,2.0,2.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,
7,533,0.0,1.0,1.0,0.0,1.0,1.0,23.0,1.0,2.0,,0.0,1.0,,2.0,1.0,,1.0,0.0,,3.0,3.0,1.0,0.0,0.0,1.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,150.83,70.5,13.5,72.5,60.0,40.0,30.0,45.0,121/57,47.0,111/46,2.0,3.0,75.0,,2.0,3.0,75.0,,2.0,3.0,75.0,,2.0,3.0,75.0,,9.0,,,,2.0,3.0,75.0,,2.0,3.0,75.0,,1.0,51.0,1.0,4.5,0.0,,0.0,0.0,0.0,2.0,3.0,0.0,2.0,0.0,0.0,3.0,1.0,2.0,0.0,3.0,3.0,1.0,0.0,3.0,0.0,23.0,1.0,0.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,0.0,11.0,6.0,5.0,7.0,6.0,4.0,1.0,5.0,2.0,3.0,39.0,4.0,3.0,2.0,3.0,4.0,3.0,2.0,2.0,2.0,2.0,4.0,1.0,32.0,0.0,0.0,1.0,0.0,2.0,0.0,1.0,1.0,0.0,0.0,5.0,2.0,2.0,3.0,3.0,1.0,3.0,3.0,2.0,2.0,3.0,1.0,3.0,2.0,2.0,2.0,2.0,2.0,3.0,2.0,3.0,46.0,3.0,3.0,0.0,1.0,0.0,0.0,,0.0,0.0,3.0,3.0,2.0,3.0,8.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,
23,561,0.0,1.0,1.0,0.0,1.0,0.0,20.0,1.0,999.0,,1.0,7.0,,3.0,7.0,,1.0,0.0,,3.0,3.0,1.0,0.0,1.0,0.0,4.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,131.36,68.5,12.3,62.5,52.5,40.0,22.5,85.0,113/71,85.0,126/72,2.0,2.0,70.0,,2.0,3.0,70.0,,2.0,2.0,70.0,,2.0,2.0,70.0,,9.0,,,,9.0,,,,9.0,,,,0.0,,1.0,1.5,0.0,,0.0,0.0,0.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,2.0,2.0,0.0,2.0,1.0,14.0,2.0,1.0,0.0,1.0,1.0,0.0,2.0,1.0,1.0,0.0,9.0,2.0,1.0,6.0,5.0,5.0,2.0,5.0,2.0,2.0,30.0,2.0,3.0,2.0,1.0,3.0,1.0,2.0,4.0,2.0,2.0,4.0,2.0,28.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,2.0,1.0,1.0,1.0,2.0,2.0,2.0,1.0,1.0,1.0,1.0,1.0,2.0,1.0,1.0,2.0,2.0,2.0,2.0,29.0,2.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,
24,562,0.0,1.0,1.0,0.0,1.0,1.0,27.0,1.0,5.0,,1.0,7.0,,3.0,2.0,,1.0,0.0,,3.0,3.0,1.0,0.0,1.0,0.0,3.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,171.99,68.0,17.7,135.0,75.0,60.0,35.0,61.0,107/65,63.0,128/66,2.0,3.0,120.0,,,,,,9.0,,,,2.0,3.0,120.0,,,,,,2.0,3.0,120.0,,2.0,3.0,120.0,,0.0,,0.0,0.0,0.0,,0.0,0.0,0.0,4.0,3.0,2.0,4.0,0.0,3.0,3.0,0.0,2.0,1.0,1.0,3.0,2.0,4.0,0.0,2.0,32.0,0.0,4.0,4.0,3.0,1.0,0.0,0.0,1.0,2.0,4.0,19.0,7.0,5.0,3.0,7.0,5.0,4.0,6.0,3.0,1.0,41.0,2.0,3.0,4.0,1.0,3.0,5.0,3.0,3.0,2.0,2.0,2.0,4.0,34.0,0.0,0.0,0.0,2.0,1.0,0.0,0.0,1.0,0.0,0.0,4.0,3.0,2.0,2.0,3.0,3.0,2.0,3.0,2.0,4.0,3.0,4.0,4.0,3.0,4.0,4.0,3.0,3.0,2.0,3.0,1.0,58.0,2.0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,
31,576,0.0,1.0,0.0,0.0,1.0,1.0,34.0,1.0,6.0,Hispanic,1.0,1.0,,7.0,1.0,65000,2.0,0.0,,3.0,3.0,1.0,0.0,0.0,1.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,223.73,69.5,28.5,135.0,80.0,70.0,50.0,76.0,140/73,62.0,139/76,9.0,,,,2.0,3.0,60.0,,2.0,2.0,45.0,,2.0,2.0,45.0,,2.0,2.0,60.0,,8.0,,,Cardio/abdominal,9.0,,,,0.0,,1.0,2.5,0.0,,0.0,0.0,0.0,4.0,3.0,2.0,2.0,1.0,3.0,4.0,1.0,2.0,2.0,4.0,2.0,1.0,4.0,4.0,2.0,39.0,2.0,0.0,2.0,1.0,2.0,0.0,1.0,1.0,2.0,3.0,14.0,6.0,5.0,6.0,6.0,5.0,5.0,6.0,6.0,3.0,48.0,3.0,5.0,4.0,4.0,3.0,3.0,3.0,3.0,4.0,4.0,5.0,3.0,44.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,1.0,0.0,0.0,4.0,2.0,1.0,3.0,2.0,2.0,3.0,2.0,2.0,1.0,2.0,1.0,2.0,3.0,3.0,1.0,2.0,1.0,2.0,2.0,1.0,38.0,4.0,3.0,1.0,0.0,1.0,0.0,,0.0,0.0,5.0,3.0,1.0,3.0,7.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,
32,584,0.0,1.0,1.0,1.0,1.0,1.0,21.0,1.0,5.0,,0.0,7.0,,3.0,6.0,90000,1.0,0.0,,3.0,3.0,1.0,0.0,1.0,0.0,5.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,ADD: Strattera,175.42,72.0,9.6,95.0,70.0,45.0,30.0,74.0,101/63,69.0,105/63,2.0,2.0,90.0,,2.0,3.0,60.0,,9.0,,,,2.0,3.0,60.0,,2.0,2.0,90.0,,9.0,,,,2.0,2.0,120.0,,1.0,15.0,1.0,4.0,0.0,,0.0,0.0,0.0,4.0,4.0,3.0,2.0,3.0,4.0,4.0,4.0,3.0,3.0,3.0,4.0,3.0,2.0,3.0,4.0,49.0,4.0,1.0,4.0,1.0,2.0,2.0,2.0,1.0,3.0,0.0,20.0,7.0,7.0,6.0,7.0,7.0,1.0,4.0,6.0,5.0,50.0,4.0,5.0,4.0,5.0,4.0,5.0,5.0,2.0,4.0,4.0,4.0,4.0,50.0,2.0,1.0,2.0,3.0,2.0,1.0,2.0,2.0,3.0,3.0,21.0,4.0,3.0,4.0,4.0,4.0,4.0,4.0,3.0,4.0,4.0,3.0,4.0,4.0,4.0,4.0,4.0,3.0,4.0,3.0,3.0,74.0,4.0,1.0,0.0,0.0,1.0,0.0,,0.0,0.0,1.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,
33,591,0.0,1.0,0.0,1.0,1.0,1.0,18.0,1.0,4.0,,0.0,7.0,,3.0,999.0,,1.0,0.0,,3.0,3.0,1.0,0.0,0.0,1.0,4.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,218.82,76.5,12.7,105.0,80.0,52.5,35.0,63.0,110/64,67.0,144/62,2.0,2.0,60.0,,9.0,,,,2.0,2.0,60.0,,2.0,3.0,90.0,,9.0,,,,9.0,,,,2.0,2.0,60.0,,0.0,,0.0,4.0,0.0,,0.0,0.0,0.0,2.0,3.0,1.0,3.0,0.0,3.0,2.0,2.0,2.0,1.0,3.0,2.0,4.0,3.0,3.0,3.0,34.0,2.0,2.0,4.0,2.0,1.0,2.0,2.0,3.0,2.0,2.0,22.0,3.0,4.0,3.0,6.0,6.0,4.0,4.0,5.0,2.0,37.0,4.0,3.0,2.0,2.0,4.0,4.0,2.0,2.0,3.0,3.0,4.0,3.0,36.0,2.0,2.0,2.0,3.0,2.0,0.0,0.0,1.0,2.0,3.0,17.0,3.0,2.0,3.0,4.0,2.0,3.0,3.0,2.0,3.0,3.0,2.0,3.0,3.0,4.0,3.0,3.0,3.0,3.0,3.0,2.0,57.0,4.0,1.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,1.0,0.0,1.0,2.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,


In [6]:
# Rename Columns HERE:
# Height and weight
MD2_dataset_subset3.rename(columns = {'lbs':'Height'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'pa1_1':'Weight'}, inplace = True)

MD2_dataset_subset3.rename(columns = {'MDscreener1':'EligibilityRescreenQ1'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'MDscreener2':'EligibilityRescreenQ2'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'MDscreener3':'EligibilityRescreenQ3'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'MDscreener4':'EligibilityRescreenQ4'}, inplace = True)

MD2_dataset_subset3.rename(columns = {'AgeSesh1':'Age2'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'Gendersesh1':'Gender2'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'Racialgroup':'race'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'racialgroupother':'raceother'}, inplace = True)

MD2_dataset_subset3.rename(columns = {'EthnicitySesh1':'HispanicLatino'}, inplace = True)

MD2_dataset_subset3.rename(columns = {'LivingSituation':'living'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'LivingOther2':'numberofroomates'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'EthnicitySesh1':'HispanicLatino'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'EthnicitySesh1':'HispanicLatino'}, inplace = True)

MD2_dataset_subset3.rename(columns = {'bf':'Bodyfat'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'chestr':'Chest'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'legr':'legs'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'shoulderra':'shoulders'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'biceps':'biceps'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'pa1_4':'RHR'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'pa1_5':'RBP'}, inplace = True)

MD2_dataset_subset3.rename(columns = {'pa1_6':'RHR2'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'pa1_7':'RBP2'}, inplace = True)

MD2_dataset_subset3.rename(columns = {'stai_y2_21':'stai1'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_22':'stai2'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_23':'stai3'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_24':'stai4'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_25':'stai5'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_26':'stai6'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_27':'stai7'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_28':'stai8'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_29':'stai9'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_30':'stai10'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_31':'stai11'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_32':'stai12'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_33':'stai13'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_34':'stai14'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_35':'stai15'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_36':'stai16'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_37':'stai17'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_38':'stai18'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_39':'stai19'}, inplace = True)
MD2_dataset_subset3.rename(columns = {'stai_y2_40':'stai20'}, inplace = True)


MD2_dataset_subset3.columns = MD2_dataset_subset3.columns.str.upper()
MD2_dataset_subset3.columns = MD2_dataset_subset3.columns.str.replace('_',"")

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [7]:
MD2_dataset_subset3.shape 


(26, 215)

In [9]:
MD2_dataset_subset3.to_csv('C:/Users/RRC/OneDrive/Documents/CLASSES/YEAR_4/Semester_1/DS_Capstone/PREPROCESSED_MD2_Database.csv', index=False, na_rep='NaN')