<a href="https://colab.research.google.com/github/Jasminehan1112/codecode/blob/main/R%20code%201.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
library(dplyr)
library(readr)



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union




In [3]:
# Load required packages
library(dplyr)
library(readr)
#library(scale)

# Load the nutrient data
foods <- read.csv("https://raw.githubusercontent.com/CaitlinLloyd/Psychology_Programming2025/refs/heads/main/Data/nutrient_info.csv")

# Standardize relevant nutrients
nutrients_scaled <- scale(foods %>% dplyr::select(Fat_g, CHO_g, PRO_g, Energy_Density))
foods <- bind_cols(foods, as.data.frame(nutrients_scaled)) #merge them together, if must need to have the same number of columns
colnames(foods)[(ncol(foods)-3):ncol(foods)] <- c("Fat_s", "CHO_s", "PRO_s", "ED_s") #let us get the last 3 in the data; from column 17 to 20, give them these names "Fat_s", "CHO_s", "PRO_s", "ED_s"

# Set up parameters, 20 participants
n_participants <- 20

# Function to simulate ratings for one participant
simulate_participant <- function(pid, foods_df) {
  n_foods <- nrow(foods_df)

  health_rating <- round(3 + (-0.6 * foods_df$Fat_s + 0.4 * foods_df$PRO_s +
                              0.3 * foods_df$CHO_s + rnorm(n_foods, 0, 0.5)))
  health_rating <- pmin(pmax(health_rating, 1), 5)

  taste_rating <- round(3 + (0.6 * foods_df$Fat_s + 0.4 * foods_df$ED_s +
                             rnorm(n_foods, 0, 0.5)))
  taste_rating <- pmin(pmax(taste_rating, 1), 5)

  choice <- round(3 + 0.5 * (taste_rating - 3) + 0.3 * (health_rating - 3) +
                    rnorm(n_foods, 0, 0.7))
  choice <- pmin(pmax(choice, 1), 5)

  # Reaction time: faster (closer to 1s) for more extreme choices
  preference_strength <- abs(choice - 3)
  reaction_time <- round(runif(n_foods, 3, 5) - 0.5 * preference_strength + rnorm(n_foods, 0, 0.2), 2)
  reaction_time <- pmax(pmin(reaction_time, 5), 1)

  df <- data.frame(
    participant = pid,
    stimulus = foods_df$stimulus,
    Fat_g = foods_df$Fat_g,
    CHO_g = foods_df$CHO_g,
    PRO_g = foods_df$PRO_g,
    Energy_Density = foods_df$Energy_Density,
    health_rating = health_rating,
    taste_rating = taste_rating,
    choice = choice,
    reaction_time = reaction_time
  )

  # Introduce missing values
  n_missing <- round(0.05 * n_foods)
  miss_idx <- sample(1:n_foods, n_missing)
  df$choice[miss_idx] <- NA
  df$reaction_time[miss_idx] <- NA

  return(df)
}

simulated_data <- bind_rows(lapply(1:n_participants, simulate_participant, foods_df = foods))

[1m[22mNew names:
[36m•[39m `CHO_g` -> `CHO_g...5`
[36m•[39m `PRO_g` -> `PRO_g...6`
[36m•[39m `Fat_g` -> `Fat_g...7`
[36m•[39m `Energy_Density` -> `Energy_Density...11`
[36m•[39m `...13` -> `...12`
[36m•[39m `Fat_g` -> `Fat_g...14`
[36m•[39m `CHO_g` -> `CHO_g...15`
[36m•[39m `PRO_g` -> `PRO_g...16`
[36m•[39m `Energy_Density` -> `Energy_Density...17`


In [4]:
simulated_data

participant,stimulus,Fat_g,CHO_g,PRO_g,Energy_Density,health_rating,taste_rating,choice,reaction_time
<int>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,1%milk,0.9700,4.990,3.3700,0.42,3,2,1,4.09
1,air popcorn,4.5400,77.000,12.9400,3.87,4,3,4,4.05
1,american cheese,31.2500,1.600,22.1500,3.75,3,4,3,3.75
1,apple slices,0.1700,13.810,0.2600,0.52,4,3,4,3.71
1,avacado,10.0600,7.820,2.2300,1.20,2,3,1,2.51
1,baby cheese,24.1070,19.642,19.6420,3.74,2,4,4,3.88
1,bagel and cc,11.1750,36.898,8.8220,2.82,3,3,3,4.76
1,bagel plain,1.6200,50.500,10.0200,2.57,4,2,,
1,baguette oil,23.5290,43.949,9.1490,4.20,2,3,2,2.46
1,baked potato,0.1300,21.150,2.5000,0.93,4,3,3,4.88


In [33]:
simulated_data$reaction_time


In [5]:
#what are choice ratings for american cheese only?
simulated_data[simulated_data$stimulus=="american cheese",]$choice

In [6]:
#make a new dataframe, only have american cheese row in it
am_cheese<-simulated_data[simulated_data$stimulus=="american cheese",]


In [7]:
subset(simulated_data,simulated_data$stimulus=="american cheese")%>%select(choice)

Unnamed: 0_level_0,choice
Unnamed: 0_level_1,<dbl>
3,3.0
141,4.0
279,3.0
417,5.0
555,2.0
693,4.0
831,4.0
969,3.0
1107,5.0
1245,5.0


In [8]:
#function for printing mean and median of rating per food
mean(am_cheese$choice,na.rm = TRUE)
median(am_cheese$choice,na.rm = TRUE)



In [12]:
#to create a function
mean_foods<-function(data,food_name,rating){
mini<-subset(simulated_data,simulated_data$stimulus==food_name)
print(mean(mini$rating,na.rm = TRUE))
m<-mean(mini$rating)
return
}


In [11]:
mean_foods(simulated_data,"american cheese","choice")

“argument is not numeric or logical: returning NA”


[1] NA


“argument is not numeric or logical: returning NA”


In [13]:
#to create a function
mean_foods<-function(data,col_name,food_name, rating){
mini<-subset(data,data[[col_name]]==food_name)
print(mean(mini[[rating]],na.rm = TRUE))
m<-mean(mini[[rating]],na.rm = TRUE)
return (m)
}

In [14]:
mean_foods(simulated_data,"stimulus", "american cheese","taste_rating")

[1] 4.55


In [57]:
#creat a loop to produce the mean for each food
for (food in c(unique(simulated_data$stimulus))) {
print(mean_foods(simulated_data,"stimulus",food,"choice"))
}

[1] 2.75
[1] 2.75
[1] 3.473684
[1] 3.473684
[1] 3.684211
[1] 3.684211
[1] 2.842105
[1] 2.842105
[1] 2.5
[1] 2.5
[1] 3.684211
[1] 3.684211
[1] 2.777778
[1] 2.777778
[1] 2.944444
[1] 2.944444
[1] 3.3
[1] 3.3
[1] 2.388889
[1] 2.388889
[1] 2.842105
[1] 2.842105
[1] 2.55
[1] 2.55
[1] 2.578947
[1] 2.578947
[1] 2.444444
[1] 2.444444
[1] 2.157895
[1] 2.157895
[1] 3.684211
[1] 3.684211
[1] 2.947368
[1] 2.947368
[1] 2.947368
[1] 2.947368
[1] 2.388889
[1] 2.388889
[1] 2.421053
[1] 2.421053
[1] 2.526316
[1] 2.526316
[1] 2.05
[1] 2.05
[1] 3.35
[1] 3.35
[1] 3.473684
[1] 3.473684
[1] 3.65
[1] 3.65
[1] 3.294118
[1] 3.294118
[1] 2.473684
[1] 2.473684
[1] 2.263158
[1] 2.263158
[1] 3.105263
[1] 3.105263
[1] 3.25
[1] 3.25
[1] 2.894737
[1] 2.894737
[1] 3.789474
[1] 3.789474
[1] 2.85
[1] 2.85
[1] 3.55
[1] 3.55
[1] 2.647059
[1] 2.647059
[1] 2.736842
[1] 2.736842
[1] 3.421053
[1] 3.421053
[1] 2.5
[1] 2.5
[1] 2.947368
[1] 2.947368
[1] 3.777778
[1] 3.777778
[1] 3.35
[1] 3.35
[1] 2.842105
[1] 2.842105
[1] 3.2
[1

In [15]:
for (food in c(unique(simulated_data$stimulus))) {
  print (paste0("choice=",mean_foods(simulated_data,"stimulus",food,"choice")))
  print (paste0("health_rating=",mean_foods(simulated_data,"stimulus",food,"health_rating")))
  print (paste0("taste_rating=",mean_foods(simulated_data,"stimulus",food, "taste_rating")))
}

[1] 2.578947
[1] "choice=2.57894736842105"
[1] 2.85
[1] "health_rating=2.85"
[1] 2
[1] "taste_rating=2"
[1] 3.578947
[1] "choice=3.57894736842105"
[1] 4.25
[1] "health_rating=4.25"
[1] 3.05
[1] "taste_rating=3.05"
[1] 3.684211
[1] "choice=3.68421052631579"
[1] 2.45
[1] "health_rating=2.45"
[1] 4.55
[1] "taste_rating=4.55"
[1] 2.75
[1] "choice=2.75"
[1] 2.95
[1] "health_rating=2.95"
[1] 2.2
[1] "taste_rating=2.2"
[1] 2.55
[1] "choice=2.55"
[1] 2.5
[1] "health_rating=2.5"
[1] 2.75
[1] "taste_rating=2.75"
[1] 3.45
[1] "choice=3.45"
[1] 2.85
[1] "health_rating=2.85"
[1] 4.1
[1] "taste_rating=4.1"
[1] 3.277778
[1] "choice=3.27777777777778"
[1] 2.9
[1] "health_rating=2.9"
[1] 3.2
[1] "taste_rating=3.2"
[1] 3.166667
[1] "choice=3.16666666666667"
[1] 3.9
[1] "health_rating=3.9"
[1] 2.6
[1] "taste_rating=2.6"
[1] 3.5
[1] "choice=3.5"
[1] 2.35
[1] "health_rating=2.35"
[1] 3.9
[1] "taste_rating=3.9"
[1] 2.75
[1] "choice=2.75"
[1] 3.1
[1] "health_rating=3.1"
[1] 2.3
[1] "taste_rating=2.3"
[1] 2.84

In [17]:
#another way
for (food in c(unique(simulated_data$stimulus))) {


ERROR: Error in parse(text = input): <text>:4:0: unexpected end of input
2: for (food in c(unique(simulated_data$stimulus))) {
3:   
  ^


In [18]:
#Remove missing values, for here it are the rows are complete, have all the data
simulated_data_complete <- simulated_data[complete.cases(simulated_data),]

In [19]:
#Summarize health for one person
# Filter for one participant (e.g., participant 1)
participant_1_data <- simulated_data %>% filter(participant == 1)

# Summary statistics for health rating
mean(participant_1_data$health_rating)

#print this value

print(paste0("the average health rating for participant one is: ",mean(participant_1_data$health_rating)))

[1] "the average health rating for participant one is: 2.94202898550725"


In [20]:
#Now lets merge the rating file with the info file, x=先写的data， y=后写的
simulated_data_complete <- merge(simulated_data_complete,foods,by.x="stimulus",by.y="stimulus")

In [22]:
#now lets group by fat content and summarize for each participant
summary <- simulated_data_complete %>% dplyr::group_by(participant,HI_LO_fat) %>% summarize_at(c('choice',"reaction_time"),c(mean))

#replace the 0 1 values with high and low fat
summary$HI_LO_fat <- ifelse(summary$HI_LO_fat==0,"low","high")
summary
# what are other ways we can do this?

participant,HI_LO_fat,choice,reaction_time
<int>,<chr>,<dbl>,<dbl>
1,low,2.885714,3.853143
1,high,3.311475,3.578033
2,low,2.8,3.694143
2,high,3.360656,3.752295
3,low,2.757143,3.726143
3,high,3.245902,3.644918
4,low,2.791045,3.834627
4,high,3.28125,3.531719
5,low,2.71831,3.849155
5,high,3.333333,3.5625


In [24]:
# what are other ways we can do this?
for(participant in c(simulated_data_complete$participant)){
  x<-subset(simulated_data_complete, simulated_data_complete$participant=participant)
  x_lo<-subset(x,x$HI_LO_Fat==0)
  x_hi<-subset(x,x$HI_LO_Fat==1)
}

ERROR: Error in parse(text = input): <text>:3:73: unexpected '='
2: for(participant in c(simulated_data_complete$participant)){
3:   x<-subset(simulated_data_complete, simulated_data_complete$participant=
                                                                           ^


In [25]:
#pivot summary frame to wide
#now lets group by fat content and summarize for each participant
library(tidyr)

wide <- summary  %>% pivot_wider(1,names_from = "HI_LO_fat",values_from = c("choice","reaction_time"))
wide
# let's compare the average choice ratings for each participant for high and low-fat foods

t.test(wide$choice_low,wide$choice_high)

“[1m[22mSpecifying the `id_cols` argument by position was deprecated in tidyr 1.3.0.
[36mℹ[39m Please explicitly name `id_cols`, like `id_cols = 1`.”


participant,choice_low,choice_high,reaction_time_low,reaction_time_high
<int>,<dbl>,<dbl>,<dbl>,<dbl>
1,2.885714,3.311475,3.853143,3.578033
2,2.8,3.360656,3.694143,3.752295
3,2.757143,3.245902,3.726143,3.644918
4,2.791045,3.28125,3.834627,3.531719
5,2.71831,3.333333,3.849155,3.5625
6,2.608696,3.322581,3.77942,3.671774
7,3.028571,3.081967,3.699,3.642787
8,3.014286,3.311475,3.788429,3.436393
9,2.84507,3.3,3.635775,3.575167
10,2.823529,3.174603,3.797647,3.663333



	Welch Two Sample t-test

data:  wide$choice_low and wide$choice_high
t = -12.477, df = 37.12, p-value = 7.553e-15
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.5083123 -0.3663006
sample estimates:
mean of x mean of y 
 2.807987  3.245294 


In [80]:
mean(wide$choice_high)
mean(wide$choice_low)

In [64]:
summary

participant,HI_LO_fat,choice,reaction_time
<int>,<chr>,<dbl>,<dbl>
1,low,2.913043,3.688696
1,high,3.290323,3.617581
2,low,2.735294,3.705588
2,high,3.190476,3.55127
3,low,2.657143,3.670714
3,high,3.147541,3.682131
4,low,2.652174,3.694928
4,high,3.258065,3.579516
5,low,2.57971,3.62058
5,high,3.290323,3.723226


In [2]:
# Load the nutrient data
foods <- read.csv("https://raw.githubusercontent.com/CaitlinLloyd/Psychology_Programming2025/refs/heads/main/Data/nutrient_info.csv")


In [4]:
foods

Food_image,HI_LO_fat,food,Total.kcal,CHO_g,PRO_g,Fat_g,CHO_pctKcal,PRO_pctKcal,Fat_pctKcal,Energy_Density,...13,stimulus
<chr>,<int>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<chr>
1%milk.jpg,0,1% milk,42.000,4.990,3.3700,0.9700,45.979,34.262,20.301,0.42,,1%milk
air popcorn.jpg,0,Air-popped popcorn,387.000,77.000,12.9400,4.5400,81.121,9.128,9.819,3.87,,air popcorn
american cheese.jpg,1,American cheese,375.000,1.600,22.1500,31.2500,1.651,25.221,73.250,3.75,,american cheese
apple slices.jpg,0,Apple,52.000,13.810,0.2600,0.1700,95.608,1.680,2.736,0.52,,apple slices
avacado.jpg,1,Avocado: green,120.000,7.820,2.2300,10.0600,23.460,6.244,70.169,1.20,,avacado
baby cheese.jpg,1,Baby Bell cheese w/crackers,374.100,19.642,19.6420,24.1070,21.000,21.000,58.000,3.74,,baby cheese
bagel and cc.jpg,1,Bagel & cream cheese,282.150,36.898,8.8220,11.1750,52.303,12.684,34.932,2.82,,bagel and cc
bagel plain.jpg,0,Bagel: plain,257.000,50.500,10.0200,1.6200,78.599,15.595,5.673,2.57,,bagel plain
baguette oil.jpg,1,Baguette with olive oil,420.490,43.949,9.1490,23.5290,41.789,8.700,49.554,4.20,,baguette oil
baked potato.jpg,0,Baked Potato,93.000,21.150,2.5000,0.1300,91.650,7.473,1.170,0.93,,baked potato


In [None]:
c() #this is a list
g<- c() #creat an empty list

In [13]:
#get the food(third)column
foods$food
foods[,3]#take the third column from food for me, and it starts at 0, python starts at 1
#if we want to select row 2 column 3
foods[2,3]
#only row 2
foods[2,]

foods['food']

Unnamed: 0_level_0,Food_image,HI_LO_fat,food,Total.kcal,CHO_g,PRO_g,Fat_g,CHO_pctKcal,PRO_pctKcal,Fat_pctKcal,Energy_Density,...13,stimulus
Unnamed: 0_level_1,<chr>,<int>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<chr>
2,air popcorn.jpg,0,Air-popped popcorn,387,77,12.94,4.54,81.121,9.128,9.819,3.87,,air popcorn


food
<chr>
1% milk
Air-popped popcorn
American cheese
Apple
Avocado: green
Baby Bell cheese w/crackers
Bagel & cream cheese
Bagel: plain
Baguette with olive oil
Baked Potato


In [16]:
nutrients_scaled <- scale(foods %>% dplyr::select(Fat_g, CHO_g, PRO_g, Energy_Density))
foods <- bind_cols(foods, as.data.frame(nutrients_scaled)) #merge them together, if must need to have the same number of columns

[1m[22mNew names:
[36m•[39m `CHO_g` -> `CHO_g...5`
[36m•[39m `PRO_g` -> `PRO_g...6`
[36m•[39m `Fat_g` -> `Fat_g...7`
[36m•[39m `Energy_Density` -> `Energy_Density...11`
[36m•[39m `...13` -> `...12`
[36m•[39m `Fat_g` -> `Fat_g...14`
[36m•[39m `CHO_g` -> `CHO_g...15`
[36m•[39m `PRO_g` -> `PRO_g...16`
[36m•[39m `Energy_Density` -> `Energy_Density...17`


In [17]:
foods

Food_image,HI_LO_fat,food,Total.kcal,CHO_g...5,PRO_g...6,Fat_g...7,CHO_pctKcal,PRO_pctKcal,Fat_pctKcal,Energy_Density...11,...12,stimulus,Fat_g...14,CHO_g...15,PRO_g...16,Energy_Density...17
<chr>,<int>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<lgl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>
1%milk.jpg,0,1% milk,42.000,4.990,3.3700,0.9700,45.979,34.262,20.301,0.42,,1%milk,-0.82878469,-0.96071797,-0.5812889,-1.1669541
air popcorn.jpg,0,Air-popped popcorn,387.000,77.000,12.9400,4.5400,81.121,9.128,9.819,3.87,,air popcorn,-0.50233978,1.70027353,0.7486331,0.8718593
american cheese.jpg,1,American cheese,375.000,1.600,22.1500,31.2500,1.651,25.221,73.250,3.75,,american cheese,1.94005343,-1.08598892,2.0285267,0.8009440
apple slices.jpg,0,Apple,52.000,13.810,0.2600,0.1700,95.608,1.680,2.736,0.52,,apple slices,-0.90193762,-0.63479178,-1.0134788,-1.1078580
avacado.jpg,1,Avocado: green,120.000,7.820,2.2300,10.0600,23.460,6.244,70.169,1.20,,avacado,0.00241539,-0.85614074,-0.7397122,-0.7060050
baby cheese.jpg,1,Baby Bell cheese w/crackers,374.100,19.642,19.6420,24.1070,21.000,21.000,58.000,3.74,,baby cheese,1.28688927,-0.41928140,1.6799954,0.7950344
bagel and cc.jpg,1,Bagel & cream cheese,282.150,36.898,8.8220,11.1750,52.303,12.684,34.932,2.82,,bagel and cc,0.10437228,0.21838100,0.1763636,0.2513509
bagel plain.jpg,0,Bagel: plain,257.000,50.500,10.0200,1.6200,78.599,15.595,5.673,2.57,,bagel plain,-0.76934794,0.72101684,0.3428471,0.1036108
baguette oil.jpg,1,Baguette with olive oil,420.490,43.949,9.1490,23.5290,41.789,8.700,49.554,4.20,,baguette oil,1.23403628,0.47893719,0.2218061,1.0668762
baked potato.jpg,0,Baked Potato,93.000,21.150,2.5000,0.1300,91.650,7.473,1.170,0.93,,baked potato,-0.90559526,-0.36355615,-0.7021909,-0.8655643


In [26]:
#Now load in a dataset we created
dd <- read.csv("https://raw.githubusercontent.com/CaitlinLloyd/Psychology_Programming2025/refs/heads/main/Data/DelayDisc_example.csv")

In [37]:
dd

onset,rt,choice,money_left,delay_left,money_right,delay_right,participant,more_delay,chose_delayed,earnings
<dbl>,<dbl>,<int>,<dbl>,<int>,<dbl>,<int>,<int>,<chr>,<chr>,<dbl>
17.00942,4.48,,23.21,131,10.99,51,1,left,,
28.01366,3.16,1,16.43,32,9.99,19,1,left,yes,16.43
43.01741,4.14,1,38.44,33,32.02,12,1,left,yes,38.44
56.02182,4.47,1,38.66,100,26.57,24,1,left,yes,38.66
71.02479,3.63,1,29.54,142,27.76,6,1,left,yes,29.54
84.02752,5.03,2,21.40,111,22.30,179,1,right,yes,22.30
99.03202,4.71,2,1.09,57,12.08,120,1,right,yes,12.08
112.03549,3.37,2,12.89,6,33.76,149,1,right,yes,33.76
123.03921,5.93,2,14.70,120,17.86,173,1,right,yes,17.86
134.04283,5.86,2,19.62,150,19.25,27,1,left,no,19.25


In [27]:
#Use if statements to figure out which is delayed option
#choic=1, left choice; =2, right choice
dd$more_delay<-ifelse(dd$delay_left <dd$delay_right, "right","left")



In [30]:
# Now summarize the RT for each person when they chose delayed vs chose sooner reward
dd$chose_delayed<-ifelse(dd$more_delay=="right" &dd$choice==2 | dd$more_delay == "left" &dd$choice==1, "yes","no")
dd$chose_delayed
summary <- dd %>% dplyr::group_by(participant,chose_delayed) %>% summarize_at(c("choice","rt"),c(mean))
summary

participant,chose_delayed,choice,rt
<int>,<chr>,<dbl>,<dbl>
1,no,1.416667,4.24
1,yes,1.428571,4.42
1,,,4.48
2,no,1.407407,4.908148
2,yes,1.393939,4.662727


In [46]:
#calculate average trial earnings per participants
dd$earnings<- ifelse(dd$choice==1, dd$money_left, dd$money_right)
dd$earnings[is.na(dd$choice)]<-NA
dd %>% dplyr::group_by(participant) %>% summarize_at(c("earnings"),c(mean), na.rm = TRUE)
#calculate average earnings across the trials for each person

participant,earnings
<int>,<dbl>
1,23.82322
2,22.11183
