___
LOAD LIBRARIES
___

In [9]:
import pandas as pd
from scipy.stats import pearsonr

In [10]:
import numpy as np
import config
import os
import openai
import time

import json

import random
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

___
LOAD IN DATA - GOAL LABELS, SELF REPORT MEASURES
___

In [2]:
# Relative locations of the two processed input CSVs used by this notebook.
# Daily goals in wide format, with a label and an embedding per goal:
goal_path = "./data/proc/goals/daily_goals_labeled_embedding_wide.csv"
# Daily self-report measures (confidence, effort, motivation, completion, ...):
self_report_path = "./data/proc/self_report/self_report.csv"

In [3]:
# Read the self-report measures into a DataFrame and preview the first three rows.
self_report_df = pd.read_csv(filepath_or_buffer=self_report_path)
self_report_df.head(n=3)

Unnamed: 0,ParticipantIdentifier,trial_date,DAILY_goal1_confidence,DAILY_goal1_consequences,DAILY_goal1_effort,DAILY_goal1_importance,DAILY_goal2_confidence,DAILY_goal2_consequences,DAILY_goal2_effort,DAILY_goal2_importance,DAILY_goal1_set,DAILY_goal2_set,DAILY_goal1_report,DAILY_goal2_report,DAILY_goal2_interaction_eachOther,DAILY_goal1_motivationExternal,DAILY_goal1_motivationInternal,DAILY_goal2_motivationExternal,DAILY_goal2_motivationInternal
0,0501ba67-3406-4779-aff1-878a0e9f7885,2022-09-30,4.0,3.0,6.0,5.0,6.0,1.0,7.0,4.0,"Study history and psychology, practice the vio...",Do exercise,,,,7.0,6.0,3.0,7.0
1,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-01,5.0,4.0,7.0,7.0,3.0,3.0,5.0,5.0,study history lectures,Watch the historical movie,70.0,100.0,,6.0,7.0,6.0,5.0
2,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-02,7.0,5.0,4.0,7.0,2.0,7.0,7.0,7.0,Watch a historical movie,Review for psy and bio,89.0,0.0,,7.0,5.0,7.0,6.0


In [4]:
# Read the labeled goals (with their embeddings) into a DataFrame and preview
# the first three rows.
goal_df = pd.read_csv(filepath_or_buffer=goal_path)
goal_df.head(n=3)

Unnamed: 0,ParticipantIdentifier,trial_date,DAILY_goal1_set,DAILY_goal2_set,DAILY_goal1_label,DAILY_goal2_label,DAILY_goal1_embedding,DAILY_goal2_embedding
0,0501ba67-3406-4779-aff1-878a0e9f7885,2022-09-30,"Study history and psychology, practice the violin",Do exercise,School,Exercise,"[-0.038126230239868164, -0.021308906376361847,...","[-0.012392040342092514, 0.017055129632353783, ..."
1,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-03,review,movie review,School,Culture,"[0.03161436691880226, 0.029465556144714355, -0...","[-0.02972441539168358, 0.03863658756017685, -0..."
2,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-07,practice the violin,Work out,Hobby,Exercise,"[-0.01599280908703804, -0.0059240879490971565,...","[-0.03653150796890259, 0.02275843359529972, -0..."


___
## REPLICATING EXISTING HYPOTHESIS
___

___
#### 1. WHICH IS MORE CORRELATED WITH GOAL COMPLETION - INTRINSIC OR EXTRINSIC MOTIVATION?
Deci, E. L., & Ryan, R. M. (1985). *Intrinsic Motivation and Self-Determination in Human Behavior*. Springer Science & Business Media.
___

In [14]:
# Question 1 compares reported goal completion with the two motivation ratings.
# Each measure exists once per daily goal slot (goal1 / goal2).
q1_names = ["report", "motivationExternal", "motivationInternal"]

cols_1 = [
    "DAILY_goal1_report",
    "DAILY_goal1_motivationExternal",
    "DAILY_goal1_motivationInternal",
]
cols_2 = [
    "DAILY_goal2_report",
    "DAILY_goal2_motivationExternal",
    "DAILY_goal2_motivationInternal",
]

# Give both goal slots the same slot-independent column names so they can be
# stacked on top of each other.
q1_df_goal1 = self_report_df[cols_1].rename(columns=dict(zip(cols_1, q1_names)))
q1_df_goal2 = self_report_df[cols_2].rename(columns=dict(zip(cols_2, q1_names)))

# Long format: one row per (day, goal slot). Row labels from the source frame
# are kept, so a label can occur twice. Rows missing any of the three measures
# are dropped.
# NOTE(review): in the previewed self-report rows, the first day has goals set
# but no report values -- confirm that `*_report` on a row describes that same
# row's goals before interpreting the correlations below.
q1_df = pd.concat([q1_df_goal1, q1_df_goal2]).dropna()
q1_df.head(2)

Unnamed: 0,report,motivationExternal,motivationInternal
1,70.0,6.0,7.0
2,89.0,7.0,5.0


In [15]:
# Pearson correlation between reported goal completion and external motivation.
corr, p_value = pearsonr(q1_df["report"], q1_df["motivationExternal"])
print(
    f"Correlation between goal completion and External Motivation: {corr}",
    f"P-value: {p_value}",
    sep="\n",
)

Correlation between goal completion and External Motivation: 0.007010375660294542
P-value: 0.31625203522946443


In [16]:
# Pearson correlation between reported goal completion and internal motivation.
corr, p_value = pearsonr(q1_df["report"], q1_df["motivationInternal"])
print(
    f"Correlation between goal completion and Internal Motivation: {corr}",
    f"P-value: {p_value}",
    sep="\n",
)


Correlation between goal completion and Internal Motivation: 0.10241681226048392
P-value: 8.692246617964719e-49


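- Correlation between internal motivation and goal completion is positive (r ≈ 0.10, p < 0.001); the effect size is small but statistically significant.
- External motivation shows no significant correlation with goal completion (r ≈ 0.007, p ≈ 0.32).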

___
#### 2. Higher goal similarity should lead to higher motivation levels
High similarity between the two daily goals (i.e., aligned goals) is expected to enhance motivation and improve completion rates.

Locke, E. A., & Latham, G. P. (1990). *A Theory of Goal Setting & Task Performance*. Prentice-Hall, Inc.
___

In [36]:
def _embedding_cosine(row):
    """Return the cosine similarity of a row's two JSON-encoded goal embeddings."""
    first = json.loads(row["DAILY_goal1_embedding"])
    second = json.loads(row["DAILY_goal2_embedding"])
    # cosine_similarity works on 2-D inputs and returns a 1x1 matrix here.
    return cosine_similarity([first], [second])[0][0]


q2_key_cols = ["ParticipantIdentifier", "trial_date"]
q2_shared_cols = q2_key_cols + ["daily_goal_similarity"]
q2_motivation_names = ["motivationExternal", "motivationInternal"]
q2_goal1_cols = ["DAILY_goal1_motivationExternal", "DAILY_goal1_motivationInternal"]
q2_goal2_cols = ["DAILY_goal2_motivationExternal", "DAILY_goal2_motivationInternal"]

# Keep only the days on which both goals have an embedding, then score how
# similar the two goals of each day are.
q2_df = goal_df[
    q2_key_cols + ["DAILY_goal1_embedding", "DAILY_goal2_embedding"]
].dropna()
q2_df["daily_goal_similarity"] = q2_df.apply(_embedding_cosine, axis=1)

# Attach the self-report measures recorded for the same participant and day.
q2_df = q2_df.merge(self_report_df, on=q2_key_cols)

# One frame per goal slot, with slot-independent motivation column names.
q2_df_goal1 = q2_df[q2_shared_cols + q2_goal1_cols].rename(
    columns=dict(zip(q2_goal1_cols, q2_motivation_names))
)
q2_df_goal2 = q2_df[q2_shared_cols + q2_goal2_cols].rename(
    columns=dict(zip(q2_goal2_cols, q2_motivation_names))
)

# Long format: each day's similarity score appears once per goal slot; rows
# with a missing value in any column are dropped.
q2_df = pd.concat([q2_df_goal1, q2_df_goal2]).dropna()
q2_df.head(2)

Unnamed: 0,ParticipantIdentifier,trial_date,daily_goal_similarity,motivationExternal,motivationInternal
0,0501ba67-3406-4779-aff1-878a0e9f7885,2022-09-30,0.303563,7.0,6.0
1,0501ba67-3406-4779-aff1-878a0e9f7885,2022-10-03,0.48476,7.0,4.0


In [37]:
# Pearson correlation between daily goal similarity and external motivation.
corr, p_value = pearsonr(q2_df["daily_goal_similarity"], q2_df["motivationExternal"])
print(
    f"Correlation between daily goal similarity and External Motivation: {corr}",
    f"P-value: {p_value}",
    sep="\n",
)

Correlation between daily goal similarity and External Motivation: 0.0828433037541802
P-value: 6.12103858165426e-06


In [38]:
# Pearson correlation between daily goal similarity and internal motivation.
corr, p_value = pearsonr(q2_df["daily_goal_similarity"], q2_df["motivationInternal"])
print(
    f"Correlation between daily goal similarity and Internal Motivation: {corr}",
    f"P-value: {p_value}",
    sep="\n",
)

Correlation between daily goal similarity and Internal Motivation: -0.08411622370607084
P-value: 4.392393675079257e-06


- External Motivation is positively correlated with daily goal similarity
- Internal Motivation is negatively correlated with daily goal similarity (r ≈ -0.08)
- Both effects are small but significant

You can explore both existing hypotheses in the area of goal-setting and motivation, as well as pose new research questions. Here’s a breakdown:

### **Existing Hypotheses You Can Replicate:**

2. **Goal Conflict and Performance**:
   - Hypothesis: When individuals perceive a high degree of conflict between their goals (e.g., when their two daily goals are in competition), their overall goal completion percentage decreases.
   - Research: Conflict theory, as discussed by Emmons & King (1988), suggests that conflicting goals reduce cognitive resources, leading to lower performance.
     - *Reference*: Emmons, R. A., & King, L. A. (1988). Conflict among personal strivings: Immediate and long-term implications for psychological and physical well-being. *Journal of Personality and Social Psychology*, 54(6), 1040.


### **New Research Questions:**

1. **Does the interaction between internal and external motivation predict goal similarity or conflict?**
   - Question: How do varying levels of internal and external motivation interact to influence goal similarity or conflict? Does having strong intrinsic motivation reduce the likelihood of conflict between goals?

2. **What role does motivation play in mediating the effects of goal conflict on completion rates?**
   - Question: When two daily goals conflict, does motivation (internal or external) mediate the impact of this conflict on goal completion percentages? 

3. **How does the fluctuation of internal versus external motivation across days impact long-term goal pursuit?**
   - Question: Do day-to-day shifts in internal and external motivation predict patterns in goal similarity or completion rates over time?

4. **How does the interaction of goal similarity/conflict affect motivation in subsequent goal-setting?**
   - Question: Does experiencing success in one goal (especially in the context of similar or dissimilar goals) affect internal or external motivation in setting future goals?

### **Papers to Start Your Research:**

- **Self-Determination Theory**: Deci, E. L., & Ryan, R. M. (2000). The "what" and "why" of goal pursuits: Human needs and the self-determination of behavior. *Psychological Inquiry*, 11(4), 227-268.
  
- **Goal Conflict**: Emmons, R. A., & King, L. A. (1988). Conflict among personal strivings: Immediate and long-term implications for psychological and physical well-being. *Journal of Personality and Social Psychology*, 54(6), 1040.

- **Goal Similarity**: Kruglanski, A. W., et al. (2002). Goal Systems Theory: The Coherence of Self-Regulation. In *Advances in Experimental Social Psychology* (Vol. 34, pp. 331-378).

- **Motivation and Goal Attainment**: Elliot, A. J., & Church, M. A. (1997). A hierarchical model of approach and avoidance achievement motivation. *Journal of Personality and Social Psychology*, 72(1), 218.

These will give you foundational theory and an evidence base for your hypotheses, as well as inspiration for new lines of inquiry.
