In [3]:
import os
from collections import defaultdict

from langchain_community.llms import Ollama

In [4]:
# Name of the Ollama model passed as `model=` to every Ollama client in this
# notebook; it is also stored under the "llm" key of the saved result files.
llm_to_use = "llama3"

In [5]:
# Ollama client with library-default settings; it backs the profile-generation chain.
llm = Ollama(model=llm_to_use)


In [17]:
# Instruction asking the model for 20 synthetic user profiles as a fenced JSON list.
# The example record is written as valid JSON (every string value quoted) because
# the model's answer is later parsed with json.loads and should mirror this format.
generate_user_prompt = """
Could you generate 20 Singaporean profiles between the age of 18 to 70, please include their name, age, gender, lifestyle, occupation, fitness, and app_usage_level (a random number from 0-100).

You have to generate profiles with good race diversity.

Return the answer in json list format denoted in triple backtick below
```
[
{"Name": "Nate Lee", "Age": 36, "Gender": "Male", "Lifestyle": "Very introverted, loves good food, a father of a 16 month daughter", "Occupation": "Software Engineer", "Fitness": "Good, exercises twice a week and tries to be active", "app_usage_level": 40},
{...},
{...},
{...}
]
```
"""

In [18]:
from langchain.prompts import PromptTemplate

# Fixed preamble; the task-specific instruction is substituted for {input}.
llm_prompt = PromptTemplate(
    template=(
        "You are good at generating data for machine learning training. "
        "please generate required data based on the following instruction: {input}"
    ),
    input_variables=["input"],
)

print(llm_prompt)


input_variables=['input'] template='You are good at generating data for machine learning training. please generate required data based on the following instruction: {input}'


In [19]:
from langchain.chains import LLMChain

# verbose=True makes the chain echo the fully formatted prompt when it runs.
test_chain = LLMChain(prompt=llm_prompt, llm=llm, verbose=True)


In [20]:
# Run the profile-generation chain; `generated_users` holds the raw reply, from
# which the fenced JSON list is extracted in a later cell.
generated_users = test_chain.run(input=generate_user_prompt)




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are good at generating data for machine learning training. please generate required data based on the following instruction: 
Could you generate 20 Singaporean profiles between the age of 18 to 70, please include their name, age, gender, lifestyle, occupation, fitness, and app_usage_level (a random number from 0-100).

You have to generate profiles with good race diversity.

Return the answer in json list format denoted in triple backtick below
```
[
{"Name": Nate Lee, "Age": 36, "Gender": "Male", "Lifestyle": "Very introverted, loves good food, a father of a 16 month daughter", "Occupation": "Software Engineer", "Fitness": "Good, exercises twice a week and tries to be active", "app_usage_level": 40},
{...},
{...},
{...}
]
```
[0m

[1m> Finished chain.[0m


In [22]:
import re
import json

# Extract the first fenced block from the model reply. An optional "json" language
# tag is consumed by the pattern itself, so the payload is never rewritten
# (stripping the tag with str.replace would also delete "json" inside field values).
match = re.search(r'```(?:json)?\s*(.*?)```', generated_users, re.DOTALL)
if match is None:
    # Fail with a clear message instead of handing a placeholder sentence to json.loads.
    raise ValueError("No fenced JSON list found in the model response.")

json_list = match.group(1).strip()
profiles_list = json.loads(json_list)

for profile in profiles_list:
    print(profile)

{'Name': 'Aisha Tan', 'Age': 42, 'Gender': 'Female', 'Lifestyle': 'Outgoing, loves trying new foods, busy working mom of two', 'Occupation': 'Marketing Manager', 'Fitness': 'Fair, tries to make time for yoga and walks during weekends', 'app_usage_level': 60}
{'Name': 'Kavin Rajah', 'Age': 29, 'Gender': 'Male', 'Lifestyle': 'Tech enthusiast, enjoys gaming, works from home as a freelance writer', 'Occupation': 'Freelance Writer', 'Fitness': 'Good, plays basketball with friends and tries to stay active', 'app_usage_level': 80}
{'Name': 'Yasmin Binte Ali', 'Age': 56, 'Gender': 'Female', 'Lifestyle': 'Relaxed, loves gardening, retired teacher', 'Occupation': 'Retired Teacher', 'Fitness': 'Fair, walks around the neighborhood with her dog', 'app_usage_level': 30}
{'Name': 'Ethan Tan', 'Age': 25, 'Gender': 'Male', 'Lifestyle': 'Active, enjoys hiking, works as a personal trainer', 'Occupation': 'Personal Trainer', 'Fitness': 'Excellent, trains clients and stays fit', 'app_usage_level': 90}
{'Na

In [23]:
# Activities that each user profile is scored against.
all_task = [
    "Walking 1000 steps",
    "Sleep 8 hours",
    "Jogging for 30 minutes",
    "Yoga for 1 hour",
    "Cycling for 45 minutes",
    "Swimming for 1 hour",
    "Meditation for 20 minutes",
    "Strength training for 1 hour",
    "Dancing for 30 minutes",
    "Hiking for 2 hours",
    "Playing basketball for 1 hour",
    "Playing soccer for 1.5 hours",
    "Rowing for 30 minutes",
]

# Plain-text rendering of the whole task list.
all_task_str = f"{all_task}"


In [44]:

# Prompt for scoring one (user, task) pair. {user_profile} and {task} are the
# substitution slots; the doubled braces render as literal braces.
ut_prompt = """
    For the below user in triple backticks:

    ```{user_profile}```

    Making use of all user profile information, including Age, Gender, Lifestyle, Occupation, Fitness,
    and app_usage_level (how much the user uses the app).

    Generate participation probability (1 for 100 percent completion probability, 0 for 0 percent completion probability),
    'LIKE' (1 for like, 0 for not answered, -1 for dislike), and an explanation for the task below (in triple backticks):
    ```{task}```

    **ONLY** Return the 'participation probability', 'LIKE', and 'explanation' in JSON format. No other irrelevant answers.\n\n
    These three field **MUST** be included in the results.
    ```json
    {{"participation_probability": $participation_probability, "like": $like, "explanation": $explanation}}
    ```
"""

prompt_template_ut = PromptTemplate(
    input_variables=["user_profile", "task"],
    template=ut_prompt
)

# Low temperature, presumably chosen to make the scores more repeatable.
llm = Ollama(
    model=llm_to_use,
    temperature=0.1
)

chain_ut = LLMChain(llm=llm, prompt=prompt_template_ut)

# Keys a model answer must contain before it is accepted.
required_fields = {"participation_probability", "like", "explanation"}

results = defaultdict(list)
unparseable_results = []

# Smoke test: only the first profile and the first two tasks are scored here.
for user_profile in profiles_list[:1]:
    for task in all_task[:2]:
        response = chain_ut.run(user_profile=user_profile, task=task)

        # A reply without a ```json fence produces no match; treat it like any
        # other unparseable answer instead of crashing on `None.group`.
        match = re.search(r'```json(.*?)```', response, re.DOTALL)
        try:
            response_json = json.loads(match.group(1).strip()) if match else None
        except json.JSONDecodeError:
            response_json = None

        # Accept only a JSON object that carries all three required fields.
        if isinstance(response_json, dict) and required_fields <= response_json.keys():
            results[user_profile["Name"]].append({
                "task_name": task,
                "analysis": response_json
            })
        else:
            unparseable_results.append((user_profile["Name"], task))

print("Results:", results)
print("Unparseable Results:", unparseable_results)


Results: defaultdict(<class 'list'>, {'Aisha Tan': [{'task_name': 'Walking 1000 steps', 'analysis': {'participation_probability': 0.8, 'like': 1, 'explanation': 'As an outgoing and busy working mom of two, Aisha Tan might appreciate a task that gets her moving and engaged. The task to walk 1000 steps aligns with her fair fitness level and willingness to make time for yoga and walks during weekends.'}}, {'task_name': 'Sleep 8 hours', 'analysis': {'participation_probability': 0.8, 'like': 1, 'explanation': "As a busy working mom of two, Aisha Tan values her sleep and prioritizes getting at least 7-8 hours of rest to recharge for the next day. The task 'Sleep 8 hours' resonates with her lifestyle."}}]})
Unparseable Results: []


In [53]:
import json
import time
import codecs

# One file per run, named after the current Unix timestamp.
file_name = f'./prompts/{time.time()}.json'
# Create the output folder on first use so opening the file cannot fail on a fresh checkout.
os.makedirs(os.path.dirname(file_name), exist_ok=True)

# Everything needed to trace a run: model name, both prompts, inputs and outputs.
to_write_dict = {
    "llm": llm_to_use,
    "user_prompt": generate_user_prompt,
    "user_task_prompt": ut_prompt,
    "task_list": all_task,
    "user_response": profiles_list,
    "user_task_response": results
}

# The context manager flushes and closes the handle even if serialisation fails,
# so the JSON on disk is complete as soon as this cell finishes.
with codecs.open(file_name, 'w', encoding='utf-8') as file_to_write:
    json.dump(to_write_dict, file_to_write, indent=4, ensure_ascii=False)
print(file_name)

./prompts/1721007349.018964.json


In [49]:
# Display the payload dictionary that is serialised to the prompts file.
to_write_dict

{'llm': 'llama3',
 'user_prompt': '\nCould you generate 20 Singaporean profiles between the age of 18 to 70, please include their name, age, gender, lifestyle, occupation, fitness, and app_usage_level (a random number from 0-100).\n\nYou have to generate profiles with good race diversity.\n\nReturn the answer in json list format denoted in triple backtick below\n```\n[\n{"Name": Nate Lee, "Age": 36, "Gender": "Male", "Lifestyle": "Very introverted, loves good food, a father of a 16 month daughter", "Occupation": "Software Engineer", "Fitness": "Good, exercises twice a week and tries to be active", "app_usage_level": 40},\n{...},\n{...},\n{...}\n]\n```\n',
 'user_task_prompt': '\n    For the below user in triple backticks:\n\n    ```{user_profile}```\n\n    Making use of all user profile information, including Age, Gender, Lifestyle, Occupation, Fitness,\n    and app_usage_level (how much the user uses the app).\n\n    Generate participation probability (1 for 100 percent completion pro

In [58]:
# Load user profiles generated from gemini.
# Read explicitly as UTF-8 (the save cells in this notebook write UTF-8 with
# ensure_ascii=False) and close the handle as soon as the JSON is parsed.
with codecs.open('prompts/1720761288.766782.json', 'r', encoding='utf-8') as gemini_file:
    gemini_json_file = json.load(gemini_file)
user_profile_gemini = gemini_json_file.get('user_response')



# Prompt for scoring one (user, task) pair. {user_profile} and {task} are the
# substitution slots; the doubled braces render as literal braces.
ut_prompt = """
    For the below user in triple backticks:

    ```{user_profile}```

    Making use of all user profile information, including Age, Gender, Lifestyle, Occupation, Fitness,
    and app_usage_level (how much the user uses the app).

    Generate participation probability (1 for 100 percent completion probability, 0 for 0 percent completion probability),
    'LIKE' (1 for like, 0 for not answered, -1 for dislike), and an explanation for the task below (in triple backticks):
    ```{task}```

    **ONLY** Return the 'participation probability', 'LIKE', and 'explanation' in JSON format. No other irrelevant answers.\n\n
    These three field **MUST** be included in the results.
    ```json
    {{"participation_probability": $participation_probability, "like": $like, "explanation": $explanation}}
    ```
"""

prompt_template_ut = PromptTemplate(
    input_variables=["user_profile", "task"],
    template=ut_prompt
)

# Low temperature, presumably chosen to make the scores more repeatable.
llm = Ollama(
    model=llm_to_use,
    temperature=0.1
)

chain_ut = LLMChain(llm=llm, prompt=prompt_template_ut)

# Keys a model answer must contain before it is accepted.
required_fields = {"participation_probability", "like", "explanation"}

results = defaultdict(list)
unparseable_results = []

# Full run: every loaded profile is scored against every task.
for user_profile in user_profile_gemini:
    for task in all_task:
        response = chain_ut.run(user_profile=user_profile, task=task)

        # A reply without a ```json fence produces no match; treat it like any
        # other unparseable answer instead of crashing on `None.group`.
        match = re.search(r'```json(.*?)```', response, re.DOTALL)
        try:
            response_json = json.loads(match.group(1).strip()) if match else None
        except json.JSONDecodeError:
            response_json = None

        # Accept only a JSON object that carries all three required fields.
        if isinstance(response_json, dict) and required_fields <= response_json.keys():
            results[user_profile["Name"]].append({
                "task_name": task,
                "analysis": response_json
            })
        else:
            unparseable_results.append((user_profile["Name"], task))

print("Results:", results)
print("Unparseable Results:", unparseable_results)


Results: defaultdict(<class 'list'>, {'Nate Lee': [{'task_name': 'Walking 1000 steps', 'analysis': {'participation_probability': 0.8, 'like': 1, 'explanation': 'As a software engineer and father of a 16-month-old daughter, Nate Lee values his physical activity to stay healthy and energetic for his family. His good fitness level and regular exercise routine suggest he will participate in this task with high probability.'}}, {'task_name': 'Sleep 8 hours', 'analysis': {'participation_probability': 0.8, 'like': 1, 'explanation': 'As a software engineer with a good fitness level and an active lifestyle, Nate Lee is likely to be interested in improving his sleep quality. His introverted nature might make him more inclined to prioritize self-care activities like getting enough sleep.'}}, {'task_name': 'Jogging for 30 minutes', 'analysis': {'participation_probability': 0.8, 'like': 1, 'explanation': "As a software engineer and father of a 16-month-old daughter, Nate Lee values his time and pri

In [62]:
# Display the accepted analyses, keyed by user name.
results

defaultdict(list,
            {'Nate Lee': [{'task_name': 'Walking 1000 steps',
               'analysis': {'participation_probability': 0.8,
                'like': 1,
                'explanation': 'As a software engineer and father of a 16-month-old daughter, Nate Lee values his physical activity to stay healthy and energetic for his family. His good fitness level and regular exercise routine suggest he will participate in this task with high probability.'}},
              {'task_name': 'Sleep 8 hours',
               'analysis': {'participation_probability': 0.8,
                'like': 1,
                'explanation': 'As a software engineer with a good fitness level and an active lifestyle, Nate Lee is likely to be interested in improving his sleep quality. His introverted nature might make him more inclined to prioritize self-care activities like getting enough sleep.'}},
              {'task_name': 'Jogging for 30 minutes',
               'analysis': {'participation_probabilit

In [59]:
import json
import time
import codecs

# One file per run, named after the current Unix timestamp.
file_name = f'./prompts/{time.time()}.json'
# Create the output folder on first use so opening the file cannot fail on a fresh checkout.
os.makedirs(os.path.dirname(file_name), exist_ok=True)

to_write_dict = {
    "llm": llm_to_use,
    "user_prompt": generate_user_prompt,
    "user_task_prompt": ut_prompt,
    "task_list": all_task,
    # `results` was produced from the profiles loaded into `user_profile_gemini`,
    # so store those profiles to keep "user_response" and "user_task_response"
    # describing the same users.
    "user_response": user_profile_gemini,
    "user_task_response": results
}

# The context manager flushes and closes the handle even if serialisation fails,
# so the JSON on disk is complete as soon as this cell finishes.
with codecs.open(file_name, 'w', encoding='utf-8') as file_to_write:
    json.dump(to_write_dict, file_to_write, indent=4, ensure_ascii=False)
print(file_name)

./prompts/1721016902.404383.json


In [None]:
# Generate markdown results

In [67]:
# Hard-coded sample profiles used to demonstrate the markdown table rendering.
users = [
    {
        "Name": "Aishah Rahman",
        "Age": 28,
        "Gender": "Female",
        "Lifestyle": "Social butterfly, enjoys exploring new cafes and restaurants, loves to travel",
        "Occupation": "Marketing Manager",
        "Fitness": "Moderate, attends yoga classes once a week",
        "app_usage_level": 85,
    },
    {
        "Name": "David Tan",
        "Age": 55,
        "Gender": "Male",
        "Lifestyle": "Retired, enjoys gardening, spending time with his grandchildren",
        "Occupation": "Retired",
        "Fitness": "Fair, walks regularly and does light exercises",
        "app_usage_level": 20,
    },
    {
        "Name": "Sarah Lim",
        "Age": 22,
        "Gender": "Female",
        "Lifestyle": "Student, enjoys reading, attending concerts and festivals",
        "Occupation": "Student",
        "Fitness": "Active, plays badminton regularly",
        "app_usage_level": 95,
    },
    {
        "Name": "Kumar Singh",
        "Age": 48,
        "Gender": "Male",
        "Lifestyle": "Family-oriented, enjoys cooking, spending time with his family",
        "Occupation": "Accountant",
        "Fitness": "Fair, tries to exercise occasionally",
        "app_usage_level": 35,
    },
    {
        "Name": "Emily Chan",
        "Age": 62,
        "Gender": "Female",
        "Lifestyle": "Active, enjoys swimming, volunteering at the local community center",
        "Occupation": "Retired teacher",
        "Fitness": "Good, swims regularly and attends fitness classes",
        "app_usage_level": 15,
    },
]

def convert_to_markdown(users):
    """Render user profiles as a pipe-delimited markdown table.

    Args:
        users: Iterable of dicts. Each dict must contain the keys ``Name``,
            ``Age``, ``Gender``, ``Lifestyle``, ``Occupation``, ``Fitness``
            and ``app_usage_level``.

    Returns:
        The table as one string: a header row, a ``---`` separator row and one
        row per user, every row terminated by a newline.

    Raises:
        KeyError: If a profile lacks one of the expected keys.
    """
    headers = ["Name", "Age", "Gender", "Lifestyle", "Occupation", "Fitness", "app usage level"]
    # Column titles use spaces; the matching dictionary keys use underscores.
    keys = [header.replace(' ', '_') for header in headers]

    def cell(value):
        # A raw "|" would open an extra column and a line break would end the
        # row early, so neutralise both before the value enters the table.
        text = str(value).replace('|', '\\|')
        return text.replace('\r\n', ' ').replace('\n', ' ')

    lines = [
        f"| {' | '.join(headers)} |",
        f"| {' | '.join(['---'] * len(headers))} |",
    ]
    lines.extend(f"| {' | '.join(cell(user[key]) for key in keys)} |" for user in users)

    return "\n".join(lines) + "\n"

# Render the sample profiles as a markdown table and print the raw markdown source.
markdown_table = convert_to_markdown(users)
print(markdown_table)

| Name | Age | Gender | Lifestyle | Occupation | Fitness | app usage level |
| --- | --- | --- | --- | --- | --- | --- |
| Aishah Rahman | 28 | Female | Social butterfly, enjoys exploring new cafes and restaurants, loves to travel | Marketing Manager | Moderate, attends yoga classes once a week | 85 |
| David Tan | 55 | Male | Retired, enjoys gardening, spending time with his grandchildren | Retired | Fair, walks regularly and does light exercises | 20 |
| Sarah Lim | 22 | Female | Student, enjoys reading, attending concerts and festivals | Student | Active, plays badminton regularly | 95 |
| Kumar Singh | 48 | Male | Family-oriented, enjoys cooking, spending time with his family | Accountant | Fair, tries to exercise occasionally | 35 |
| Emily Chan | 62 | Female | Active, enjoys swimming, volunteering at the local community center | Retired teacher | Good, swims regularly and attends fitness classes | 15 |



In [70]:
# Hard-coded task analyses for a single profile (every explanation refers to
# Catherine Lee), used to demonstrate the markdown report rendering.
tasks = [
    {
        "task_name": "Walking 1000 steps",
        "analysis": {
            "participation_probability": 0.8,
            "like": 1,
            "explanation": "As a retired librarian and someone who enjoys reading and volunteering at the local library, Catherine Lee is likely to be interested in physical activity that promotes well-being and social connections, such as walking. Her age and fitness level suggest she may not be able to commit to a strenuous task like walking 1000 steps, but she might still participate with some enthusiasm.",
        },
    },
    {
        "task_name": "Sleep 8 hours",
        "analysis": {
            "participation_probability": 0.8,
            "like": 1,
            "explanation": "As a retired librarian, Catherine Lee likely values sleep and relaxation, making it more likely for her to participate in this task. Her age and fitness level also suggest she prioritizes self-care, which aligns with the importance of sleep.",
        },
    },
    {
        "task_name": "Jogging for 30 minutes",
        "analysis": {
            "participation_probability": 0.7,
            "like": 1,
            "explanation": "As a retired librarian and avid reader, Catherine Lee may appreciate the physical activity and mental stimulation that jogging provides. Her fair fitness level suggests she's capable of moderate exercise, and her app usage level indicates she's engaged with technology. Given her age (65) and gender (Female), we assume she'll be cautious but willing to try new things.",
        },
    },
    {
        "task_name": "Yoga for 1 hour",
        "analysis": {
            "participation_probability": 0.7,
            "like": 1,
            "explanation": "As a retired librarian and avid reader, Catherine Lee may appreciate the calming and meditative aspects of yoga. Her fair fitness level suggests she may not be an experienced yogi, but her willingness to engage in light exercises indicates some openness to physical activity. Given her age (65) and relatively low app usage level (18), we assume she may not be as tech-savvy or familiar with the app's features, which might affect her participation probability.",
        },
    },
    {
        "task_name": "Cycling for 45 minutes",
        "analysis": {
            "participation_probability": 0.8,
            "like": 1,
            "explanation": "Catherine Lee's retired lifestyle and occupation as a librarian suggest she may have more time for physical activities like cycling. Her age of 65 and fair fitness level indicate some caution is needed, but her app usage level suggests she may be open to trying new things.",
        },
    },
    {
        "task_name": "Swimming for 1 hour",
        "analysis": {
            "participation_probability": 0.7,
            "like": 1,
            "explanation": "As a retired librarian and avid reader, Catherine Lee may not have experience with swimming but her active lifestyle and willingness to volunteer at the local library suggest she might be open to trying new activities. Her age and fitness level also indicate that she may need some encouragement to participate in physically demanding activities like swimming.",
        },
    },
    {
        "task_name": "Meditation for 20 minutes",
        "analysis": {
            "participation_probability": 0.7,
            "like": 1,
            "explanation": "Given Catherine Lee's retired lifestyle and occupation as a librarian, she may appreciate the calming effects of meditation. Her fair fitness level suggests she values relaxation and self-care. The app_usage_level is moderate, indicating she is familiar with technology and may be open to trying new experiences.",
        },
    },
    {
        "task_name": "Strength training for 1 hour",
        "analysis": {
            "participation_probability": 0.7,
            "like": 1,
            "explanation": "As a retired librarian and enthusiast of reading and volunteering, Catherine Lee may be more likely to participate in strength training as it aligns with her active lifestyle. The app usage level suggests she is moderately engaged, which also increases the likelihood of participation.",
        },
    },
    {
        "task_name": "Dancing for 30 minutes",
        "analysis": {
            "participation_probability": 0.7,
            "like": 1,
            "explanation": "As a retired librarian, Catherine Lee has a strong appreciation for physical activity and may enjoy dancing as a way to stay active and engaged. Her age and fitness level suggest that she may not be able to commit to the full 30 minutes, but her overall lifestyle suggests a willingness to participate.",
        },
    },
    {
        "task_name": "Hiking for 2 hours",
        "analysis": {
            "participation_probability": 0.7,
            "like": 1,
            "explanation": "As a retired librarian and avid reader, Catherine Lee likely appreciates nature and outdoor activities. Her fair fitness level suggests she may not be an experienced hiker, but her willingness to walk regularly indicates some physical activity tolerance. Given her age (65) and occupation, it's reasonable to assume she might enjoy a leisurely hike with scenic views. The app usage level of 18 suggests moderate engagement, which could translate to a positive experience.",
        },
    },
    {
        "task_name": "Playing basketball for 1 hour",
        "analysis": {
            "participation_probability": 0.7,
            "like": 1,
            "explanation": "As a retired librarian and avid reader, Catherine Lee may not be as physically active as others, but her fair fitness level suggests she might still enjoy playing basketball for an hour. Her age (65) is also a factor to consider, so I've adjusted the participation probability accordingly. Given her positive lifestyle and occupation, I'm confident she'll have a good time and give it a LIKE.",
        },
    },
    {
        "task_name": "Playing soccer for 1.5 hours",
        "analysis": {
            "participation_probability": 0.7,
            "like": 1,
            "explanation": "Catherine Lee's retired lifestyle and fair fitness level suggest she may not be as physically active as others, but her occupation as a librarian shows she values learning and community engagement, making it likely she'll participate in the task. Her app usage level is moderate, indicating she's engaged with technology, which could also contribute to her participation. The 'LIKE' score is high because Catherine Lee's personality traits align well with the task.",
        },
    },
    {
        "task_name": "Rowing for 30 minutes",
        "analysis": {
            "participation_probability": 0.7,
            "like": 1,
            "explanation": "Catherine Lee's retired lifestyle and occupation as a librarian suggest she may have more free time to participate in this task. Her age of 65 might make her less inclined to engage in high-intensity activities like rowing, but the fact that she walks regularly and does light exercises indicates some level of physical fitness. The app_usage_level of 18 suggests moderate usage, which could indicate a willingness to try new things. Overall, I predict a 70% participation probability, with a LIKE response.",
        },
    },
]

def convert_tasks_to_markdown(tasks):
    """Format task analyses as a markdown report.

    Each task becomes a level-2 heading with its name, followed by bold-labelled
    lines for the participation probability, the like flag and the explanation.
    Every line is followed by a blank line; an empty input yields an empty string.
    """
    sections = []

    for entry in tasks:
        heading = f"## {entry['task_name']}\n\n"
        details = entry['analysis']
        body = (
            f"**Participation Probability:** {details['participation_probability']}\n\n"
            f"**Like:** {details['like']}\n\n"
            f"**Explanation:** {details['explanation']}\n\n"
        )
        sections.append(heading + body)

    return "".join(sections)

# Render the hard-coded task analyses as a markdown report and print the raw markdown source.
markdown_tasks = convert_tasks_to_markdown(tasks)
print(markdown_tasks)

## Walking 1000 steps

**Participation Probability:** 0.8

**Like:** 1

**Explanation:** As a retired librarian and someone who enjoys reading and volunteering at the local library, Catherine Lee is likely to be interested in physical activity that promotes well-being and social connections, such as walking. Her age and fitness level suggest she may not be able to commit to a strenuous task like walking 1000 steps, but she might still participate with some enthusiasm.

## Sleep 8 hours

**Participation Probability:** 0.8

**Like:** 1

**Explanation:** As a retired librarian, Catherine Lee likely values sleep and relaxation, making it more likely for her to participate in this task. Her age and fitness level also suggest she prioritizes self-care, which aligns with the importance of sleep.

## Jogging for 30 minutes

**Participation Probability:** 0.7

**Like:** 1

**Explanation:** As a retired librarian and avid reader, Catherine Lee may appreciate the physical activity and mental stim