In [1]:
import os
import getpass

# Ask for the key interactively (input is not echoed) and export it through the
# process environment instead of writing it into the notebook source.
# NOTE(review): presumably the Gemini client reads GOOGLE_API_KEY from the
# environment — confirm against the langchain_google_genai documentation.
os.environ['GOOGLE_API_KEY'] = getpass.getpass('Gemini API Key:')

In [4]:
# Model identifier: passed to the chat model constructor and recorded as "llm"
# in the run snapshot written later in this notebook.
llm_to_use = "gemini-1.5-flash"

In [5]:

from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used for the profile-generation step, with explicit sampling
# settings (temperature and nucleus cut-off).
llm = ChatGoogleGenerativeAI(
    model=llm_to_use,
    temperature=0.7,
    top_p=0.85,
)


In [6]:
# Instruction sent to the model to synthesise user profiles. The fenced example
# doubles as the output-format specification, so its first record must itself be
# valid JSON (the name value was previously unquoted).
generate_user_prompt = """
Could you generate 20 Singaporean profiles between the age of 18 to 70, please include their name, age, gender, lifestyle, occupation, fitness, and app_usage_level (a random number from 0-100).

You should pay attention to races diversity

Return the answer in json list format denoted in triple backtick below
```
[
{"Name": "Nate Lee", "Age": 36, "Gender": "Male", "Lifestyle": "Very introverted, loves good food, a father of a 16 month daughter", "Occupation": "Software Engineer", "Fitness": "Good, exercises twice a week and tries to be active", "app_usage_level": 40},
{...},
{...},
{...}
]
```
"""

In [7]:
from langchain.prompts import PromptTemplate

# Generic wrapper prompt: a fixed preamble followed by the task-specific
# instruction, which is substituted for the single `input` placeholder.
llm_prompt = PromptTemplate(
    template=(
        "You are good at generating data for machine learning training. "
        "please generate required data based on the following instruction: {input}"
    ),
    input_variables=["input"],
)

print(llm_prompt)


input_variables=['input'] template='You are good at generating data for machine learning training. please generate required data based on the following instruction: {input}'


In [8]:
from langchain.chains import LLMChain
# Chain pairing the wrapper prompt with the chat model; verbose mode prints the
# fully formatted prompt each time the chain runs.
test_chain = LLMChain(
    prompt=llm_prompt,
    llm=llm,
    verbose=True,
)


  warn_deprecated(


In [9]:
# Send the profile-generation instruction through the chain. The result is the
# model's reply as text; the next cell searches it for a fenced JSON list.
generated_users = test_chain.run(input=generate_user_prompt)


  warn_deprecated(




[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mYou are good at generating data for machine learning training. please generate required data based on the following instruction: 
Could you generate 20 Singaporean profiles between the age of 18 to 70, please include their name, age, gender, lifestyle, occupation, fitness, and app_usage_level (a random number from 0-100).

You should consider races diversity

Return the answer in json list format denoted in triple backtick below
```
[
{"Name": Nate Lee, "Age": 36, "Gender": "Male", "Lifestyle": "Very introverted, loves good food, a father of a 16 month daughter", "Occupation": "Software Engineer", "Fitness": "Good, exercises twice a week and tries to be active", "app_usage_level": 40},
{...},
{...},
{...}
]
```
[0m

[1m> Finished chain.[0m


In [10]:
import re 
import json

def extract_fenced_block(text):
    """Return the contents of the first triple-backtick fence in ``text``.

    An optional ``json`` language tag directly after the opening fence is
    dropped so that both bare fences and ``json``-tagged fences yield the
    raw payload.

    Args:
        text: The full model reply.

    Returns:
        The fenced payload with surrounding whitespace stripped.

    Raises:
        ValueError: If ``text`` contains no complete fenced block.
    """
    match = re.search(r'```(?:json)?\s*(.*?)```', text, re.DOTALL | re.IGNORECASE)
    if match is None:
        # Fail here with a clear message instead of handing a placeholder
        # string to json.loads and getting an unrelated decode error.
        raise ValueError("No JSON list found in the model response.")
    return match.group(1).strip()


json_list = extract_fenced_block(generated_users)
profiles_list = json.loads(json_list)

for profile in profiles_list:
    print(profile)

{'Name': 'Nate Lee', 'Age': 36, 'Gender': 'Male', 'Lifestyle': 'Very introverted, loves good food, a father of a 16 month daughter', 'Occupation': 'Software Engineer', 'Fitness': 'Good, exercises twice a week and tries to be active', 'app_usage_level': 40}
{'Name': 'Aishah Rahman', 'Age': 28, 'Gender': 'Female', 'Lifestyle': 'Social butterfly, enjoys exploring new cafes and restaurants, loves to travel', 'Occupation': 'Marketing Manager', 'Fitness': 'Moderate, attends yoga classes once a week', 'app_usage_level': 85}
{'Name': 'David Tan', 'Age': 55, 'Gender': 'Male', 'Lifestyle': 'Retired, enjoys gardening, spending time with his grandchildren', 'Occupation': 'Retired', 'Fitness': 'Fair, walks regularly and does light exercises', 'app_usage_level': 20}
{'Name': 'Sarah Lim', 'Age': 22, 'Gender': 'Female', 'Lifestyle': 'Student, enjoys reading, attending concerts and festivals', 'Occupation': 'Student', 'Fitness': 'Active, plays badminton regularly', 'app_usage_level': 95}
{'Name': 'Kuma

In [11]:
# Catalogue of activities that can be recommended to a user, one per line.
all_task = [
    "Walking 1000 steps",
    "Sleep 8 hours",
    "Jogging for 30 minutes",
    "Yoga for 1 hour",
    "Cycling for 45 minutes",
    "Swimming for 1 hour",
    "Meditation for 20 minutes",
    "Strength training for 1 hour",
    "Dancing for 30 minutes",
    "Hiking for 2 hours",
    "Playing basketball for 1 hour",
    "Playing soccer for 1.5 hours",
    "Rowing for 30 minutes",
]

# Textual form of the whole catalogue (the list's Python repr).
all_task_str = repr(all_task)


In [12]:

import json
import re
from collections import defaultdict

# Prompt asking the model to score one task for one user. `{user_profile}` and
# `{task}` are filled in per call. The text is kept exactly as used so that the
# saved run snapshot stays comparable.
# NOTE(review): the text contains a stray double quote after "uses the app).",
# an unbalanced single quote before "participation probability", and a trailing
# unclosed fence — consider cleaning these up in a new prompt version.
ut_prompt = """
    For the below user in triple backticks\n\n
    ```{user_profile}```\n\n
    Making use of all user profile information, including Age, Gender, Lifestyle, Occupation, Fitness,
    and app_usage_level (how much the user uses the app)."
    Generate participation probability (1 for 100 percent completion probability, 0 for 0 percent completion probability)
    and 'LIKE' for these tasks below (1 for like, 0 for not answered, -1 for dislike) when these tasks are recommended to a user via an app.
    tasks are in triple backticks below.\n
    ```{task}```\n\n
    Return 'participation probability, like and explaination in JSON format
    ```
"""


prompt_template_ut = PromptTemplate(
    input_variables=["user_profile", "task"],
    template=ut_prompt
)

# Use the notebook-wide model name so the model that is actually called always
# matches the "llm" value recorded in the run snapshot.
llm = ChatGoogleGenerativeAI(model=llm_to_use,
                             top_p=0.85)

chain_ut = LLMChain(llm=llm, prompt=prompt_template_ut)


def _parse_fenced_json(response):
    """Parse the first fenced JSON payload in ``response``.

    The ``json`` language tag after the opening fence is optional.

    Args:
        response: The raw text returned by the model.

    Returns:
        The decoded JSON value, or ``None`` when there is no fenced block or
        its contents are not valid JSON.
    """
    match = re.search(r'```(?:json)?\s*(.*?)```', response, re.DOTALL | re.IGNORECASE)
    if match is None:
        return None
    try:
        return json.loads(match.group(1).strip())
    except json.JSONDecodeError:
        return None


# Maps user name -> list of {"task_name", "analysis"} records, one per task
# whose reply could be parsed.
results = defaultdict(list)
for user_profile in profiles_list:
    for task in all_task:
        response = chain_ut.run(user_profile=user_profile, task=task)
        response_json = _parse_fenced_json(response)
        if response_json is None:
            # One bad reply must not drop the user's remaining tasks or abort
            # the whole run; report it and move on to the next task.
            print(f"Skipping {user_profile['Name']!r} / {task!r}: no parsable JSON block in response")
            continue

        results[user_profile["Name"]].append({
            "task_name": task,
            "analysis": response_json
        })


In [13]:
# Spot-check: show the per-task analysis records collected for one user.
results['Nate Lee']

[{'task_name': 'Walking 1000 steps',
  'analysis': {'participation_probability': 0.6,
   'like': 1,
   'explanation': "Nate is a software engineer with a good fitness level who exercises twice a week. This suggests he values physical activity. While he is introverted, walking 1000 steps is a low-key activity that can be done solo.  His age and fatherhood may make him more inclined towards health and fitness. The app_usage_level of 40 indicates moderate app engagement, suggesting he's open to trying new features like this."}},
 {'task_name': 'Sleep 8 hours',
  'analysis': {'participation_probability': 0.6,
   'like': 1,
   'explanation': "Nate is a software engineer with a young daughter, suggesting he might value sleep for his work and family responsibilities. While his fitness level indicates some focus on health, it doesn't necessarily mean he prioritizes sleep. His introverted nature could also align with a preference for rest. The app usage level is moderate, indicating he might be

In [21]:
import json
import time
import codecs


# Snapshot of the run: the model name, both prompts, the task catalogue and
# every parsed response. Built before the file is opened so that a failure here
# does not leave an empty file behind.
to_write_dict = {
    "llm": llm_to_use,
    "user_prompt": generate_user_prompt,
    "user_task_prompt": ut_prompt,
    "task_list": all_task,
    "user_response": profiles_list,
    "user_task_response": results
}

# Each run gets its own file, named after the current Unix timestamp.
output_dir = './prompts'
os.makedirs(output_dir, exist_ok=True)  # a fresh checkout may not have the folder

# The context manager flushes and closes the handle, so the JSON is complete on
# disk as soon as this cell finishes.
with open(f'{output_dir}/{time.time()}.json', 'w', encoding='utf-8') as file_to_write:
    json.dump(to_write_dict, file_to_write, indent=4)


In [18]:
# Spot-check: show the per-task analysis records collected for another user.
results['William Lim']

[{'task_name': 'Walking 1000 steps',
  'analysis': {'participation_probability': 0.8,
   'like': 1,
   'explanation': 'William is a 52 year old male doctor who enjoys hiking and spending time with his family. He has good fitness and hikes regularly. This suggests he is active and likely enjoys walking. His app usage level of 45 indicates he is a regular user. Therefore, he is likely to participate in a 1000 step walking task and would likely enjoy it.'}},
 {'task_name': 'Sleep 8 hours',
  'analysis': {'participation_probability': 0.7,
   'like': 1,
   'explanation': 'William Lim is a 52-year-old male doctor who is family-oriented and enjoys hiking. He is in good fitness and exercises regularly. This suggests he values a healthy lifestyle and understands the importance of sleep. While he might not have the time for 8 hours of sleep every night due to his occupation, he is likely to appreciate the recommendation and make an effort to prioritize sleep.'}},
 {'task_name': 'Jogging for 30 m

In [19]:
# Display the snapshot dictionary that is serialised to the prompts folder.
to_write_dict

{'llm': 'gemini-1.5-flash',
 'user_prompt': '\nCould you generate 20 Singaporean profiles between the age of 18 to 70, please include their name, age, gender, lifestyle, occupation, fitness, and app_usage_level (a random number from 0-100).\n\nYou should consider races diversity\n\nReturn the answer in json list format denoted in triple backtick below\n```\n[\n{"Name": Nate Lee, "Age": 36, "Gender": "Male", "Lifestyle": "Very introverted, loves good food, a father of a 16 month daughter", "Occupation": "Software Engineer", "Fitness": "Good, exercises twice a week and tries to be active", "app_usage_level": 40},\n{...},\n{...},\n{...}\n]\n```\n',
 'user_task_prompt': '\n    For the below user in triple backticks\n\n\n    ```{user_profile}```\n\n\n    Making use of all user profile information, including Age, Gender, Lifestyle, Occupation, Fitness,\n    and app_usage_level (how much the user uses the app)."\n    Generate participation probability (1 for 100 percent completion probability