In [None]:
import datetime
import os
import random

import pandas as pd

# Answer pools for every survey question, based on example data.
# Each respondent's answer is drawn uniformly at random from the matching pool.
age_ranges = ['18-20', '21-23', '24-26']
genders = ['Male', 'Female']
faculties = ['FAS', 'FLFN', 'FAPM', 'FOT', 'BSF','FOM']
years_of_study = ['1st year', '2nd year', '3rd year', '4th year', 'Postgraduate']
sleep_hours = ['Less than 5 hours', '5-6 hours', '6-8 hours', 'More than 8 hours']
exercise_frequencies = ['Never', 'Rarely', '1-2 times per week', '3-4 times per week', 'Daily']
smoke_alcohol = ['Yes', 'No', 'Occasionally']
main_meals = [2, 3]
snacks = ['Yes', 'No']
food_types = ['Traditional Sri Lankan food', 'Fast food', 'Processed foods', 'Fruits and vegetables', 'Rice and curry']
# NOTE(review): 'Once a week' and 'Weekly' look like the same answer; kept as-is
# because the pool mirrors the example data -- confirm whether both are wanted.
fast_food_frequency = ['Once a week', '2-3 times a week', 'Weekly', 'Never', 'Daily']
specific_diets = ['No specific diet', 'Vegetarian/Vegan meals', 'Low-carb diet']
food_allergies = ['None', 'Gluten', 'Lactose', 'Seafood', 'Dryfish']
weight_range = ['40 - 49 kg', '50 - 59 kg', '60 - 69 kg', '70 - 79 kg', '80 - 89 kg']
health_conditions = ['None', 'Gastritis', 'High cholesterol', 'Gallstones']
health_status = ['Healthy', 'Average', 'Very Healthy']
medications = ['Yes', 'No']
symptoms = ['None', 'Stomach pain', 'Bloating', 'Fatigue', 'Constipation']
weekly_spend_range = ['Less than 1,000 LKR', '1,000 - 2,000 LKR', '2,000 - 3,000 LKR', 'More than 3,000 LKR']
canteen_usage = ['Daily', 'Occasionally', 'Rarely/Never']
willingness = ['Very willing', 'Somewhat willing', 'Neutral']
preferred_foods = ['Rice and curry', 'Hoppers', 'Roti', 'String hoppers', 'Kottu', 'Jackfruit curry']
nutrition_knowledge = ['Neutral', 'Somewhat familiar', 'Very familiar']
recommendation_method = ['Mobile app notifications', 'Weekly diet plans', 'Email']
goals = ['Better health', 'Manage health conditions', 'Weight management', 'Sustainable diet']
heights = [150, 157, 160, 165, 172, 175]

# Number of synthetic data points to generate
num_samples = 200

# Timestamps must fall on a random date after 4/12/2024 (day/month/year), so the
# earliest possible response day is 5 Dec 2024. Previously every row was pinned
# to that single day and only the time of day varied.
first_response_date = datetime.datetime(2024, 12, 5)
# Length of the simulated collection period in days.
# NOTE(review): 30 days is an assumed window -- adjust to the real survey period.
response_window_days = 30

# Initialize list to store synthetic data (one dict per respondent)
synthetic_data = []

for _ in range(num_samples):
    # Generate Timestamp: random day inside the window plus a random time of day
    timestamp = first_response_date + datetime.timedelta(
        days=random.randrange(response_window_days),
        hours=random.randint(0, 23),
        minutes=random.randint(0, 59),
        seconds=random.randint(0, 59),
    )

    # Randomly select values for single-answer categorical columns
    age = random.choice(age_ranges)
    gender = random.choice(genders)
    faculty = random.choice(faculties)
    year_of_study = random.choice(years_of_study)
    sleep = random.choice(sleep_hours)
    exercise = random.choice(exercise_frequencies)
    smoke_alcohol_choice = random.choice(smoke_alcohol)
    main_meals_count = random.choice(main_meals)
    snack = random.choice(snacks)
    # Multi-select answer: a random non-empty subset, comma-separated
    food_type = ', '.join(random.sample(food_types, random.randint(1, len(food_types))))
    fast_food = random.choice(fast_food_frequency)
    diet = random.choice(specific_diets)
    allergies = random.choice(food_allergies)
    weight = random.choice(weight_range)
    health_condition = random.choice(health_conditions)
    health_rating = random.choice(health_status)
    medication = random.choice(medications)
    symptom = random.choice(symptoms)
    weekly_spend = random.choice(weekly_spend_range)
    canteen = random.choice(canteen_usage)
    food_willingness = random.choice(willingness)
    # Multi-select answer: a random non-empty subset, comma-separated
    preferred_food = ', '.join(random.sample(preferred_foods, random.randint(1, len(preferred_foods))))
    nutrition_knowledge_choice = random.choice(nutrition_knowledge)
    recommendation = random.choice(recommendation_method)
    goal = random.choice(goals)
    height = random.choice(heights)

    # Create a synthetic sample; keys are the exact survey question texts
    sample = {
        "Timestamp": timestamp.strftime("%d/%m/%Y %H:%M:%S"),
        "What is your age?": age,
        "What is your gender?": gender,
        "What is your faculty?": faculty,
        "What year of study are you in?": year_of_study,
        "How many hours of sleep do you get on average per night?": sleep,
        "How often do you exercise or engage in physical activities?": exercise,
        "Do you smoke or consume alcohol?": smoke_alcohol_choice,
        "How many main meals do you eat per day?": main_meals_count,
        "Do you regularly snack between meals?": snack,
        "What types of food do you usually eat? (Select all that apply)": food_type,
        "How often do you eat fast food?": fast_food,
        "Do you follow any specific diet?": diet,
        "Do you have any food allergies or intolerances? (Select all that apply)": allergies,
        # Free-text question: always left blank in the synthetic data
        "Mention if you have any food restrictions": "",
        "What is your current weight?": weight,
        "Do you have any known health conditions that affect your diet? (Select all that apply)": health_condition,
        "How would you rate your current health status?": health_rating,
        "Mention if you currently take any medications that affect your diet?": medication,
        "Have you experienced any of the following symptoms recently? (Select all that apply)": symptom,
        "How much money do you spend on food per week?": weekly_spend,
        "How often do you eat at university canteens?": canteen,
        "How willing are you to try new foods?": food_willingness,
        "Which Sri Lankan foods do you prefer? (Select all that apply)": preferred_food,
        "How familiar are you with basic nutrition concepts (e.g., calories, proteins, carbs)?": nutrition_knowledge_choice,
        "How would you prefer to receive personalized diet recommendations?": recommendation,
        "What is your main goal for using the diet recommendation system?": goal,
        "What is your height (cm) ?": height
    }

    # Add the sample to the list
    synthetic_data.append(sample)

# Create a DataFrame with one row per synthetic respondent; the columns are the
# survey question texts used as keys in each sample dict.
df_synthetic = pd.DataFrame(synthetic_data)

# Keep the export location in one place instead of repeating the literal.
output_path = "/mnt/data/synthetic_student_diet_data.xlsx"

# Make sure the target directory exists before writing; the write fails if the
# parent directory is missing (e.g. /mnt/data on a fresh runtime).
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Save to an Excel file (index=False leaves out the DataFrame row index)
df_synthetic.to_excel(output_path, index=False)

# Bare expression so the notebook cell echoes the saved file's path.
output_path


'/mnt/data/synthetic_student_diet_data.xlsx'

In [None]:
# Colab-only helper module: this import is available only inside a Google
# Colab runtime.
from google.colab import files
# Ask the browser to download the generated workbook; the path must match the
# location the export step wrote to.
files.download("/mnt/data/synthetic_student_diet_data.xlsx")


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>