## Libraries

In [2]:
import pandas as pd

## OkCupid Example CSV

In [3]:
# Load the OkCupid sample profiles; the relative path is resolved against the
# notebook's working directory.
okcupid_df = pd.read_csv('okcupid_profiles_1.csv')

In [5]:
# (rows, columns) of the loaded frame.
okcupid_df.shape

(19999, 31)

In [7]:
# List the column names to see which profile fields are available.
okcupid_df.columns

Index(['age', 'status', 'sex', 'orientation', 'body_type', 'diet', 'drinks',
       'drugs', 'education', 'ethnicity', 'height', 'income', 'job',
       'last_online', 'location', 'offspring', 'pets', 'religion', 'sign',
       'smokes', 'speaks', 'essay0', 'essay1', 'essay2', 'essay3', 'essay4',
       'essay5', 'essay6', 'essay7', 'essay8', 'essay9'],
      dtype='object')

In [6]:
# Preview the first five rows.
okcupid_df.head()

Unnamed: 0,age,status,sex,orientation,body_type,diet,drinks,drugs,education,ethnicity,...,essay0,essay1,essay2,essay3,essay4,essay5,essay6,essay7,essay8,essay9
0,22,single,m,straight,a little extra,strictly anything,socially,never,working on college/university,"asian, white",...,about me: i would love to think that i was so...,currently working as an international agent fo...,making people laugh. ranting about a good salt...,"the way i look. i am a six foot half asian, ha...","books: absurdistan, the republic, of mice and ...",food. water. cell phone. shelter.,duality and humorous things,trying to find someone to hang out with. i am ...,i am new to california and looking for someone...,you want to be swept off your feet! you are ti...
1,35,single,m,straight,average,mostly other,often,sometimes,working on space camp,white,...,i am a chef: this is what that means. 1. i am ...,dedicating everyday to being an unbelievable b...,being silly. having ridiculous amonts of fun w...,,i am die hard christopher moore fan. i don't r...,delicious porkness in all of its glories. my b...,,,i am very open and will share just about anyth...,
2,38,available,m,straight,thin,anything,socially,,graduated from masters program,,...,"i'm not ashamed of much, but writing public te...","i make nerdy software for musicians, artists, ...",improvising in different contexts. alternating...,my large jaw and large glasses are the physica...,okay this is where the cultural matrix gets so...,movement conversation creation contemplation t...,,viewing. listening. dancing. talking. drinking...,"when i was five years old, i was known as ""the...","you are bright, open, intense, silly, ironic, ..."
3,23,single,m,straight,thin,vegetarian,socially,,working on college/university,white,...,i work in a library and go to school. . .,reading things written by old dead people,playing synthesizers and organizing books acco...,socially awkward but i do my best,"bataille, celine, beckett. . . lynch, jarmusch...",,cats and german philosophy,,,you feel so inclined.
4,29,single,m,straight,athletic,,socially,never,graduated from college/university,"asian, black, other",...,hey how's it going? currently vague on the pro...,work work work work + play,creating imagery to look at: http://bagsbrown....,i smile a lot and my inquisitive nature,"music: bands, rappers, musicians at the moment...",,,,,


In [8]:
# Save only the first five rows (index column omitted) as a small sample file.
# NOTE(review): presumably this is the example handed to ChatGPT in the next
# section — confirm.
okcupid_df.head().to_csv('okcupid_profiles_1_head.csv', index=False)

## Use ChatGPT to Help Generate Fake Profiles

In [3]:
import random
from faker import Faker

# Initialize the shared Faker instance; generate_random_profile() reads this
# module-level name to produce birth dates and bio text.
fake = Faker()

# Function to generate random profiles
def generate_random_profile():
    """Build one synthetic profile record.

    The birth date and bio are produced by the module-level ``fake`` Faker
    instance; every other field is drawn with the ``random`` module.

    Returns:
        dict: One record with the keys ``BirthDate``, ``Gender``,
        ``PreferredGender``, ``LookingFor``, ``Faculty``, ``PersonalTraits``,
        ``Interests``, ``Values`` and ``Bio``. ``PersonalTraits``,
        ``Interests`` and ``Values`` are ``', '``-joined strings holding one
        to three distinct choices each.
    """
    def pick_some(options):
        # One to three distinct entries, flattened to a single string.
        return ', '.join(random.sample(options, k=random.randint(1, 3)))

    # Values are evaluated top to bottom, so the order of random draws is
    # exactly the order of the keys below.
    return {
        'BirthDate': fake.date_of_birth(minimum_age=18, maximum_age=30),
        'Gender': random.choice(['m', 'f', 'others']),
        'PreferredGender': random.choice(['m', 'f', 'others', 'any']),
        'LookingFor': random.choice([
            'long-term partner',
            'long-term but short-term OK',
            'short-term but long-term OK',
            'casual fun',
            'new friends',
            'still figuring it out',
        ]),
        'Faculty': random.choice([
            'Engineering', 'Business', 'Architecture', 'Law',
            'Science', 'Medicine', 'Arts',
        ]),
        'PersonalTraits': pick_some([
            'outgoing', 'shy', 'funny', 'adventurous', 'thoughtful',
            'introverted', 'creative', 'ambitious', 'curious',
        ]),
        'Interests': pick_some([
            'reading', 'traveling', 'sports', 'music', 'cooking',
            'hiking', 'gaming', 'photography', 'technology', 'fashion',
        ]),
        'Values': pick_some([
            'honesty', 'loyalty', 'creativity', 'responsibility',
            'empathy', 'respect', 'independence', 'spirituality',
        ]),
        'Bio': fake.text(max_nb_chars=200),
    }

# Generate random profiles
num_profiles = 100  # Generate 100 profiles
RANDOM_SEED = 42  # any fixed value; change it for a different but repeatable sample

# Seed both generators immediately before drawing so the generated profiles do
# not depend on whatever random state earlier cells left behind.
random.seed(RANDOM_SEED)  # drives the random.choice / random.sample draws
Faker.seed(RANDOM_SEED)   # drives fake.date_of_birth / fake.text
# NOTE(review): birth dates are requested by age, so they may still shift with
# the date the notebook is run — confirm if exact dates matter.
profiles = [generate_random_profile() for _ in range(num_profiles)]

# Convert to DataFrame (one row per generated profile)
df_profiles = pd.DataFrame(profiles)
assert len(df_profiles) == num_profiles, "unexpected number of generated profiles"

# Save to CSV
df_profiles.to_csv('random_chulalongkorn_profiles.csv', index=False)

In [4]:
# Display the generated profiles.
df_profiles

Unnamed: 0,BirthDate,Gender,PreferredGender,LookingFor,Faculty,PersonalTraits,Interests,Values,Bio
0,1999-01-10,others,any,long-term partner,Business,"introverted, curious, funny","sports, fashion",responsibility,Fear relationship account range life mention g...
1,2002-03-20,f,others,long-term partner,Arts,ambitious,"hiking, gaming, reading","independence, honesty, creativity",Statement dinner approach measure near member....
2,2005-09-08,f,f,long-term partner,Law,curious,reading,"loyalty, responsibility",East instead value may score memory up. Loss w...
3,2000-07-16,f,f,new friends,Law,"thoughtful, shy","fashion, sports, reading","spirituality, respect, independence",Laugh western different recent. Sure ready end...
4,2000-11-25,f,f,long-term partner,Arts,"funny, introverted, shy",music,responsibility,Resource determine body military short hand. S...
...,...,...,...,...,...,...,...,...,...
95,2006-03-06,others,any,short-term but long-term OK,Law,thoughtful,technology,"empathy, loyalty, independence",The increase pass. After analysis media cultur...
96,1996-01-21,f,f,casual fun,Architecture,curious,"gaming, technology, sports","honesty, loyalty",Skin market government majority our western. M...
97,1999-09-23,f,others,long-term but short-term OK,Medicine,"curious, outgoing, thoughtful",cooking,"creativity, responsibility, empathy",Bar future note movie pretty a. Analysis analy...
98,2000-02-22,m,m,still figuring it out,Medicine,"ambitious, funny","fashion, music","empathy, respect, responsibility",Technology where if into. Chair hotel war pull...
