In [7]:
import pandas as pd
import numpy as np
import random
from faker import Faker
from datetime import datetime
import math
pd.set_option('display.max_columns', None)

In [10]:
fake = Faker('id_ID')
Faker.seed(42)
random.seed(42)

nama_lokasi_list = ['Mall Central', 'Cafe Kopi Starbucks', 'Terminal Kota', 'Co-Working Office', 'Taman Kota',
                    'Stasiun Utama', 'Mall Grand', 'Warung Kopi Indomie', 'Kampus Utama', 'Taman Literasi']
merk_hp_list = ['Samsung', 'iPhone', 'Oppo', 'Xiaomi', 'Motorola', 'Realme', 'Nokia', 'Zenfone']
minat_digital_list = ['Media Sosial', 'E-commerce', 'Transportasi Online', 'Teknologi & Gadget', 'Game Mobile',
                      'Berita Online', 'Fashion & Lifestyle', 'Konten Video', 'Aplikasi Produktivitas', 'Musik Streaming']
tipe_lokasi_list = ['Mall', 'Kafe', 'Transportasi', 'Co-Working Space', 'Taman']

def generate_email(name):
    domains = ['gmail.com', 'mail.com', 'ymail.com', 'email.com', 'hotmail.com']
    username = name.lower().replace(' ', '.') + str(random.randint(1, 99))
    return f"{username}@{random.choice(domains)}"

data_dummy = []
for _ in range(1000):
    name = fake.name()
    row = {
        "Nama Lokasi": random.choice(nama_lokasi_list),
        "Jam Login": fake.time(pattern="%H:%M"),
        "Nama": name,
        "Email": generate_email(name),
        "No Telepon": fake.phone_number(),
        "Tahun Lahir": random.randint(1975, 2005),
        "Merk HP": random.choice(merk_hp_list),
        "Minat Digital": random.choice(minat_digital_list),
        "Tipe Lokasi": random.choice(tipe_lokasi_list)
    }
    data_dummy.append(row)

df_dummy = pd.DataFrame(data_dummy)


In [11]:
df_dummy

Unnamed: 0,Nama Lokasi,Jam Login,Nama,Email,No Telepon,Tahun Lahir,Merk HP,Minat Digital,Tipe Lokasi
0,Cafe Kopi Starbucks,03:52,"Balidin Dongoran, S.T.","balidin.dongoran,.s.t.4@ymail.com",+62 (18) 196 0013,1982,Xiaomi,Transportasi Online,Mall
1,Kampus Utama,20:20,Johan Suartini,johan.suartini12@hotmail.com,+62-863-794-0265,1988,Samsung,Media Sosial,Mall
2,Co-Working Office,05:44,Eva Pratama,eva.pratama30@hotmail.com,+62 (116) 155 9407,1994,Samsung,Aplikasi Produktivitas,Kafe
3,Kampus Utama,05:33,R. Virman Waluyo,r..virman.waluyo54@mail.com,+62 (0959) 310-3413,1989,Motorola,Media Sosial,Kafe
4,Mall Grand,12:46,"Puti Kezia Setiawan, M.M.","puti.kezia.setiawan,.m.m.44@ymail.com",+62 (255) 341-9283,1979,Xiaomi,Berita Online,Mall
...,...,...,...,...,...,...,...,...,...
995,Mall Grand,23:27,"KH. Bagas Sihotang, S.I.Kom","kh..bagas.sihotang,.s.i.kom46@ymail.com",(0325) 737 5534,1984,Oppo,Media Sosial,Taman
996,Taman Kota,12:07,Rahmi Damanik,rahmi.damanik94@hotmail.com,(066) 708-0850,1983,Samsung,Konten Video,Taman
997,Terminal Kota,08:05,drg. Ami Mayasari,drg..ami.mayasari74@gmail.com,+62 (874) 951 1918,1996,Motorola,Musik Streaming,Mall
998,Taman Kota,12:44,"R. Kasiran Manullang, M.TI.","r..kasiran.manullang,.m.ti.63@gmail.com",+62 (097) 931 6477,1979,iPhone,Game Mobile,Transportasi


In [12]:
df_dummy['Usia'] = 2025 - df_dummy['Tahun Lahir']

def klasifikasi_generasi(tahun_lahir):
    if 1928 <= tahun_lahir <= 1945:
        return "Generasi Silent"
    elif 1946 <= tahun_lahir <= 1964:
        return "Generasi Baby Boomers"
    elif 1965 <= tahun_lahir <= 1980:
        return "Generasi X"
    elif 1981 <= tahun_lahir <= 1996:
        return "Generasi Milenial"
    elif 1997 <= tahun_lahir <= 2012:
        return "Generasi Z"
    elif tahun_lahir >= 2013:
        return "Generasi Alpha"
    else:
        return "Tidak Diketahui"
df_dummy["Generasi"] = df_dummy["Tahun Lahir"].apply(klasifikasi_generasi)

def klasifikasi_engagement_clock(jam_login):
    jam_obj = datetime.strptime(jam_login, "%H:%M")
    jam = jam_obj.hour
    menit = jam_obj.minute

    if (7 <= jam < 9) or (jam == 9 and menit == 0):
        return "Peak 1"
    elif (12 <= jam < 14) or (jam == 14 and menit == 0):
        return "Peak 2"
    elif (19 <= jam < 21) or (jam == 21 and menit == 0):
        return "Peak 3"
    else:
        return "Low Engagement Hours"
df_dummy["Engagement Clock"] = df_dummy["Jam Login"].apply(klasifikasi_engagement_clock)

def time_custom_grouping(jam_login):
    jam_obj = datetime.strptime(jam_login, "%H:%M")
    jam = jam_obj.hour

    if 0 <= jam <= 5:
        return "Dini Hari"
    elif 6 <= jam <= 10:
        return "Pagi"
    elif 11 <= jam <= 13:
        return "Tengah Hari"
    elif 14 <= jam <= 17:
        return "Sore"
    elif 18 <= jam <= 21:
        return "Prime Time"
    elif 22 <= jam <= 23:
        return "Late Night"
    else:
        return "Tidak Diketahui"
df_dummy["Time Custom Grouping"] = df_dummy["Jam Login"].apply(time_custom_grouping)

def hitung_skor_minat_digital(akses_harian, durasi, interaksi, platform):
    bobot_akses_harian = 0.30
    bobot_durasi = 0.30
    bobot_interaksi = 0.20
    bobot_platform = 0.20

    if not (1 <= akses_harian <= 10):
        raise ValueError("Skor Akses Harian harus antara 1 dan 10")
    if not (1 <= durasi <= 10):
        raise ValueError("Skor Durasi harus antara 1 dan 10")
    if not (1 <= interaksi <= 10):
        raise ValueError("Skor Interaksi harus antara 1 dan 10")
    if not (1 <= platform <= 10):
        raise ValueError("Skor Platform harus antara 1 dan 10")
    
    # Hitung skor minat digital
    skor_minat = (akses_harian * bobot_akses_harian + 
                  durasi * bobot_durasi + 
                  interaksi * bobot_interaksi + 
                  platform * bobot_platform)
    
    return skor_minat

akses_harian = 8     
durasi = 7           
interaksi = 5        
platform = 5        

df_dummy['skor minat'] = hitung_skor_minat_digital(akses_harian, durasi, interaksi, platform)


In [13]:
df_dummy

Unnamed: 0,Nama Lokasi,Jam Login,Nama,Email,No Telepon,Tahun Lahir,Merk HP,Minat Digital,Tipe Lokasi,Usia,Generasi,Engagement Clock,Time Custom Grouping,skor minat
0,Cafe Kopi Starbucks,03:52,"Balidin Dongoran, S.T.","balidin.dongoran,.s.t.4@ymail.com",+62 (18) 196 0013,1982,Xiaomi,Transportasi Online,Mall,43,Generasi Milenial,Low Engagement Hours,Dini Hari,6.5
1,Kampus Utama,20:20,Johan Suartini,johan.suartini12@hotmail.com,+62-863-794-0265,1988,Samsung,Media Sosial,Mall,37,Generasi Milenial,Peak 3,Prime Time,6.5
2,Co-Working Office,05:44,Eva Pratama,eva.pratama30@hotmail.com,+62 (116) 155 9407,1994,Samsung,Aplikasi Produktivitas,Kafe,31,Generasi Milenial,Low Engagement Hours,Dini Hari,6.5
3,Kampus Utama,05:33,R. Virman Waluyo,r..virman.waluyo54@mail.com,+62 (0959) 310-3413,1989,Motorola,Media Sosial,Kafe,36,Generasi Milenial,Low Engagement Hours,Dini Hari,6.5
4,Mall Grand,12:46,"Puti Kezia Setiawan, M.M.","puti.kezia.setiawan,.m.m.44@ymail.com",+62 (255) 341-9283,1979,Xiaomi,Berita Online,Mall,46,Generasi X,Peak 2,Tengah Hari,6.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,Mall Grand,23:27,"KH. Bagas Sihotang, S.I.Kom","kh..bagas.sihotang,.s.i.kom46@ymail.com",(0325) 737 5534,1984,Oppo,Media Sosial,Taman,41,Generasi Milenial,Low Engagement Hours,Late Night,6.5
996,Taman Kota,12:07,Rahmi Damanik,rahmi.damanik94@hotmail.com,(066) 708-0850,1983,Samsung,Konten Video,Taman,42,Generasi Milenial,Peak 2,Tengah Hari,6.5
997,Terminal Kota,08:05,drg. Ami Mayasari,drg..ami.mayasari74@gmail.com,+62 (874) 951 1918,1996,Motorola,Musik Streaming,Mall,29,Generasi Milenial,Peak 1,Pagi,6.5
998,Taman Kota,12:44,"R. Kasiran Manullang, M.TI.","r..kasiran.manullang,.m.ti.63@gmail.com",+62 (097) 931 6477,1979,iPhone,Game Mobile,Transportasi,46,Generasi X,Peak 2,Tengah Hari,6.5


In [14]:
df_dummy.to_excel('Data Source Sample.xlsx')