Import faker, json library

In [1]:
import random
from faker import Faker
import json
from datetime import datetime
from sqlalchemy import create_engine, Column, String, Integer, Boolean, DateTime
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

f = Faker()

Definiranje sheme za profil kojeg ćemo generirati

In [2]:
# SQLite baza za SQLAlchemy
engine = create_engine('sqlite:///profiles.db')
Base = declarative_base()

# Definiram profil kojeg koristim u generatoru kasnije
class Profile(Base):
    __tablename__ = 'profiles'
    id = Column(Integer, primary_key=True)
    createDate = Column(DateTime)
    customerId = Column(String)
    email = Column(String)
    loyaltyId = Column(String)
    phoneNumber = Column(String)
    age = Column(Integer)
    favouriteCategory = Column(String)
    favouriteSubCategory = Column(String)
    favoriteColor = Column(String)
    pantsSize = Column(String)
    shirtSize = Column(String)
    shoeSize = Column(Integer)
    userAccountID = Column(String)
    joinDate = Column(DateTime)
    points = Column(Integer)
    consent_val = Column(Boolean)
    preferred_communication = Column(String)
    city = Column(String)
    country = Column(String)
    countryCode = Column(String)
    street1 = Column(String)
    postalCode = Column(String)
    mobilePhoneNumber = Column(String)
    birthDayAndMonth = Column(String)
    birthYear = Column(Integer)
    lastName = Column(String)
    fullName = Column(String)
    firstName = Column(String)
    gender = Column(String)
    personalEmail = Column(String)
    testProfile = Column(Boolean)

# Stvori mi tablicu u bazi
Base.metadata.create_all(engine)
Session = sessionmaker(bind=engine)
session = Session()


Definiramo funkciju <i>generate_profile

In [3]:
# Lokalni fejkeri
fakers = {
    "US": Faker("en_US"),
    "UK": Faker("en_GB"),
    "Canada": Faker("en_CA"),
    "Australia": Faker("en_AU"),
}

In [4]:
def generate_profile():
    # Random država
    country_choice = random.choice(list(fakers.keys()))
    f = fakers[country_choice]

    #Randomizer imena po spolu da izgleda realnije
    gender = random.choice(["male", "female", "other"])
    if gender == "male":
        first_name = f.first_name_male()
    elif gender == "female":
        first_name = f.first_name_female()
    else:
        first_name = f.first_name()

    last_name = f.last_name()
    full_name = f"{first_name} {last_name}"
    age = random.randint(18, 70)
    birth_year = 2023 - age
    email = generate_email(first_name, last_name)

    profile = {
        "createDate": f.date_time_this_year().isoformat(),
        "identification": {
            "customerId": f.random_number(digits=9, fix_len=True),
            "email": email,
            "loyaltyId": f.random_number(digits=13, fix_len=True),
            "phoneNumber": f.phone_number()
        },
        "individualCharacteristics": {
            "core": {
                "age": age,
                "favouriteCategory": random.choice(["Mens", "Womens", "Kids"]),
                "favouriteSubCategory": random.choice(["Jackets and Hoodies", "Jeans", "T-shirts"])
            },
            "retail": {
                "favoriteColor": f.color_name(),
                "pantsSize": random.choice(["XS", "S", "M", "L", "XL"]),
                "shirtSize": random.choice(["XS", "S", "M", "L", "XL"]),
                "shoeSize": random.randint(36, 45)
            }
        },
        "userAccount": {
            "ID": f.random_number(digits=9, fix_len=True)
        },
        "loyalty": {
            "loyaltyID": f.random_number(digits=13, fix_len=True),
            "joinDate": f.date_time_this_decade().isoformat(),
            "points": random.randint(0, 1000000)
        },
        "consents": {
            "collect": {
                "val": random.choice(["y", "n"])
            },
            "marketing": {
                "preferred": random.choice(["email", "sms", "push"])
            }
        },
        "homeAddress": {
            "city": f.city(),
            "country": country_choice,
            "countryCode": f.country_code(),
            "street1": f.street_address(),
            "postalCode": f.postcode()
        },
        "mobilePhone": {
            "number": f.phone_number()
        },
        "person": {
            "birthDayAndMonth": f"{f.date_of_birth().strftime('%m-%d')}",
            "birthYear": birth_year,
            "name": {
                "lastName": last_name,
                "fullName": full_name,
                "firstName": first_name
            },
            "gender": gender
        },
        "personalEmail": {
            "address": email
        },
        "testProfile": True
    }
    return profile

    #mali generator za email, nekoliko opcija za realni mail
    
def generate_email(first_name, last_name):
    domain = random.choice(["gmail.com", "mail.com", "retail.com", "hotmail.com"])
    email_formats = [
        f"{first_name}.{last_name}@{domain}",
        f"{first_name[0]}{last_name}@{domain}",
        f"{first_name}{last_name[0]}@{domain}",
        f"{first_name}_{last_name}@{domain}",
        f"{first_name}{last_name}@{domain}",
    ]
    return random.choice(email_formats).lower()

Generiraj n profila i prikaži u JSON formatu

In [8]:
num_profiles = 5
profiles_data = [generate_profile() for _ in range(num_profiles)]

print(json.dumps(profiles, indent=4))


[
    {
        "createDate": "2024-08-13T15:06:55",
        "identification": {
            "customerId": 304351074,
            "email": "rwalters@retail.com",
            "loyaltyId": 6277961757366,
            "phoneNumber": "(0117)4960508"
        },
        "individualCharacteristics": {
            "core": {
                "age": 23,
                "favouriteCategory": "Mens",
                "favouriteSubCategory": "Jackets and Hoodies"
            },
            "retail": {
                "favoriteColor": "Thistle",
                "pantsSize": "L",
                "shirtSize": "M",
                "shoeSize": 45
            }
        },
        "userAccount": {
            "ID": 597964110
        },
        "loyalty": {
            "loyaltyID": 4991556123672,
            "joinDate": "2023-04-16T16:35:12",
            "points": 951405
        },
        "consents": {
            "collect": {
                "val": "n"
            },
            "marketing": {
              

Možemo ih i spremiti u file <i> customer_profiles.json.</i> Datoteka se nalazi u istom direktoriju.

In [9]:
with open("customer_profiles.json", "w") as file:
    json.dump(profiles, file, indent=4)
print("Profiles saved to customer_profiles.json")

Profiles saved to customer_profiles.json


Nakon kreacije sample profila, ubacujemo ih u bazu.

In [10]:
for data in profiles_data:
    profile = Profile(
        createDate=datetime.fromisoformat(data["createDate"]),
        customerId=str(data["identification"]["customerId"]),
        email=data["identification"]["email"],
        loyaltyId=str(data["identification"]["loyaltyId"]),
        phoneNumber=data["identification"]["phoneNumber"],
        age=data["individualCharacteristics"]["core"]["age"],
        favouriteCategory=data["individualCharacteristics"]["core"]["favouriteCategory"],
        favouriteSubCategory=data["individualCharacteristics"]["core"]["favouriteSubCategory"],
        favoriteColor=data["individualCharacteristics"]["retail"]["favoriteColor"],
        pantsSize=data["individualCharacteristics"]["retail"]["pantsSize"],
        shirtSize=data["individualCharacteristics"]["retail"]["shirtSize"],
        shoeSize=data["individualCharacteristics"]["retail"]["shoeSize"],
        userAccountID=str(data["userAccount"]["ID"]),
        joinDate=datetime.fromisoformat(data["loyalty"]["joinDate"]),
        points=data["loyalty"]["points"],
        consent_val=(data["consents"]["collect"]["val"] == "y"),
        preferred_communication=data["consents"]["marketing"]["preferred"],
        city=data["homeAddress"]["city"],
        country=data["homeAddress"]["country"],
        countryCode=data["homeAddress"]["countryCode"],
        street1=data["homeAddress"]["street1"],
        postalCode=data["homeAddress"]["postalCode"],
        mobilePhoneNumber=data["mobilePhone"]["number"],
        birthDayAndMonth=data["person"]["birthDayAndMonth"],
        birthYear=data["person"]["birthYear"],
        lastName=data["person"]["name"]["lastName"],
        fullName=data["person"]["name"]["fullName"],
        firstName=data["person"]["name"]["firstName"],
        gender=data["person"]["gender"],
        personalEmail=data["personalEmail"]["address"],
        testProfile=data["testProfile"]
    )
    session.add(profile)  # Add profile to session

# Commit all profiles to the database
session.commit()
print(f"Inserted {num_profiles} profiles into the database.")

Inserted 5 profiles into the database.
