# Pre-setup

Open TCP ports 8000 through 9000 in the firewall.

In [None]:
# Refresh the package index, then install and configure the ufw firewall.
# `-y` and `--force` answer the confirmation prompts up front, so the cell
# does not stall waiting for keyboard input during "Run all".
!sudo apt-get update
!sudo apt-get install -y ufw
!sudo ufw --force enable
# Open the TCP port range the web application may listen on.
!sudo ufw allow 8000:9000/tcp

# Install DPDG
Clone the project and install the requirements. If an error message with a restart button appears at the end, that is expected: click the button, close the message window, and run the next code block.

In [None]:
# Clone only when the checkout is not already present, so the cell can be re-run.
![ -d DPDG ] || git clone https://github.com/Pseudolukian/DPDG.git
%cd DPDG
# -p: create the output folders without failing when they already exist.
%mkdir -p gen_out
%mkdir -p static/generated_content
# %pip installs into the environment of the running kernel.
# A single --upgrade install replaces the former install + upgrade pair.
%pip install --upgrade pyngrok
%pip install -r requirements.txt

In [1]:
# Never store the ngrok authtoken in the notebook itself: anyone who can read
# the file can open tunnels on the account. Take it from the NGROK_AUTHTOKEN
# environment variable, or ask for it interactively (input is not echoed).
# A token that was ever committed to a shared notebook should be treated as
# leaked and rotated in the ngrok dashboard.
import os
from getpass import getpass

NGROK_AUTHTOKEN = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok authtoken: ")
!ngrok config add-authtoken {NGROK_AUTHTOKEN}
%cd DPDG

Authtoken saved to configuration file: /root/.ngrok2/ngrok.yml
/content/DPDG


# Working with DPDG in manual mode
In this code block, you can configure the data generators and export the generated data in various formats: JSON, XLS, CSV, and LiteSQL (SQLite).

The exported data will be saved in the "gen_out" folder.

In [None]:
#=====Importing generator classes========
from fakegenerator import FakeGenerator # Main generator class.
from file_exporter import File_Exporter # Class for exporting data to various formats.
from sql_exporter import SQL_exporter # Class for handling SQL dump logic.
from pathlib import Path

f_exp = File_Exporter()
json_out = Path("./gen_out/out.json")
# Make sure the export folder exists even if the install cell was skipped.
json_out.parent.mkdir(parents=True, exist_ok=True)

#=========Setting up SQL Exporter=============================#
# sql_engine -- parameter used to specify the SQL format. Current options: sqlite and postgresql.
# user and password -- parameters used exclusively for PostgreSQL connections.
# db_name -- parameter used for both SQLite and PostgreSQL databases.
sql_exp = SQL_exporter(sql_engine="sqlite", user="exporter",
                       password="exporter", db_name="pers_data_test")

# Number of generation passes; each pass produces one full set of data models.
N_RECORDS = 1

#========Main data generation loop==========#
for _ in range(N_RECORDS):
    f_g = FakeGenerator() # Parameters like age, sex, and country can be customized here.
    pers = f_g.generator.personal()
    pas = f_g.generator.passport()
    cont = f_g.generator.contacts()
    exp = f_g.generator.experience()
    dip = f_g.generator.diploma()
    ad = f_g.generator.address()
    bio = f_g.generator.biometric()
    dr_l = f_g.generator.driver_license()

    f_exp.buffer.add(pers, pas, ad, cont, exp, dip, bio, dr_l) # Add data models to the file buffer.
    sql_exp.buffer.add(pers, pas, ad, cont, exp, dip, bio, dr_l) # Add data models to the SQL buffer.

#To view the buffered data, you can run: print(f_exp.buffer.buf) or print(sql_exp.buffer.buf)

# Only the JSON export is performed here. The SQL buffer is filled but not
# written out by this cell; call sql_exp.dump_data(sql_buffer=sql_exp.buffer.buf)
# if a database dump is wanted as well.
f_exp.json(path_json=json_out)

# Web application

In [2]:
from threading import Thread
import threading
from pyngrok import ngrok
from pyngrok import conf
import uvicorn

# Ask an external echo service for this machine's public IP and store it in a file.
# -sS hides curl's progress meter (which otherwise clutters the cell output)
# while still printing an error message if the request fails.
!curl -sS ipecho.net/plain > IP_CONNECT.txt

# Read the address back, dropping the surrounding whitespace/newline.
with open("IP_CONNECT.txt", "r") as f:
    IP_ADDRESS = f.read().strip()

#========Import DPDG classes for data generation====#
from fakegenerator import FakeGenerator
from file_exporter import File_Exporter
from sql_exporter import SQL_exporter

#========Import libraries for path manipulation and typing====#
import os
from pathlib import Path
from typing import List, Dict, Callable

#========Import FAST API and Jinja2 for web application setup=======#
from fastapi import FastAPI, Request, Form
from fastapi.templating import Jinja2Templates
from fastapi.staticfiles import StaticFiles

#===============Initialize FAST API and configure the static files directory======#
# NOTE(review): debug=True makes the server return tracebacks on errors, and this
# app is published through a public tunnel further down -- consider turning it off.
app = FastAPI(debug=True)
try:
    current_directory = os.path.dirname(os.path.realpath(__file__))
except NameError:
    # __file__ is not defined inside a notebook kernel; use the working directory.
    current_directory = os.getcwd()
# Resolve both resource folders against the same base directory so they are
# found regardless of where the process was started from.
static_directory = os.path.join(current_directory, "static")
templates_directory = os.path.join(current_directory, "templates")
app.mount("/static", StaticFiles(directory=static_directory), name="static") # Mount the full path to the static folder.
templates = Jinja2Templates(directory=templates_directory) # Directory with the html templates.

#======Initialize DPDG generator classes======#
f_exp = File_Exporter()

#========Define paths to save generated data======#
path_to_save = Path("./static/generated_content/")
# Create the folder up front: the exporters and the cleanup routine below
# both expect it to exist.
path_to_save.mkdir(parents=True, exist_ok=True)
csv_path = path_to_save / "out.csv"
json_path = path_to_save / "out.json"
xls_path = path_to_save / "out.xls"
lsql_name = "out"


#=======Initialize SQL Exporter class and configure it.======#
sql_exp = SQL_exporter(db_path=path_to_save, db_name=lsql_name)

#========Main zone of web application set up===========================#
def reset_previuse_data() -> bool:
    """Delete every regular file located directly inside ``path_to_save``.

    Sub-directories (and anything inside them) are left untouched. A missing
    output folder is treated as "nothing to clean" instead of raising.

    Returns:
        bool: always ``True``.
    """
    if not path_to_save.is_dir():
        # Nothing has been generated yet, so there is nothing to remove.
        return True
    for file in path_to_save.iterdir():
        if file.is_file():
            file.unlink()
    return True

@app.get("/")
async def root(request: Request):
    """Serve the landing page.

    Previously generated files are wiped first, so every visit to the page
    starts from an empty output folder.
    """
    reset_previuse_data()
    context = {"request": request}
    return templates.TemplateResponse("index.html", context)

@app.post("/generate/")
async def generate(
    data: List[str] = Form(...),       # List of data models to be generated.
    sex: List[str] = Form(...),        # Sex parameters for data generation.
    country: List[str] = Form(...),    # Country parameters for data generation.
    min_age: int = Form(...),          # Minimum age for data generation.
    max_age: int = Form(...),          # Maximum age for data generation.
    file_types: List[str] = Form(...), # File formats to which data should be exported.
    value:int = Form(...)              # Number of data generation iterations (number of rows).
    ) -> Dict[str,str]:
    """Generate the requested data models and export them.

    For each of ``value`` iterations a fresh ``FakeGenerator`` is created and
    every generator method named in ``data`` is called on it. The results are
    added to the file and SQL buffers and then exported to each format listed
    in ``file_types`` ("JSON", "CSV", "XLS", "LiteSQL"); unknown formats are
    ignored.

    Returns:
        A mapping from each recognised requested file type to the path of the
        exported file, as a string.

    Raises:
        HTTPException: 400 when ``data`` names something that is not a public,
            callable generator method.
    """
    # Imported here so the handler does not depend on an edit to the import cell.
    from fastapi import HTTPException

    # `data` comes straight from the client. Refuse private/dunder names so a
    # request cannot invoke arbitrary attributes of the generator object.
    for model_name in data:
        if model_name.startswith("_"):
            raise HTTPException(status_code=400, detail=f"Unknown data model: {model_name!r}")

    models_in_req = []

    #=====Main data generation loop====#
    for _ in range(value):
        f_g = FakeGenerator(sex=sex, age=[min_age, max_age], country=country)
        for model_name in data:
            make_model = getattr(f_g.generator, model_name, None)
            if not callable(make_model):
                # Previously this surfaced as "NoneType is not callable" (HTTP 500).
                raise HTTPException(status_code=400, detail=f"Unknown data model: {model_name!r}")
            models_in_req.append(make_model())

    #=====Adding generated data to buffers===#
    # NOTE(review): f_exp and sql_exp are shared by all requests and nothing in
    # this handler clears their buffers -- confirm whether exports accumulate
    # rows from earlier requests.
    for mod in models_in_req:
        f_exp.buffer.add(mod)
        sql_exp.buffer.add(mod)

    #====Export generated data and collect the resulting paths===#
    # One table drives both the export call and the path reported to the client.
    exporters = {
        "JSON": (lambda: f_exp.json(path_json=json_path), json_path),
        "CSV": (lambda: f_exp.csv(save_to_string=True, path_csv=csv_path), csv_path),
        "XLS": (lambda: f_exp.xls(path_xls=xls_path), xls_path),
        "LiteSQL": (lambda: sql_exp.dump_data(sql_buffer=sql_exp.buffer.buf), path_to_save / lsql_name),
    }

    urls = {}
    for f_t in file_types:
        if f_t not in exporters:
            continue
        export, target = exporters[f_t]
        export()
        urls[f_t] = str(target)
    return urls


@app.post("/reset/")
async def reset() -> bool:
    """Remove previously generated files on request and report the outcome."""
    was_reset = reset_previuse_data()
    return was_reset

# Stop any ngrok process left over from an earlier run of this cell.
ngrok.kill()
conn_port = 8005

def run():
    """Serve the FastAPI app with uvicorn on all interfaces at ``conn_port``."""
    uvicorn.run(app, host="0.0.0.0", port=conn_port, log_level="info")

# The server runs in its own thread so the rest of the cell can continue.
server_thread = Thread(target=run)
server_thread.start()

# Open a public tunnel to the local server and show its address.
public_url = ngrok.connect(conn_port)
print('Public URL:', public_url)
# NOTE(review): this is set after the tunnel is already open -- confirm it has any effect.
conf.get_default().loglevel = "DEBUG"

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0100    12  100    12    0     0    102      0 --:--:-- --:--:-- --:--:--   103


INFO:     Started server process [1941]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8005 (Press CTRL+C to quit)


Public URL: NgrokTunnel: "https://64c8-34-32-163-77.ngrok-free.app" -> "http://localhost:8005"
