In [40]:
import sqlite3
import os
import random
from faker import Faker

In [41]:
# Module-level Faker instance; ATSDatabase.save_cv_to_database uses it to
# generate a placeholder date of birth, address and phone number.
fake = Faker()

class ATSDatabase:
    """SQLite-backed store for applicant profiles and their applications.

    Two tables are managed: ``applicantprofile`` (one row per applicant) and
    ``applicationdetail`` (one row per submitted CV, referencing an
    applicant). Every mutating method commits immediately.

    The instance can be used as a context manager; the connection is closed
    when the ``with`` block exits.
    """

    def __init__(self, db_file='atsdatabase.db'):
        """Open (or create) the database at *db_file* and ensure tables exist."""
        self.conn = sqlite3.connect(db_file)
        self.cursor = self.conn.cursor()
        self.create_tables()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        # Never suppress an exception raised inside the ``with`` block.
        return False

    def create_tables(self):
        """Create both tables if they do not exist yet, then commit."""
        self.cursor.execute('''CREATE TABLE IF NOT EXISTS applicantprofile (
            applicant_id INTEGER PRIMARY KEY AUTOINCREMENT,
            first_name TEXT,
            last_name TEXT,
            date_of_birth TEXT,
            address TEXT,
            phone_number TEXT
        )''')

        self.cursor.execute('''CREATE TABLE IF NOT EXISTS applicationdetail (
            detail_id INTEGER PRIMARY KEY AUTOINCREMENT,
            applicant_id INTEGER NOT NULL,
            applicant_role TEXT,
            cv_path TEXT,
            FOREIGN KEY (applicant_id) REFERENCES applicantprofile(applicant_id)
        )''')

        self.conn.commit()

    def add_applicant(self, first_name, last_name, date_of_birth, address, phone_number):
        """Insert a new applicant row and commit.

        Returns:
            The ``applicant_id`` of the inserted row.
        """
        self.cursor.execute('''INSERT INTO applicantprofile (first_name, last_name, date_of_birth, address, phone_number)
                               VALUES (?, ?, ?, ?, ?)''', (first_name, last_name, date_of_birth, address, phone_number))
        # Capture the id before commit/any other statement reuses the cursor.
        new_id = self.cursor.lastrowid
        self.conn.commit()
        return new_id

    def get_applicant_id(self, first_name, last_name):
        """Return the id of an applicant matching both names, or ``None``.

        If several rows share the same names, the first row SQLite returns
        is used.
        """
        self.cursor.execute('''SELECT applicant_id FROM applicantprofile WHERE first_name=? AND last_name=?''', (first_name, last_name))
        row = self.cursor.fetchone()
        return None if row is None else row[0]

    def get_or_create_applicant(self, first_name, last_name, date_of_birth="", address="", phone_number=""):
        """Return the id of the named applicant, inserting a row if needed.

        The extra profile fields are only used when a new row is created; an
        existing applicant is returned unchanged.
        """
        applicant_id = self.get_applicant_id(first_name, last_name)
        # Compare against None explicitly: an id of 0 is a valid existing row
        # and must not trigger a duplicate insert.
        if applicant_id is None:
            applicant_id = self.add_applicant(first_name, last_name, date_of_birth, address, phone_number)
        return applicant_id

    def add_application(self, applicant_id, applicant_role, cv_path):
        """Insert an application row for *applicant_id* and commit.

        Returns:
            The ``detail_id`` of the inserted row.
        """
        self.cursor.execute('''INSERT INTO applicationdetail (applicant_id, applicant_role, cv_path)
                               VALUES (?, ?, ?)''', (applicant_id, applicant_role, cv_path))
        new_id = self.cursor.lastrowid
        self.conn.commit()
        return new_id

    def save_cv_to_database(self, first_name, last_name, applicant_role, cv_path):
        """Record a CV for the named applicant, creating the applicant if new.

        Date of birth, address and phone number are synthesized with the
        module-level ``fake`` generator; they are only stored when the
        applicant does not exist yet.
        """
        date_of_birth = fake.date_of_birth(minimum_age=18, maximum_age=60).strftime("%Y-%m-%d")
        # Faker addresses are multi-line; flatten them to a single line.
        address = fake.address().replace('\n', ', ')
        phone_number = fake.phone_number()

        applicant_id = self.get_or_create_applicant(first_name, last_name, date_of_birth, address, phone_number)
        self.add_application(applicant_id, applicant_role, cv_path)

    def get_all_applicants(self):
        """Return every ``applicantprofile`` row as a list of tuples."""
        self.cursor.execute('SELECT * FROM applicantprofile')
        return self.cursor.fetchall()

    def get_all_applications(self):
        """Return every ``applicationdetail`` row as a list of tuples."""
        self.cursor.execute('SELECT * FROM applicationdetail')
        return self.cursor.fetchall()

    def update_applicant(self, applicant_id, first_name, last_name, date_of_birth, address, phone_number):
        """Overwrite all profile fields of the applicant with *applicant_id*."""
        self.cursor.execute('''UPDATE applicantprofile
                               SET first_name=?, last_name=?, date_of_birth=?, address=?, phone_number=?
                               WHERE applicant_id=?''', (first_name, last_name, date_of_birth, address, phone_number, applicant_id))
        self.conn.commit()

    def delete_applicant(self, applicant_id):
        """Delete an applicant together with all of their applications.

        SQLite does not enforce the declared foreign key unless
        ``PRAGMA foreign_keys`` is enabled, so the dependent
        ``applicationdetail`` rows are removed explicitly here to avoid
        leaving orphans. Both deletes are committed together.
        """
        self.cursor.execute('DELETE FROM applicationdetail WHERE applicant_id=?', (applicant_id,))
        self.cursor.execute('DELETE FROM applicantprofile WHERE applicant_id=?', (applicant_id,))
        self.conn.commit()

    def update_application(self, detail_id, applicant_id, applicant_role, cv_path):
        """Overwrite all fields of the application with *detail_id*."""
        self.cursor.execute('''UPDATE applicationdetail
                               SET applicant_id=?, applicant_role=?, cv_path=?
                               WHERE detail_id=?''', (applicant_id, applicant_role, cv_path, detail_id))
        self.conn.commit()

    def delete_application(self, detail_id):
        """Delete the application with *detail_id* and commit."""
        self.cursor.execute('DELETE FROM applicationdetail WHERE detail_id=?', (detail_id,))
        self.conn.commit()

    def close(self):
        """Close the underlying database connection."""
        self.conn.close()

In [42]:
def read_file(root_dir, max_per_category=20):
    """Register the CV PDFs found under *root_dir* in the ATS database.

    Each immediate sub-directory of *root_dir* is treated as a job category.
    For every category, up to *max_per_category* files ending in ``.pdf``
    are stored with the category as the applicant role and a randomly
    chosen first/last name from the fixed lists below.

    Args:
        root_dir: Directory whose sub-directories are the categories.
        max_per_category: Maximum number of PDFs taken from each category.
    """
    first_names = ['John', 'Supri', 'Jane', 'Alex', 'Emily', 'Asep', 'Jajang', 'Chris', 'Katie', 'Dadang', 'Michael', 'Sara', 'Cecep', 'David', 'Laura']
    last_names = ['Smith', 'Saepuloh', 'Johnson', 'Williams', 'Suherman', 'Brown', 'Jones', 'Garcia', 'Sumarna', 'Miller', 'Davis', 'Santosa', 'Martinez', 'Hernandez']

    ats_db = ATSDatabase()
    try:
        # os.listdir returns entries in arbitrary order; sort so that the
        # processing order and the per-category selection are reproducible.
        for category in sorted(os.listdir(root_dir)):
            category_path = os.path.join(root_dir, category)

            if not os.path.isdir(category_path):
                continue

            print(f"Processing category: {category}")

            pdf_files = sorted(
                f for f in os.listdir(category_path) if f.endswith('.pdf')
            )[:max_per_category]

            for pdf_file in pdf_files:
                pdf_path = os.path.join(category_path, pdf_file)
                print(f"Processing CV: {pdf_file}")

                ats_db.save_cv_to_database(first_name=random.choice(first_names),
                                           last_name=random.choice(last_names),
                                           applicant_role=category,
                                           cv_path=pdf_path)
    finally:
        # Release the connection even if a listing or insert fails midway.
        ats_db.close()


In [43]:
# Build the path with os.path.join instead of a backslash literal: '\d' is an
# invalid escape sequence, and the joined form also works outside Windows.
read_file(os.path.join('archive', 'data', 'data'))

Processing category: ACCOUNTANT
Processing CV: 10554236.pdf
Processing CV: 10674770.pdf
Processing CV: 11163645.pdf
Processing CV: 11759079.pdf
Processing CV: 12065211.pdf
Processing CV: 12202337.pdf
Processing CV: 12338274.pdf
Processing CV: 12442909.pdf
Processing CV: 12780508.pdf
Processing CV: 12802330.pdf
Processing CV: 13072019.pdf
Processing CV: 13130984.pdf
Processing CV: 13294301.pdf
Processing CV: 13491889.pdf
Processing CV: 13701259.pdf
Processing CV: 14055988.pdf
Processing CV: 14126433.pdf
Processing CV: 14224370.pdf
Processing CV: 14449423.pdf
Processing CV: 14470533.pdf
Processing category: ADVOCATE
Processing CV: 10186968.pdf
Processing CV: 10344379.pdf
Processing CV: 10659182.pdf
Processing CV: 10818478.pdf
Processing CV: 11174187.pdf
Processing CV: 11188218.pdf
Processing CV: 11773767.pdf
Processing CV: 11963737.pdf
Processing CV: 12171093.pdf
Processing CV: 12544735.pdf
Processing CV: 13072354.pdf
Processing CV: 13115648.pdf
Processing CV: 13342150.pdf
Processing CV:

In [44]:
import sqlite3

def verify_database(db_file='atsdatabase.db'):
    """Print every row of both ATS tables for a quick manual check.

    Args:
        db_file: Path of the SQLite database file to inspect.
    """
    conn = sqlite3.connect(db_file)
    try:
        cursor = conn.cursor()

        print("\n--- Applicants ---")
        cursor.execute('SELECT * FROM applicantprofile')
        for applicant in cursor.fetchall():
            print(applicant)

        print("\n--- Applications ---")
        cursor.execute('SELECT * FROM applicationdetail')
        for application in cursor.fetchall():
            print(application)
    finally:
        # Close the connection even when a query fails (e.g. missing table).
        conn.close()

# Dump both tables of the default database file ('atsdatabase.db').
verify_database()



--- Applicants ---
(1, 'Sara', 'Smith', '1969-01-03', '949 Burton Shoal Suite 385, East Luke, RI 27904', '+1-500-967-7053x41466')
(2, 'Katie', 'Saepuloh', '1973-12-18', 'USNS Howard, FPO AP 43056', '8405878720')
(3, 'Chris', 'Hernandez', '1992-07-20', '83813 Anthony Plains, West Maria, MS 16793', '892-877-9096')
(4, 'Supri', 'Smith', '1970-09-08', 'PSC 4251, Box 0974, APO AE 16678', '450-219-6785x348')
(5, 'David', 'Davis', '1997-03-28', '813 Obrien Overpass Apt. 811, North Amanda, ID 19969', '456-648-0716')
(6, 'Sara', 'Suherman', '1966-10-22', '147 Nathan Hollow, Jessicaberg, MO 43245', '534.949.9678x951')
(7, 'Asep', 'Garcia', '2001-10-08', 'Unit 9958 Box 1848, DPO AA 32609', '001-483-659-1612')
(8, 'Michael', 'Williams', '1975-11-30', '88303 Clark Overpass Apt. 488, Yangberg, DE 99344', '255.360.6414')
(9, 'Sara', 'Martinez', '1978-05-19', '67953 Frank Squares, West George, PA 98747', '+1-344-715-9348x80972')
(10, 'Emily', 'Sumarna', '1983-02-18', '6138 Aimee Lane Suite 656, Tiffa