**Add the parent folder to the import path so its classes can be imported**

In [1]:
import sys
import os

In [2]:
# Directory the notebook kernel is running from
current_dir = os.getcwd()

# Its parent directory (Job_Category_Classification) — presumably where the
# `scripts` and `train_model` packages imported further down live
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))

# Make the parent importable. The membership check keeps a re-run of this cell
# from stacking duplicate entries onto sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

**Establish Database Connection**

In [3]:
import yaml
from scripts.SqlConn import SqlConn

In [4]:
def load_config(file_path):
    """Parse a YAML file located relative to ``parent_dir``.

    Args:
        file_path: Path of the YAML file; it is joined onto the module-level
            ``parent_dir`` with ``os.path.join`` before being opened.

    Returns:
        The object produced by ``yaml.safe_load`` for the file's content.
    """
    config_path = os.path.join(parent_dir, file_path)
    with open(config_path, 'r') as config_file:
        return yaml.safe_load(config_file)

In [5]:
# Parse the project configuration, then pull the connection settings out of
# its `local_db` section.
config = load_config('./configuration/config.yaml')
local_db = config['local_db']
db_host, db_port = local_db['host'], local_db['port']
database = local_db['database']
username, password = local_db['username'], local_db['password']

In [6]:
# Build the connection wrapper from the configured settings, then open it.
# `engine` is the handle passed to every database call in the cells below.
sqlconn_obj = SqlConn(
    username,
    password,
    db_host,
    db_port,
    database,
)
engine = sqlconn_obj.connect()

**Train Model**

In [7]:
from train_model.TrainModel import TrainModel

In [8]:
# Value forwarded unchanged to every trainModel call.
# NOTE(review): presumably a classification decision threshold — confirm in TrainModel.
threshold = 0.5

TrainModel_obj = TrainModel()
category_df = TrainModel_obj.getDistinctCategory(engine)

# Train once per row of category_df: fetch that category's training data, then
# hand it to trainModel. The recorded output shows accuracy, recall, F1 score
# and a 2x2 matrix printed for each category.
for _, category_row in category_df.iterrows():
    category = category_row['category']
    training_df = TrainModel_obj.getTrainingData(category, engine)
    TrainModel_obj.trainModel(category, training_df, threshold, engine)
    

Accuracy: 0.9738562091503268
Recall: 0.9090909090909092
F1 Score: 0.9424812030075188
[[131   0]
 [  4  18]]
Accuracy: 0.9545454545454546
Recall: 0.78462441314554
F1 Score: 0.8211382113821137
[[140   2]
 [  5   7]]


**Update Model**

In [None]:
import pandas as pd
from sqlalchemy import text

In [None]:
# Work out the next model version number: highest stored version + 1.
#
# NOTE(review): `category` is not assigned in this cell; it holds whatever the
# training loop left behind (its last category). Confirm that one version
# number derived from that single category is meant to be used for all of them.
with engine.connect() as conn:
    version_query = text(
        '''
        select max(version)
        from fact_sch.vectorization_model_tb
        where category = :category
        '''
    )
    # Bind the category as a parameter instead of formatting it into the SQL
    # text, so a quote inside a category name cannot break or alter the query.
    latest_version = conn.execute(version_query, {'category': category}).scalar()

# max() over zero matching rows comes back as NULL (None); treat that as
# "no version stored yet" so the first version becomes 1 instead of raising
# TypeError on None + 1.
max_id = (0 if latest_version is None else latest_version) + 1

In [None]:
# Call updateJobClassModelTable once per row of category_df.
# NOTE(review): the same `max_id` from the preceding cell is passed for every
# category — confirm a shared version number is intended.
for _, category_row in category_df.iterrows():
    category = category_row['category']
    TrainModel_obj.updateJobClassModelTable(category, max_id, engine)