### Creating database

In [25]:
import pandas as pd
import mysql.connector
from sqlalchemy import create_engine

In [26]:
#creating the database
def db_creation(password, db):
    """Create the MySQL database ``db`` on the local server if it is missing.

    Connects to 127.0.0.1 as ``root`` (``mysql_native_password`` plugin),
    runs ``CREATE DATABASE IF NOT EXISTS`` and prints the cursor. A MySQL
    error raised by the statement is printed (message, errno, SQLSTATE)
    instead of being re-raised. The cursor and the connection are always
    closed before returning.

    Parameters
    ----------
    password : str
        Password of the MySQL ``root`` user.
    db : str
        Name of the database to create. It is interpolated into the SQL
        text unchanged, so it must be a trusted, valid identifier.
    """
    cnx = mysql.connector.connect(user='root', password=f'{password}',
                                  auth_plugin='mysql_native_password',
                                  host='127.0.0.1')
    try:
        mycursor = cnx.cursor()
        try:
            mycursor.execute(f"CREATE DATABASE IF NOT EXISTS {db}")
            print(mycursor)
        except mysql.connector.Error as err:
            print(err)
            print("Error Code:", err.errno)
            print("SQLSTATE", err.sqlstate)
            print("Message", err.msg)
        finally:
            # Release the cursor whether or not the statement succeeded.
            mycursor.close()
    finally:
        # Without this every call would leave a server connection open.
        cnx.close()

In [27]:
# Create the HR database on the local MySQL server (nothing happens if it already exists).
# NOTE(review): the root password is hard-coded here; consider reading it from the environment.
db_creation(password='AlumnaAdalab', db='HR_Database')

CMySQLCursor: CREATE DATABASE IF NOT EXISTS HR_Databas..


In [28]:
# Load the conversion table that pairs each DataFrame column (and its pandas dtype)
# with the MySQL type it is translated to.
conversion_path = 'data/HR_dataset/conversion_sql'
col_types = pd.read_csv(conversion_path, index_col=0)

In [29]:
# Copy of the table-creation dictionary: table name -> ordered list of its columns.
# Insertion order matters: the dictionary is iterated in this order further down.
col_table = {
    'manager': ['ManagerID', 'ManagerName'],
    'marital_status': ['MaritalStatusID', 'MaritalDesc', 'Married'],
    'gender': ['GenderID', 'Sex'],
    'personal_profile': [
        'EmpID', 'Employee_Name', 'DOB', 'MaritalStatusID', 'GenderID',
        'FromDiversityJobFairID', 'CitizenDesc', 'RaceDesc', 'HispanicLatino',
        'State', 'Zip',
    ],
    'recruitment': ['EmpID', 'RecruitmentSource'],
    'performance_employee': [
        'EmpID', 'LastPerformanceReview_Date', 'PerformanceScore',
    ],
    'engagement': ['EmpID', 'EngagementSurvey', 'EmpSatisfaction'],
    'absenteeism': ['EmpID', 'DaysLateLast30', 'Absences'],
    'attrition': [
        'EmpID', 'EmploymentStatus', 'DateofTermination', 'TermReason',
    ],
    'working_profile': [
        'EmpID', 'Term', 'EmploymentStatus', 'DateofHire', 'DateofTermination',
        'Salary', 'Department', 'Position', 'ManagerID', 'SpecialProjectsCount',
    ],
}

In [30]:
#automating the tables creation:
def tables_creation(password, db, tables_dict):
    """Create one MySQL table per entry of ``tables_dict`` in database ``db``.

    For every ``table_name -> columns`` pair a
    ``CREATE TABLE IF NOT EXISTS`` statement is built and executed, and the
    cursor is printed. The SQL type of each column is looked up in the
    module-level ``col_types`` frame (row where ``col_types["index"]``
    equals the column name, value of its ``"SQL_conv"`` column). The first
    column of each list is declared as the table's PRIMARY KEY.

    A MySQL error is printed (message, errno, SQLSTATE) and stops the
    remaining tables from being created; it is not re-raised. Other
    exceptions (e.g. a column missing from ``col_types``) propagate. The
    cursor and the connection are closed in every case.

    Parameters
    ----------
    password : str
        Password of the MySQL ``root`` user.
    db : str
        Name of the database to connect to.
    tables_dict : dict[str, list[str]]
        Mapping of table name to its ordered column names. Names are
        interpolated into the SQL text unchanged, so they must be trusted,
        valid identifiers.
    """
    cnx = mysql.connector.connect(user='root', password=f'{password}',
                                  host='127.0.0.1', database=f'{db}')
    try:
        mycursor = cnx.cursor()
        try:
            for table_name, columns in tables_dict.items():
                # "<column> <sql type>" pairs; the first matching row of col_types wins.
                columns_sql = ', '.join(
                    f'{column} {col_types.loc[col_types["index"] == column, "SQL_conv"].values[0]}'
                    for column in columns
                )

                mycursor.execute(f'CREATE TABLE IF NOT EXISTS {table_name} ({columns_sql}, PRIMARY KEY ({columns[0]}));')
                print(mycursor)

        except mysql.connector.Error as err:
            print(err)
            print("Error Code:", err.errno)
            print("SQLSTATE", err.sqlstate)
            print("Message", err.msg)
        finally:
            # Release the cursor whether or not every statement succeeded.
            mycursor.close()
    finally:
        # Without this every call would leave a server connection open,
        # including when the col_types lookup raises a non-MySQL exception.
        cnx.close()

In [31]:
# Create every table described in col_table inside HR_Database.
tables_creation(password='AlumnaAdalab', db='HR_Database', tables_dict=col_table)

CMySQLCursor: CREATE TABLE IF NOT EXISTS manager (Mana..


CMySQLCursor: CREATE TABLE IF NOT EXISTS marital_statu..
CMySQLCursor: CREATE TABLE IF NOT EXISTS gender (Gende..
CMySQLCursor: CREATE TABLE IF NOT EXISTS personal_prof..
CMySQLCursor: CREATE TABLE IF NOT EXISTS recruitment (..
CMySQLCursor: CREATE TABLE IF NOT EXISTS performance_e..
CMySQLCursor: CREATE TABLE IF NOT EXISTS engagement (E..
CMySQLCursor: CREATE TABLE IF NOT EXISTS absenteeism (..
CMySQLCursor: CREATE TABLE IF NOT EXISTS attrition (Em..
CMySQLCursor: CREATE TABLE IF NOT EXISTS working_profi..


In [32]:
# Creating the foreign keys.
# Each ALTER TABLE is sent with its own execute() call: passing several
# statements in one string depends on the connector's multi-statement handling,
# and a failure in a later statement can go unreported. Running them one at a
# time also lets every failure be printed next to the statement that caused it.
# NOTE(review): the root password is hard-coded below; consider reading it from the environment.
fk_statements = [
    """ALTER TABLE `working_profile`
       ADD CONSTRAINT `fk_working_manager` FOREIGN KEY (`ManagerID`) REFERENCES `manager` (`ManagerID`) ON DELETE RESTRICT ON UPDATE CASCADE,
       ADD CONSTRAINT `fk_working_personal` FOREIGN KEY (`EmpID`) REFERENCES `personal_profile` (`EmpID`) ON DELETE RESTRICT ON UPDATE CASCADE""",
    """ALTER TABLE `recruitment`
       ADD CONSTRAINT `fk_recruitment_personal` FOREIGN KEY (`EmpID`) REFERENCES `personal_profile` (`EmpID`) ON DELETE RESTRICT ON UPDATE CASCADE""",
    """ALTER TABLE `performance_employee`
       ADD CONSTRAINT `fk_performance_employee_personal` FOREIGN KEY (`EmpID`) REFERENCES `personal_profile` (`EmpID`) ON DELETE RESTRICT ON UPDATE CASCADE""",
    """ALTER TABLE `engagement`
       ADD CONSTRAINT `fk_engagement_personal` FOREIGN KEY (`EmpID`) REFERENCES `personal_profile` (`EmpID`) ON DELETE RESTRICT ON UPDATE CASCADE""",
    """ALTER TABLE `absenteeism`
       ADD CONSTRAINT `fk_absenteeism_personal` FOREIGN KEY (`EmpID`) REFERENCES `personal_profile` (`EmpID`) ON DELETE RESTRICT ON UPDATE CASCADE""",
    """ALTER TABLE `attrition`
       ADD CONSTRAINT `fk_attrition_personal` FOREIGN KEY (`EmpID`) REFERENCES `personal_profile` (`EmpID`) ON DELETE RESTRICT ON UPDATE CASCADE""",
    # The two constraints below belong to one ALTER TABLE and are separated by
    # a comma; a semicolon between them would leave the second ADD CONSTRAINT
    # as an invalid stand-alone statement.
    """ALTER TABLE `personal_profile`
       ADD CONSTRAINT `fk_personal_marital` FOREIGN KEY (`MaritalStatusID`) REFERENCES `marital_status` (`MaritalStatusID`) ON DELETE RESTRICT ON UPDATE CASCADE,
       ADD CONSTRAINT `fk_personal_gender` FOREIGN KEY (`GenderID`) REFERENCES `gender` (`GenderID`) ON DELETE RESTRICT ON UPDATE CASCADE""",
]

cnx = mysql.connector.connect(user='root', password='AlumnaAdalab',
                              host='127.0.0.1', database='HR_Database')
try:
    mycursor = cnx.cursor()
    try:
        for statement in fk_statements:
            # One try per statement: a failure (e.g. a constraint that already
            # exists on a re-run) is reported without skipping the remaining tables.
            try:
                mycursor.execute(statement)
                print(mycursor)
            except mysql.connector.Error as err:
                print(err)
                print("Error Code:", err.errno)
                print("SQLSTATE", err.sqlstate)
                print("Message", err.msg)
    finally:
        mycursor.close()
finally:
    # Close the connection so the cell does not leave a session open on the server.
    cnx.close()

CMySQLCursor: ALTER TABLE `working_profile` 
         ..


In [33]:
# SQLAlchemy engine used to insert the data into MySQL.
# NOTE(review): user and password are embedded in the URL; consider loading them from the environment.
connection_url = 'mysql+pymysql://root:AlumnaAdalab@localhost:3306/HR_Database'
engine = create_engine(connection_url)

In [35]:
# Read each table's CSV and append its rows to the MySQL table of the same name.
for table_name in col_table:
    table_df = pd.read_csv(f'data/HR_dataset/{table_name}.csv')
    table_df.to_sql(name=table_name, con=engine, if_exists='append', index=False)