In [30]:
import pandas as pd
import numpy as np
import pickle
import ast

# Load Data

In [31]:
# Read the inference dataset from the CSV file into a DataFrame and display it.
df_inf = pd.read_csv('aug_test.csv')
df_inf

Unnamed: 0,enrollee_id,city,city_development_index,gender,relevent_experience,enrolled_university,education_level,major_discipline,experience,company_size,company_type,last_new_job,training_hours
0,32403,city_41,0.827,Male,Has relevent experience,Full time course,Graduate,STEM,9,<10,,1,21
1,9858,city_103,0.920,Female,Has relevent experience,no_enrollment,Graduate,STEM,5,,Pvt Ltd,1,98
2,31806,city_21,0.624,Male,No relevent experience,no_enrollment,High School,,<1,,Pvt Ltd,never,15
3,27385,city_13,0.827,Male,Has relevent experience,no_enrollment,Masters,STEM,11,10/49,Pvt Ltd,1,39
4,27724,city_103,0.920,Male,Has relevent experience,no_enrollment,Graduate,STEM,>20,10000+,Pvt Ltd,>4,72
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2124,1289,city_103,0.920,Male,No relevent experience,no_enrollment,Graduate,Humanities,16,,Public Sector,4,15
2125,195,city_136,0.897,Male,Has relevent experience,no_enrollment,Masters,STEM,18,,,2,30
2126,31762,city_100,0.887,Male,No relevent experience,no_enrollment,Primary School,,3,,Pvt Ltd,never,18
2127,7873,city_102,0.804,Male,Has relevent experience,Full time course,High School,,7,100-500,Public Sector,1,84


In [38]:
def klasifikasi_experience(experience):
    """Map a raw ``experience`` value to an experience category.

    Parameters
    ----------
    experience : str, int, float or missing
        Years of experience, e.g. ``'<1'``, ``'5'``, ``'16'`` or ``'>20'``.
        Integers, integral floats and strings with surrounding whitespace
        are normalised to the same string labels before bucketing.

    Returns
    -------
    str or None
        ``'Junior'`` for ``'<1'`` through ``'5'``, ``'Intermediate'`` for
        ``'6'``-``'10'``, ``'Senior'`` for ``'11'``-``'15'`` and ``'Veteran'``
        for every other non-missing value. ``None`` when the value is missing.
    """
    if pd.isna(experience):
        # Missing experience stays missing instead of being forced into a bucket.
        return None

    # An integral float such as 5.0 would stringify as '5.0' and miss every bucket.
    if isinstance(experience, float) and experience.is_integer():
        experience = int(experience)
    label = str(experience).strip()

    if label in ('<1', '1', '2', '3', '4', '5'):
        return 'Junior'
    if label in ('6', '7', '8', '9', '10'):
        return 'Intermediate'
    if label in ('11', '12', '13', '14', '15'):
        return 'Senior'
    # Any remaining label (e.g. '16' or '>20') is treated as 'Veteran',
    # which keeps the original fallback behaviour.
    return 'Veteran'

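The `klasifikasi_experience` function converts each value of the `experience` column into an experience category (Junior, Intermediate, Senior, or Veteran), which is stored in a new `experience_category` column.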

In [39]:
# Build the experience_category column by applying klasifikasi_experience
# to every value of the experience column, then display the updated frame.
df_inf['experience_category'] = df_inf['experience'].apply(klasifikasi_experience)
df_inf

Unnamed: 0,enrollee_id,city,city_development_index,gender,relevent_experience,enrolled_university,education_level,major_discipline,experience,company_size,company_type,last_new_job,training_hours,experience_category
0,32403,city_41,0.827,Male,Has relevent experience,Full time course,Graduate,STEM,9,<10,,1,21,Intermediate
1,9858,city_103,0.920,Female,Has relevent experience,no_enrollment,Graduate,STEM,5,,Pvt Ltd,1,98,Junior
2,31806,city_21,0.624,Male,No relevent experience,no_enrollment,High School,,<1,,Pvt Ltd,never,15,Junior
3,27385,city_13,0.827,Male,Has relevent experience,no_enrollment,Masters,STEM,11,10/49,Pvt Ltd,1,39,Senior
4,27724,city_103,0.920,Male,Has relevent experience,no_enrollment,Graduate,STEM,>20,10000+,Pvt Ltd,>4,72,Veteran
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2124,1289,city_103,0.920,Male,No relevent experience,no_enrollment,Graduate,Humanities,16,,Public Sector,4,15,Veteran
2125,195,city_136,0.897,Male,Has relevent experience,no_enrollment,Masters,STEM,18,,,2,30,Veteran
2126,31762,city_100,0.887,Male,No relevent experience,no_enrollment,Primary School,,3,,Pvt Ltd,never,18,Junior
2127,7873,city_102,0.804,Male,Has relevent experience,Full time course,High School,,7,100-500,Public Sector,1,84,Intermediate


# Load Model

In [40]:
# Load the previously saved artifacts: the pipeline, the model, and the raw
# text of the numeric / categorical column-name lists.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('pipeline.pkl', 'rb') as pipeline_file:
    pipeline = pickle.load(pipeline_file)

with open('model_svc_best.pkl', 'rb') as model_file:
    model_svc = pickle.load(model_file)

# The column lists are read as plain text here and parsed in the next cell.
with open('list_num_columns.txt', 'r') as num_columns_file:
    list_num_columns = num_columns_file.read()

with open('list_cat_columns.txt', 'r') as cat_columns_file:
    list_cat_columns = cat_columns_file.read()

In [41]:
# Turn the column names from their text form into Python lists for easy reuse.
# The isinstance guard keeps this cell re-runnable: once the names already hold
# lists, passing them to ast.literal_eval again would raise an error.
if isinstance(list_num_columns, str):
    list_num_columns = ast.literal_eval(list_num_columns)
if isinstance(list_cat_columns, str):
    list_cat_columns = ast.literal_eval(list_cat_columns)


In [42]:
# Select the columns used for modeling: categorical columns first, then numeric.
model_columns = list_cat_columns + list_num_columns
df_inf_fix = df_inf[model_columns]
df_inf_fix

Unnamed: 0,gender,relevent_experience,enrolled_university,education_level,major_discipline,company_size,company_type,last_new_job,experience_category,city_development_index
0,Male,Has relevent experience,Full time course,Graduate,STEM,<10,,1,Intermediate,0.827
1,Female,Has relevent experience,no_enrollment,Graduate,STEM,,Pvt Ltd,1,Junior,0.920
2,Male,No relevent experience,no_enrollment,High School,,,Pvt Ltd,never,Junior,0.624
3,Male,Has relevent experience,no_enrollment,Masters,STEM,10/49,Pvt Ltd,1,Senior,0.827
4,Male,Has relevent experience,no_enrollment,Graduate,STEM,10000+,Pvt Ltd,>4,Veteran,0.920
...,...,...,...,...,...,...,...,...,...,...
2124,Male,No relevent experience,no_enrollment,Graduate,Humanities,,Public Sector,4,Veteran,0.920
2125,Male,Has relevent experience,no_enrollment,Masters,STEM,,,2,Veteran,0.897
2126,Male,No relevent experience,no_enrollment,Primary School,,,Pvt Ltd,never,Junior,0.887
2127,Male,Has relevent experience,Full time course,High School,,100-500,Public Sector,1,Intermediate,0.804


In [44]:
# Preprocess the selected inference columns with the loaded pipeline.
df_inf_prep = pipeline.transform(df_inf_fix)
df_inf_prep

array([[2.   , 2.   , 0.   , ..., 0.   , 1.   , 0.827],
       [0.   , 2.   , 2.   , ..., 0.   , 1.   , 0.92 ],
       [0.   , 1.   , 2.   , ..., 0.   , 1.   , 0.624],
       ...,
       [0.   , 0.   , 2.   , ..., 0.   , 1.   , 0.887],
       [2.   , 1.   , 3.   , ..., 1.   , 0.   , 0.804],
       [0.   , 3.   , 7.   , ..., 0.   , 1.   , 0.804]])

In [46]:
# Predict with the model obtained from the main notebook.
# The predictions are wrapped in a DataFrame that reuses df_inf's index, so a
# column-wise concat with df_inf lines up row-for-row even if that index is
# not the default 0..n-1 range.
y_inf_predict = pd.DataFrame(
    model_svc.predict(df_inf_prep),
    columns=['Target'],
    index=df_inf.index,
)
y_inf_predict

Unnamed: 0,Target
0,1.0
1,1.0
2,1.0
3,0.0
4,0.0
...,...
2124,0.0
2125,0.0
2126,1.0
2127,1.0


In [47]:
# Combine the predictions with the original data.
# axis=1 places the frames side by side, matching rows by index label.
df_final_predict = pd.concat([df_inf,y_inf_predict],axis=1)
df_final_predict

Unnamed: 0,enrollee_id,city,city_development_index,gender,relevent_experience,enrolled_university,education_level,major_discipline,experience,company_size,company_type,last_new_job,training_hours,experience_category,Target
0,32403,city_41,0.827,Male,Has relevent experience,Full time course,Graduate,STEM,9,<10,,1,21,Intermediate,1.0
1,9858,city_103,0.920,Female,Has relevent experience,no_enrollment,Graduate,STEM,5,,Pvt Ltd,1,98,Junior,1.0
2,31806,city_21,0.624,Male,No relevent experience,no_enrollment,High School,,<1,,Pvt Ltd,never,15,Junior,1.0
3,27385,city_13,0.827,Male,Has relevent experience,no_enrollment,Masters,STEM,11,10/49,Pvt Ltd,1,39,Senior,0.0
4,27724,city_103,0.920,Male,Has relevent experience,no_enrollment,Graduate,STEM,>20,10000+,Pvt Ltd,>4,72,Veteran,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2124,1289,city_103,0.920,Male,No relevent experience,no_enrollment,Graduate,Humanities,16,,Public Sector,4,15,Veteran,0.0
2125,195,city_136,0.897,Male,Has relevent experience,no_enrollment,Masters,STEM,18,,,2,30,Veteran,0.0
2126,31762,city_100,0.887,Male,No relevent experience,no_enrollment,Primary School,,3,,Pvt Ltd,never,18,Junior,1.0
2127,7873,city_102,0.804,Male,Has relevent experience,Full time course,High School,,7,100-500,Public Sector,1,84,Intermediate,1.0
