In [3]:
# Import necessary libraries
import pandas as pd

# Load the dataset
ds_jobs = pd.read_csv("customer_train.csv")

# View the dataset
ds_jobs.head(5)

Unnamed: 0,student_id,city,city_development_index,gender,relevant_experience,enrolled_university,education_level,major_discipline,experience,company_size,company_type,last_new_job,training_hours,job_change
0,8949,city_103,0.92,Male,Has relevant experience,no_enrollment,Graduate,STEM,>20,,,1,36,1.0
1,29725,city_40,0.776,Male,No relevant experience,no_enrollment,Graduate,STEM,15,50-99,Pvt Ltd,>4,47,0.0
2,11561,city_21,0.624,,No relevant experience,Full time course,Graduate,STEM,5,,,never,83,0.0
3,33241,city_115,0.789,,No relevant experience,,Graduate,Business Degree,<1,,Pvt Ltd,never,52,1.0
4,666,city_162,0.767,Male,Has relevant experience,no_enrollment,Masters,STEM,>20,50-99,Funded Startup,4,8,0.0


In [4]:
# Create a copy of the original dataset for transformation
ds_jobs_transformed = ds_jobs.copy()

# Mapping relevant experience values to boolean
experience = {"Has relevant experience": True, "No relevant experience": False}
ds_jobs_transformed["relevant_experience"] = ds_jobs_transformed["relevant_experience"].replace(experience)

# Converting job change column to boolean type
ds_jobs_transformed["job_change"] = ds_jobs_transformed["job_change"].astype("bool")

# Converting student ID and training hours to 32-bit integer
ds_jobs_transformed["student_id"] = ds_jobs_transformed["student_id"].astype("int32")
ds_jobs_transformed["training_hours"] = ds_jobs_transformed["training_hours"].astype("int32")

# Converting city development index to 16-bit float
ds_jobs_transformed["city_development_index"] = ds_jobs_transformed["city_development_index"].astype("float16")

# Converting gender, city, major discipline, and company type to categorical type
ds_jobs_transformed["gender"] = ds_jobs_transformed["gender"].astype("category")
ds_jobs_transformed["city"] = ds_jobs_transformed["city"].astype("category")
ds_jobs_transformed["major_discipline"] = ds_jobs_transformed["major_discipline"].astype("category")
ds_jobs_transformed["company_type"] = ds_jobs_transformed["company_type"].astype("category")

# Defining ordered categories for education level
ordered_education_level = ['Primary School', 'High School', 'Graduate', 'Masters', 'Phd']
ds_jobs_transformed["education_level"] = pd.Categorical(ds_jobs_transformed["education_level"], categories=ordered_education_level, ordered=True)

# Defining ordered categories for enrolled university status
ordered_enrolled = ['no_enrollment', 'Part time course', 'Full time cours']
ds_jobs_transformed['enrolled_university'] = pd.Categorical(ds_jobs_transformed['enrolled_university'], categories=ordered_enrolled, ordered=True)

# Defining ordered categories for last new job
ordered_last_new_job = ['never', '1', '2', '3', '4', '>4']
ds_jobs_transformed['last_new_job'] = pd.Categorical(ds_jobs_transformed['last_new_job'], categories=ordered_last_new_job, ordered=True)

# Defining categories for company size
company_size_categories = ['<10', '10-49', '50-99', '100-500', '500-999', '1000-4999', '5000-9999', '10000+']
ds_jobs_transformed['company_size'] = pd.Categorical(ds_jobs_transformed['company_size'], categories=company_size_categories, ordered=True)

# Defining categories for experience
experience_categories = ['<1'] + [str(i) for i in range(1, 21)] + ['>20']
ds_jobs_transformed['experience'] = pd.Categorical(ds_jobs_transformed['experience'], categories=experience_categories, ordered=True)

# Filtering dataset for specific conditions (experience >= 10 years and company size >= 1000-4999)
ds_jobs_transformed = ds_jobs_transformed[(ds_jobs_transformed['experience'] >= '10') & (ds_jobs_transformed['company_size'] >= '1000-4999')]

ds_jobs_transformed

  ds_jobs_transformed["relevant_experience"] = ds_jobs_transformed["relevant_experience"].replace(experience)


Unnamed: 0,student_id,city,city_development_index,gender,relevant_experience,enrolled_university,education_level,major_discipline,experience,company_size,company_type,last_new_job,training_hours,job_change
9,699,city_103,0.919922,,True,no_enrollment,Graduate,STEM,17,10000+,Pvt Ltd,>4,123,False
12,25619,city_61,0.913086,Male,True,no_enrollment,Graduate,STEM,>20,1000-4999,Pvt Ltd,3,23,False
31,22293,city_103,0.919922,Male,True,Part time course,Graduate,STEM,19,5000-9999,Pvt Ltd,>4,141,False
34,26494,city_16,0.910156,Male,True,no_enrollment,Graduate,Business Degree,12,5000-9999,Pvt Ltd,3,145,False
40,2547,city_114,0.925781,Female,True,,Masters,STEM,16,1000-4999,Public Sector,2,14,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19097,25447,city_103,0.919922,Male,True,no_enrollment,Graduate,STEM,>20,1000-4999,Pvt Ltd,>4,57,False
19101,6803,city_16,0.910156,Male,True,no_enrollment,High School,,10,10000+,Pvt Ltd,1,89,False
19103,32932,city_10,0.895020,Male,True,Part time course,Masters,Other,>20,1000-4999,Pvt Ltd,>4,18,False
19128,3365,city_16,0.910156,,True,no_enrollment,Graduate,Humanities,>20,1000-4999,Pvt Ltd,>4,23,False
