In [1]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeRegressor

In [3]:
# Read the applicant profile table from the CSV (path is relative to the working
# directory), then preview the first five rows to sanity-check the columns.
profile_score_df = pd.read_csv(filepath_or_buffer='Profile_score_csv.csv')
profile_score_df.head(n=5)

Unnamed: 0,ID,Recommenders ID,Years of Work Experience,Total Number of LORs,Total Valid LORs
0,0,"[218, 391]",15,2,2
1,1,"[412, 869, 233, 289]",23,4,3
2,2,"[582, 624, 592, 662, 469]",22,5,3
3,3,"[194, 122]",19,2,1
4,4,"[763, 726, 589, 977, 950, 543, 30]",19,7,6


In [4]:
# Calculating the ratio of Total Valid LORs to Total Number of LORs.
# A zero LOR count is turned into NaN before dividing, so an applicant with no LORs
# gets a missing ratio rather than +/-inf; an inf here would propagate into the
# profile score and become the max() used by the later min-max normalization.
profile_score_df['Valid_LOR_Ratio'] = (
    profile_score_df['Total Valid LORs']
    / profile_score_df['Total Number of LORs'].replace(0, np.nan)
)

In [5]:
# Starting point: both components of the profile score carry an equal weight of 1
weight_valid_lor_ratio, weight_work_experience = 1, 1

In [6]:
# Initial profile score: weighted sum of the valid-LOR ratio and the years of
# work experience, using the initial weights defined above.
profile_score_df['profile_score'] = (
    profile_score_df['Valid_LOR_Ratio'].mul(weight_valid_lor_ratio)
    .add(profile_score_df['Years of Work Experience'].mul(weight_work_experience))
)

In [7]:
# Preview the first five rows, now including Valid_LOR_Ratio and profile_score
profile_score_df.head(n=5)

Unnamed: 0,ID,Recommenders ID,Years of Work Experience,Total Number of LORs,Total Valid LORs,Valid_LOR_Ratio,profile_score
0,0,"[218, 391]",15,2,2,1.0,16.0
1,1,"[412, 869, 233, 289]",23,4,3,0.75,23.75
2,2,"[582, 624, 592, 662, 469]",22,5,3,0.6,22.6
3,3,"[194, 122]",19,2,1,0.5,19.5
4,4,"[763, 726, 589, 977, 950, 543, 30]",19,7,6,0.857143,19.857143


In [8]:
# Now, use a decision tree's impurity-based feature importances to update the weights:
# select the two feature columns and the target (the initial profile score) to fit on.
features_profile = ['Valid_LOR_Ratio', 'Years of Work Experience']
X_profile = profile_score_df.loc[:, features_profile]
y_profile = profile_score_df.loc[:, 'profile_score']


In [9]:
# Precautions: drop every row with a NaN or +/-inf feature value, then pick the
# target rows with the same index labels so X and y stay aligned.
X_profile_cleaned = (
    X_profile
    .replace(to_replace=[np.inf, -np.inf], value=np.nan)
    .dropna(axis=0, how='any')
)
y_profile_cleaned = y_profile.loc[X_profile_cleaned.index]

In [11]:
# Fit a decision-tree regressor on the cleaned features/target; random_state is
# fixed so the fitted tree is reproducible between runs.
# NOTE(review): the target (profile_score) was itself built as a weighted sum of
# these same two features, so the importances presumably just reflect how much
# each feature's spread drives that sum — confirm this is the intended way to
# derive the weights.
tree_model_profile = DecisionTreeRegressor(random_state=42).fit(
    X_profile_cleaned, y_profile_cleaned
)
tree_model_profile

In [12]:
# Importances of the fitted tree, one value per feature column it was trained on
# (scikit-learn reports them in training-column order, i.e. features_profile).
feature_importances_profile = tree_model_profile.feature_importances_

In [13]:
# The importances become the new weights. Order follows features_profile:
# position 0 -> Valid_LOR_Ratio, position 1 -> Years of Work Experience.
updated_weight_valid_lor_ratio, updated_weight_work_experience = feature_importances_profile[:2]

In [14]:
# Re-score every applicant with the tree-derived weights in place of the initial ones
profile_score_df['profile_score_updated'] = (
    profile_score_df['Valid_LOR_Ratio'].mul(updated_weight_valid_lor_ratio)
    .add(profile_score_df['Years of Work Experience'].mul(updated_weight_work_experience))
)

In [15]:
# Smallest and largest updated score; these are the bounds for min-max scaling
min_profile_value_updated, max_profile_value_updated = (
    profile_score_df['profile_score_updated'].min(),
    profile_score_df['profile_score_updated'].max(),
)

In [16]:
# Min-max scale the updated score: (score - min) / (max - min).
# When every score is identical the range is 0 and the plain formula would turn
# the whole column into NaN (0 / 0); in that case the centred values (all 0.0)
# are used as-is. Rows whose score is missing stay NaN either way.
score_range_updated = max_profile_value_updated - min_profile_value_updated
centered_profile_score_updated = (
    profile_score_df['profile_score_updated'] - min_profile_value_updated
)
profile_score_df['profile_score_updated_normalized'] = (
    centered_profile_score_updated / score_range_updated
    if score_range_updated != 0
    else centered_profile_score_updated
)

In [17]:
# Preview the first five rows with the updated and normalized score columns
profile_score_df.head(n=5)

Unnamed: 0,ID,Recommenders ID,Years of Work Experience,Total Number of LORs,Total Valid LORs,Valid_LOR_Ratio,profile_score,profile_score_updated,profile_score_updated_normalized
0,0,"[218, 391]",15,2,2,1.0,16.0,14.98102,0.437532
1,1,"[412, 869, 233, 289]",23,4,3,0.75,23.75,22.969836,0.937482
2,2,"[582, 624, 592, 662, 469]",22,5,3,0.6,22.6,21.970988,0.874973
3,3,"[194, 122]",19,2,1,0.5,19.5,18.97492,0.687475
4,4,"[763, 726, 589, 977, 950, 543, 30]",19,7,6,0.857143,19.857143,18.975404,0.687506


In [19]:
# Keep only the applicant ID and the normalized updated score for export
output_df = profile_score_df.loc[:, ['ID', 'profile_score_updated_normalized']]

# Write the selection to a CSV file, leaving out the DataFrame index
output_file_path = 'profile_score_updated_normalized.csv'
output_df.to_csv(path_or_buf=output_file_path, index=False)