# Calculating the **Relevancy Score**, **Profile Score**, and **Matching Score** for DRDO experts

## Load the Required Libraries and Data

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler

# Load the CSV files.
experts_df = pd.read_csv('experts.csv')
candidates_df = pd.read_csv('candidates.csv')

# Parse interview_subjects.csv once and keep two independent copies:
# `interview_subjects_df` is label-encoded by the preprocessing step, while
# `interview_subjects_dfx` stays untouched so the original text (e.g. the
# 'Interview_Job_Role' column) can be reported in the final results table.
interview_subjects_df = pd.read_csv('interview_subjects.csv')
interview_subjects_dfx = interview_subjects_df.copy()

## Preprocessing the Data

In [None]:
def preprocess_data(df):
    """Label-encode every text column of ``df``.

    Each object/string column is cast with ``astype(str)`` and replaced by the
    integer codes produced by its own freshly fitted ``LabelEncoder``.
    Columns of any other dtype are left unchanged.

    The input frame is not modified; the encoding is applied to a copy so
    callers that keep a reference to the raw data still see the raw values.

    Args:
        df: DataFrame to encode.

    Returns:
        A ``(encoded_df, label_encoders)`` tuple, where ``label_encoders``
        maps each encoded column name to the fitted ``LabelEncoder`` (use its
        ``inverse_transform`` to recover the original strings).
    """
    encoded_df = df.copy()
    label_encoders = {}
    for column in encoded_df.columns:
        series = encoded_df[column]
        # Treat both classic object columns and pandas' dedicated string
        # dtypes as text; a bare `dtype == 'object'` check misses the latter.
        is_text = (pd.api.types.is_object_dtype(series)
                   or pd.api.types.is_string_dtype(series))
        if not is_text:
            continue
        le = LabelEncoder()
        encoded_df[column] = le.fit_transform(series.astype(str))
        label_encoders[column] = le
    return encoded_df, label_encoders

# Label-encode the text columns of each table. The fitted encoders are kept
# per table so integer codes can later be mapped back to the original strings.
candidates_df, candidates_encoders = preprocess_data(candidates_df)
interview_subjects_df, interview_subjects_encoders = preprocess_data(
    interview_subjects_df)
experts_df, experts_encoders = preprocess_data(experts_df)


## Calculate Relevancy Score Between Experts and Interview Subjects

In [None]:
# Pair each interview-subject feature with the expert feature it is compared
# against. The dict order fixes the column order of both matrices below.
# 'Expert_Technical_Skills' is deliberately used twice (skills and concepts).
feature_mapping = {
    'Interview_Subfields_Specializations': 'Expert_Field_of_Study',
    'Interview_Required_Technical_Skills': 'Expert_Technical_Skills',
    'Interview_Specific_Technologies': 'Expert_Relevant_Technology',
    'Interview_Core_Concepts': 'Expert_Technical_Skills',  # Assuming a match
    'Interview_Level_of_Expertise': 'Expert_Industry_Experience'
}

# Index with real lists rather than dict views, which are not a documented
# list-like indexer for DataFrames.
subject_feature_columns = list(feature_mapping.keys())
expert_feature_columns = list(feature_mapping.values())

# Extract the relevant columns. The subject frame is copied so that renaming
# its columns never touches (or warns about) the frame it was sliced from.
experts_mapped_df = experts_df[expert_feature_columns]
interview_subjects_mapped_df = interview_subjects_df[subject_feature_columns].copy()

# Give the subject columns the expert column names so the two frames stack.
interview_subjects_mapped_df.columns = experts_mapped_df.columns

# NOTE(review): experts_df and interview_subjects_df were label-encoded by
# separate preprocess_data() calls, so the same text value can receive a
# different integer code in each table. Confirm that comparing these codes is
# the intended notion of similarity.

# Stack experts on top of interview subjects so both are standardised with
# the same per-column mean and variance.
combined_df = pd.concat([experts_mapped_df, interview_subjects_mapped_df], axis=0)

scaler = StandardScaler()
combined_scaled = scaler.fit_transform(combined_df)

# Split the scaled rows back into their two groups (experts come first).
n_experts = len(experts_mapped_df)
experts_scaled = combined_scaled[:n_experts]
interview_subjects_scaled = combined_scaled[n_experts:]

# Relevancy score: cosine similarity, one row per expert and one column per
# interview subject.
relevancy_scores = cosine_similarity(experts_scaled, interview_subjects_scaled)


## Calculate Profile Score Between Experts and Candidates

In [None]:
# Pair each candidate feature with the expert feature it is compared against.
# The dict order fixes the column order of both matrices below.
candidate_feature_mapping = {
    'Candidate_Field_of_Study': 'Expert_Field_of_Study',
    'Candidate_Technical_Skills': 'Expert_Technical_Skills',
    'Candidate_Industry_Experience': 'Expert_Industry_Experience',
    'Candidate_Certifications': 'Expert_Certifications'
}

# Index with real lists rather than dict views, which are not a documented
# list-like indexer for DataFrames.
candidate_feature_columns = list(candidate_feature_mapping.keys())
expert_profile_columns = list(candidate_feature_mapping.values())

# Extract the relevant columns. The candidate frame is copied so that renaming
# its columns never touches (or warns about) the frame it was sliced from.
experts_mapped_for_candidates_df = experts_df[expert_profile_columns]
candidates_mapped_df = candidates_df[candidate_feature_columns].copy()

# Give the candidate columns the expert column names so the two frames stack.
candidates_mapped_df.columns = experts_mapped_for_candidates_df.columns

# NOTE(review): experts_df and candidates_df were label-encoded by separate
# preprocess_data() calls, so the same text value can receive a different
# integer code in each table. Confirm that comparing these codes is the
# intended notion of similarity.

# Stack experts on top of candidates so both are standardised with the same
# per-column mean and variance.
combined_candidates_df = pd.concat([experts_mapped_for_candidates_df, candidates_mapped_df], axis=0)

scaler_candidates = StandardScaler()
combined_candidates_scaled = scaler_candidates.fit_transform(combined_candidates_df)

# Split the scaled rows back into their two groups (experts come first).
n_experts_for_candidates = len(experts_mapped_for_candidates_df)
experts_scaled_for_candidates = combined_candidates_scaled[:n_experts_for_candidates]
candidates_scaled = combined_candidates_scaled[n_experts_for_candidates:]

# Profile score: cosine similarity, one row per expert and one column per
# candidate.
profile_scores = cosine_similarity(experts_scaled_for_candidates, candidates_scaled)


## Calculate the Matching Score

In [None]:
# One (experts x candidates) matrix per interview subject: the subject's
# relevancy column (kept 2-D so it broadcasts across all candidates) averaged
# with the expert-candidate profile scores.
matching_scores_list = [
    (relevancy_scores[:, [subject_idx]] + profile_scores) / 2
    for subject_idx in range(interview_subjects_df.shape[0])
]

# Stack the per-subject matrices along a new last axis, giving an array
# indexed as (expert, candidate, interview subject).
matching_scores_combined = np.stack(matching_scores_list, axis=-1)


## Filter and Save Results to an Excel File

In [None]:
# Keep only (expert, candidate, subject) triples whose score is at least 70%.
score_threshold = 0.7

# np.argwhere scans in row-major order (expert -> candidate -> subject), the
# same order nested loops over the three axes would visit, so the SNo
# numbering follows that traversal. It yields a (0, 3) array when nothing
# qualifies.
hit_indices = np.argwhere(matching_scores_combined >= score_threshold)
expert_pos, candidate_pos, subject_pos = hit_indices.T

# Read the ID columns once instead of doing a row lookup per hit.
expert_ids = experts_df['Expert_ID'].to_numpy()
candidate_ids = candidates_df['Candidate_ID'].to_numpy()

# If an ID column was text it has been label-encoded; map the integer codes
# back to the original IDs so the report does not contain encoder codes.
if 'Expert_ID' in experts_encoders:
    expert_ids = experts_encoders['Expert_ID'].inverse_transform(expert_ids)
if 'Candidate_ID' in candidates_encoders:
    candidate_ids = candidates_encoders['Candidate_ID'].inverse_transform(candidate_ids)

# Job roles come from the raw (un-encoded) copy of the interview subjects.
job_roles = interview_subjects_dfx['Interview_Job_Role'].to_numpy()

# Building from a dict of arrays keeps all columns present even when there
# are no hits, so the sort below cannot fail on a missing column.
matching_scores_df = pd.DataFrame({
    'SNo': np.arange(1, len(hit_indices) + 1),
    'Expert_ID': expert_ids[expert_pos],
    'Candidate_ID': candidate_ids[candidate_pos],
    'Interview_Subject_Job_Role': job_roles[subject_pos],
    # Scores are reported as percentages.
    'Matching_Score': matching_scores_combined[expert_pos, candidate_pos, subject_pos] * 100,
})

# Sort the DataFrame by Matching_Score in descending order
matching_scores_df = matching_scores_df.sort_values(
    by='Matching_Score', ascending=False).reset_index(drop=True)

# Save the DataFrame to an Excel file
matching_scores_df.to_excel('matching_scores.xlsx', index=False)

## Preview of the Final Score Table

In [None]:
# Show the sorted results table as the cell output.
# NOTE(review): `display` is not imported in the code visible here; presumably
# the IPython/Jupyter built-in. Confirm before running this as a plain script.
display(matching_scores_df)