In [0]:
pip install -q -U google-generativeai

In [0]:
import pandas as pd
import os
from consts import DATA_PATH

# Read both question sets from DATA_PATH and discard exact duplicate rows.
code_questions_path = os.path.join(DATA_PATH, "top_code_questions.csv")
open_questions_path = os.path.join(DATA_PATH, "top_open_questions.csv")

code_questions = pd.read_csv(code_questions_path).drop_duplicates()
open_questions = pd.read_csv(open_questions_path).drop_duplicates()

## Answering the questions

In [0]:
# One row per distinct code question, plus an empty 'answer' column that the
# answering loop fills in.
code_questions_and_answers = (
    code_questions[['question']]
    .drop_duplicates()
    .assign(answer='')
)

In [0]:
import google.generativeai as genai
import time
from api_keys import API_KEYS

def answer_code_question(question):
    """Ask the Gemini model to answer a single coding interview question.

    Uses the notebook-level ``model`` object, which the batching loop assigns
    before this function is applied.

    Args:
        question: The question text; NaN or whitespace-only values are skipped.

    Returns:
        The stripped response text, or '' when the question is missing/blank
        or the request raised an exception (the error is printed).
    """
    has_text = not pd.isna(question) and question.strip() != ''
    if not has_text:
        return ''

    prompt = f"You are a candidate being interviewed for a specific role related to coding. Your task is to answer the questions provided to you in a professional, clear, and concise manner, as if you are participating in a real interview. Understand the Question: Take a moment to fully grasp what is being asked. The question requires a code answer, ensure your code is clear and functional. Answer Clearly: Provide structured responses, focusing on clarity and correctness in your code. The question might contain multiple requirements, input-output examples, hints, and other specifications. \nFor example: Question: 'Implement a function to reverse a linked list in Python.'. Your Answer: 'class ListNode: \n def __init__(self, val=0, next=None): \n\t self.val = val \n\t self.next = next \n def reverse_linked_list(head): \n\t prev = None \n\t current = head \n\t while current: \n\t\t next_node = current.next \n\t\t current.next = prev \n\t\t prev = current \n\t\t current = next_node \n\t return prev \n'. Here is the question for you: {question}"

    try:
        reply = model.generate_content(prompt).text.strip()
        print("response:", reply)
    except Exception as err:
        # Best effort: an empty answer leaves the row eligible for a retry.
        print(f"Error: {err}")
        return ''
    return reply or ''
    

start_time = time.time()
running_time = 0
max_time = 3000  # overall time budget for this cell, in seconds

# Rows that actually contain a question. NaN is treated as blank here: a bare
# `.str.strip() != ''` evaluates to True for NaN, and because
# answer_code_question returns '' for such rows they would stay "unanswered"
# forever and keep the loop spinning until max_time. The question column is
# never modified by this loop, so the mask is computed once.
has_question = code_questions_and_answers['question'].fillna('').str.strip() != ''
empty_answer_rows = code_questions_and_answers[has_question]

while (running_time < max_time) and (empty_answer_rows.shape[0] > 0):
    # Rotate through the API keys, answering one batch of questions per key.
    for api_key in API_KEYS.values():
        # Rows that have a question but still no answer
        empty_answer_rows = code_questions_and_answers[(code_questions_and_answers['answer'] == '') & has_question]
        if (empty_answer_rows.shape[0] == 0) or (time.time() - start_time > max_time):
            break

        # Configure Gemini API; `model` is the global read by answer_code_question
        os.environ['GOOGLE_API_KEY'] = api_key
        genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
        model = genai.GenerativeModel('gemini-1.5-flash')

        # Get the indices of the first 15 rows with empty "answers"
        # NOTE(review): rows whose request keeps failing stay at the head of
        # this selection and are retried on every pass until max_time.
        indices_to_update = empty_answer_rows.index[:15]

        # Apply the UDF only to the selected rows
        code_questions_and_answers.loc[indices_to_update, 'answer'] = (
            code_questions_and_answers.loc[indices_to_update, 'question']
                .apply(answer_code_question)
        )
    running_time = time.time() - start_time

In [0]:
from consts import GEMINI_SIMULATION_DATA_PATH

# DataFrame.to_csv does not create missing directories, so make sure the
# output folder exists before writing into it.
os.makedirs(GEMINI_SIMULATION_DATA_PATH, exist_ok=True)
code_questions_and_answers.to_csv(os.path.join(GEMINI_SIMULATION_DATA_PATH, "code_questions_answers.csv"), index=False)

In [0]:
# One row per distinct open question, plus an empty 'answer' column that the
# answering loop fills in.
open_questions_and_answers = (
    open_questions[['question']]
    .drop_duplicates()
    .assign(answer='')
)

In [0]:
import google.generativeai as genai
import os
import time
import pandas as pd
from api_keys import API_KEYS
from consts import DATA_PATH

def answer_open_question(question):
    """Ask the Gemini model to answer a single open interview question.

    Uses the notebook-level ``model`` object, which the batching loop assigns
    before this function is applied.

    Args:
        question: The question text; NaN or whitespace-only values are skipped.

    Returns:
        The stripped response text, or '' when the question is missing/blank
        or the request raised an exception (the error is printed).
    """
    has_text = not pd.isna(question) and question.strip() != ''
    if not has_text:
        return ''

    prompt = f"You are a candidate being interviewed for a specific role. Your task is to answer the open questions provided to you in a professional, clear, and concise manner, as if you are participating in a real interview. Understand the Question: Take a moment to fully grasp what is being asked. The question may be a professional one or a question about you. If the question requires an example or explanation, make sure to provide one. Answer Clearly: Provide structured responses. Use full sentences, avoid unnecessary jargon, and ensure your answer directly addresses the question. Be Thoughtful: Demonstrate your expertise by including relevant concepts, examples, or real-world applications in your answers. Stay Brief: Avoid over-explaining but ensure your answer is complete. Strike a balance between depth and brevity. For example: Question: 'Can you explain the concept of overfitting in machine learning and provide a solution to address it?'. Your Answer: 'Overfitting occurs when a model learns the details and noise in the training data, leading to poor generalization on unseen data. To address it, we can use techniques like regularization (e.g., L1 or L2), reduce the complexity of the model, or employ cross-validation to ensure robust performance'.\nThis is the questions for you: {question}"

    try:
        reply = model.generate_content(prompt).text.strip()
        print("response:", reply)
    except Exception as err:
        # Best effort: an empty answer leaves the row eligible for a retry.
        print(f"Error: {err}")
        return ''
    return reply or ''


start_time = time.time()
running_time = 0
max_time = 900  # overall time budget for this cell, in seconds

# Rows that actually contain a question. The mask is built from
# open_questions_and_answers itself (not from the differently indexed
# open_questions frame) so that it aligns with the rows being filtered.
# NaN is treated as blank: a bare `.str.strip() != ''` evaluates to True for
# NaN, and because answer_open_question returns '' for such rows they would
# stay "unanswered" forever and keep the loop spinning until max_time.
has_question = open_questions_and_answers['question'].fillna('').str.strip() != ''
empty_answer_rows = open_questions_and_answers[has_question]

while (running_time < max_time) and (empty_answer_rows.shape[0] > 0):
    # Rotate through the API keys, answering one batch of questions per key.
    for api_key in API_KEYS.values():
        # Rows that have a question but still no answer
        empty_answer_rows = open_questions_and_answers[(open_questions_and_answers['answer'] == '') & has_question]
        if (empty_answer_rows.shape[0] == 0) or (time.time() - start_time > max_time):
            break

        # Configure Gemini API; `model` is the global read by answer_open_question
        os.environ['GOOGLE_API_KEY'] = api_key
        genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
        model = genai.GenerativeModel('gemini-1.5-flash')

        # Get the indices of the first 15 rows with empty "answers"
        # NOTE(review): rows whose request keeps failing stay at the head of
        # this selection and are retried on every pass until max_time.
        indices_to_update = empty_answer_rows.index[:15]

        # Apply the UDF only to the selected rows
        open_questions_and_answers.loc[indices_to_update, 'answer'] = (
            open_questions_and_answers.loc[indices_to_update, 'question']
                .apply(answer_open_question)
        )
    running_time = time.time() - start_time

In [0]:
# Output folder for the Gemini simulation results, created on demand.
GEMINI_SIMULATION_DATA_PATH = os.path.join(DATA_PATH, 'gemini_simulation/')
os.makedirs(GEMINI_SIMULATION_DATA_PATH, exist_ok=True)

# NOTE(review): unlike the code-question answers, this file is written with
# the DataFrame index as its first column — confirm that is intended.
open_answers_csv = os.path.join(GEMINI_SIMULATION_DATA_PATH, 'open_questions_answers.csv')
open_questions_and_answers.to_csv(open_answers_csv)

## Giving feedback on the model

In [0]:
# Open-question rows with an empty 'feedback' column to be filled by the model.
# (The following cell loads this same frame again together with the code questions.)
jobs_with_open_questions = (
    pd.read_csv(os.path.join(DATA_PATH, "top_open_questions.csv"))
    .drop_duplicates()
    .assign(feedback='')
)

In [0]:
# Reload both question sets (exact duplicate rows removed) and add an empty
# 'feedback' column that the scoring loops fill in.
jobs_with_code_questions = (
    pd.read_csv(os.path.join(DATA_PATH, "top_code_questions.csv"))
    .drop_duplicates()
    .assign(feedback='')
)
jobs_with_open_questions = (
    pd.read_csv(os.path.join(DATA_PATH, "top_open_questions.csv"))
    .drop_duplicates()
    .assign(feedback='')
)

In [0]:
import google.generativeai as genai
import os
import time
import pandas as pd
from api_keys import API_KEYS
from consts import DATA_PATH

# Seniority labels, indexed by the numeric 'level' code.
seniority_map = ["Internship", "Entry level/Associate", "Mid-Senior level/Manager and above"]


def _seniority_label(level):
    """Translate a numeric seniority code into its label from ``seniority_map``.

    Accepts ints, integer-valued floats (pandas stores an integer column that
    contains NaN as float, e.g. ``1.0``) and numeric strings. Missing,
    non-numeric, fractional or out-of-range values yield "Not specified".
    """
    if pd.isna(level):
        return "Not specified"
    try:
        numeric_level = float(level)
    except (TypeError, ValueError):
        return "Not specified"
    if not numeric_level.is_integer() or not 0 <= numeric_level < len(seniority_map):
        return "Not specified"
    return seniority_map[int(numeric_level)]


def question_feedback(question, job, level, job_summary):
    """Ask the Gemini model to score how well a question fits a job.

    Uses the notebook-level ``model`` object, which the batching loop assigns
    before this function is applied.

    Args:
        question: Interview question text; NaN or blank values are skipped.
        job: Job title inserted into the prompt.
        level: Numeric seniority code used as an index into ``seniority_map``;
            unusable values are reported to the model as "Not specified".
        job_summary: Job summary text inserted into the prompt.

    Returns:
        The stripped response text (the prompt requests "relevance,suitability"),
        or '' when the question is missing/blank or the request raised an
        exception (the error is printed).
    """
    if pd.isna(question) or question.strip() == '':
        return ''

    seniority = _seniority_label(level)

    prompt = f"We need to evaluate how well a given interview question aligns with the requirements of a specific job position. Your task is to assign two scores between 1 and 10 based on the following criteria: \nRelevance Score (1-10): How well does the question relate to the job's details? \nSuitability Score (1-10): How appropriate is the question for assessing a candidate’s fitness for this role in a real interview setting?.\n Provide the two scores as a comma-separated string. Evaluation Process: Consider the core skills and competencies needed for the job. Analyze whether the question effectively tests these skills or knowledge areas. \nInput Example: Job: 'Data Analyst'. seniority: 'junior'. Skills: 'SQL, RDBMS, Non-RDBMS, MySQL, PostgreSQL, MongoDB, Python, Jupyter Notebook, Inferential Statistics, Probability, ETL, Data Pipeline, Automated Reporting, Data Analysis, Statistical Modeling, Machine Learning, Git, Hive, Spark, Presto, Diagnostic Analytics, Forecasting, Big Data, problem-solving.' \nQuestion: 'How would you handle missing values in a dataset?'. \nAnalysis: Relevance Score: 9 (The question directly relates to data preprocessing, which is crucial for data analysts). Suitability Score: 8 (A good question for assessing problem-solving skills, though it could be more role-specific by considering the industry context). \nOutput: 9,8. \nProvide the Relevance and Suitability scores to the following job details and question: Job: '{job}', seniority: '{seniority}', Job Summary: '{job_summary}', \nQuestion: '{question}'."

    try:
        response = model.generate_content(prompt)
        feedback = response.text.strip()
        print("response:", feedback)
        return feedback if feedback else ''
    except Exception as e:
        # Best effort: an empty result leaves the row eligible for a retry.
        print(f"Error: {e}")
        return ''


start_time = time.time()
running_time = 0
max_time = 240  # overall time budget for this cell, in seconds

# Rows that actually contain a question. NaN is treated as blank here: a bare
# `.str.strip() != ''` evaluates to True for NaN, and because question_feedback
# returns '' for such rows they would stay "pending" forever and keep the loop
# running until max_time. The question column is never modified by this loop,
# so the mask is computed once.
has_question = jobs_with_code_questions['question'].fillna('').str.strip() != ''
empty_feedback_rows = jobs_with_code_questions[has_question]

while (running_time < max_time) and (empty_feedback_rows.shape[0] > 0):
    # Rotate through the API keys, scoring one batch of questions per key.
    for api_key in API_KEYS.values():
        # Rows that have a question but still no feedback
        empty_feedback_rows = jobs_with_code_questions[(jobs_with_code_questions['feedback'] == '') & has_question]
        if (empty_feedback_rows.shape[0] == 0) or (time.time() - start_time > max_time):
            break

        # Configure Gemini API; `model` is the global read by question_feedback
        os.environ['GOOGLE_API_KEY'] = api_key
        genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
        model = genai.GenerativeModel('gemini-1.5-flash')

        # Get the indices of the first 15 rows with empty "feedback"
        indices_to_update = empty_feedback_rows.index[:15]

        # Apply the function with all required columns
        jobs_with_code_questions.loc[indices_to_update, 'feedback'] = jobs_with_code_questions.loc[indices_to_update].apply(
            lambda row: question_feedback(row['question'], row['job_title'], row['level'], row['job_summary']), axis=1
        )

    # Refresh the pending set so the while-condition sees the result of the last
    # batch, and pause between key rotations only when there is more work to do
    # (previously the cell slept 30 s even after everything was scored).
    empty_feedback_rows = jobs_with_code_questions[(jobs_with_code_questions['feedback'] == '') & has_question]
    running_time = time.time() - start_time
    if (empty_feedback_rows.shape[0] > 0) and (running_time < max_time):
        time.sleep(30)


In [0]:
# Persist the scored code questions (the DataFrame index is written as well).
code_feedback_csv = os.path.join(GEMINI_SIMULATION_DATA_PATH, 'code_questions_feedback.csv')
jobs_with_code_questions.to_csv(code_feedback_csv)

In [0]:

# Seniority labels, indexed by the numeric 'level' code.
seniority_map = ["Internship", "Entry level/Associate", "Mid-Senior level/Manager and above"]


def _seniority_label(level):
    """Translate a numeric seniority code into its label from ``seniority_map``.

    Accepts ints, integer-valued floats (pandas stores an integer column that
    contains NaN as float, e.g. ``1.0``) and numeric strings. Missing,
    non-numeric, fractional or out-of-range values yield "Not specified".
    """
    if pd.isna(level):
        return "Not specified"
    try:
        numeric_level = float(level)
    except (TypeError, ValueError):
        return "Not specified"
    if not numeric_level.is_integer() or not 0 <= numeric_level < len(seniority_map):
        return "Not specified"
    return seniority_map[int(numeric_level)]


def question_feedback(question, job, level, job_summary):
    """Ask the Gemini model to score how well a question fits a job.

    Uses the notebook-level ``model`` object, which the batching loop assigns
    before this function is applied.

    Args:
        question: Interview question text; NaN or blank values are skipped.
        job: Job title inserted into the prompt.
        level: Numeric seniority code used as an index into ``seniority_map``;
            unusable values are reported to the model as "Not specified".
        job_summary: Job summary text inserted into the prompt.

    Returns:
        The stripped response text (the prompt requests "relevance,suitability"),
        or '' when the question is missing/blank or the request raised an
        exception (the error is printed).
    """
    if pd.isna(question) or question.strip() == '':
        return ''

    seniority = _seniority_label(level)

    prompt = f"We need to evaluate how well a given interview question aligns with the requirements of a specific job position. Your task is to assign two scores between 1 and 10 based on the following criteria: \nRelevance Score (1-10): How well does the question relate to the job's details? \nSuitability Score (1-10): How appropriate is the question for assessing a candidate’s fitness for this role in a real interview setting?.\n Provide the two scores as a comma-separated string. Evaluation Process: Consider the core skills and competencies needed for the job. Analyze whether the question effectively tests these skills or knowledge areas. \nInput Example: Job: 'Data Analyst'. seniority: 'junior'. Skills: 'SQL, RDBMS, Non-RDBMS, MySQL, PostgreSQL, MongoDB, Python, Jupyter Notebook, Inferential Statistics, Probability, ETL, Data Pipeline, Automated Reporting, Data Analysis, Statistical Modeling, Machine Learning, Git, Hive, Spark, Presto, Diagnostic Analytics, Forecasting, Big Data, problem-solving.' \nQuestion: 'How would you handle missing values in a dataset?'. \nAnalysis: Relevance Score: 9 (The question directly relates to data preprocessing, which is crucial for data analysts). Suitability Score: 8 (A good question for assessing problem-solving skills, though it could be more role-specific by considering the industry context). \nOutput: 9,8. \nProvide the Relevance and Suitability scores to the following job details and question: Job: '{job}', seniority: '{seniority}', Job Summary: '{job_summary}', \nQuestion: '{question}'."

    try:
        response = model.generate_content(prompt)
        feedback = response.text.strip()
        print("response:", feedback)
        return feedback if feedback else ''
    except Exception as e:
        # Best effort: an empty result leaves the row eligible for a retry.
        print(f"Error: {e}")
        return ''

In [0]:
import time
import google.generativeai as genai
from api_keys import API_KEYS

start_time = time.time()
running_time = 0
max_time = 900  # overall time budget for this cell, in seconds

# Rows that actually contain a question. NaN is treated as blank here: a bare
# `.str.strip() != ''` evaluates to True for NaN, and because question_feedback
# returns '' for such rows they would stay "pending" forever and keep the loop
# spinning until max_time. The question column is never modified by this loop,
# so the mask is computed once.
has_question = jobs_with_open_questions['question'].fillna('').str.strip() != ''
empty_feedback_rows = jobs_with_open_questions[has_question]

while (running_time < max_time) and (empty_feedback_rows.shape[0] > 0):
    # Rotate through the API keys, scoring one batch of questions per key.
    for api_key in API_KEYS.values():
        # Rows that have a question but still no feedback
        empty_feedback_rows = jobs_with_open_questions[(jobs_with_open_questions['feedback'] == '') & has_question]
        if (empty_feedback_rows.shape[0] == 0) or (time.time() - start_time > max_time):
            break

        # Configure Gemini API; `model` is the global read by question_feedback
        os.environ['GOOGLE_API_KEY'] = api_key
        genai.configure(api_key=os.environ['GOOGLE_API_KEY'])
        model = genai.GenerativeModel('gemini-1.5-flash')

        # Get the indices of the first 15 rows with empty "feedback"
        # NOTE(review): rows whose request keeps failing stay at the head of
        # this selection and are retried on every pass until max_time.
        indices_to_update = empty_feedback_rows.index[:15]

        # Apply the function with all required columns
        jobs_with_open_questions.loc[indices_to_update, 'feedback'] = jobs_with_open_questions.loc[indices_to_update].apply(
            lambda row: question_feedback(row['question'], row['job_title'], row['level'], row['job_summary']), axis=1
        )
    running_time = time.time() - start_time

In [0]:
# Persist the scored open questions (the DataFrame index is written as well).
open_feedback_csv = os.path.join(GEMINI_SIMULATION_DATA_PATH, 'open_questions_feedback.csv')
jobs_with_open_questions.to_csv(open_feedback_csv)

In [0]:
# Show the open-question frame, including the 'feedback' column filled in above,
# using the notebook's display() helper.
display(jobs_with_open_questions)

## Visualising results
Visualisations and metric calculations of code questions' true vs. predicted results.

In [0]:
import os
from pyspark.sql import SparkSession
import pandas as pd
from consts import GEMINI_SIMULATION_DATA_PATH, open_csv_file

# Get (or create) the Spark session used for the evaluation.
spark = SparkSession.builder.appName("MatchingEvaluation").getOrCreate()

# Load the saved code-question feedback and keep only the two columns the
# analysis needs: the heuristic score and the raw model feedback.
code_feedback_sdf = open_csv_file(spark, GEMINI_SIMULATION_DATA_PATH, 'code_questions_feedback.csv')
jobs_with_code_questions = code_feedback_sdf.select('heuristic_score', 'feedback')

In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.optimize import curve_fit
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PowerTransformer
from scipy.stats import spearmanr, pearsonr, kendalltau
from pyspark.sql.functions import col, regexp_extract

# Parse the "<relevance>,<suitability>" pair out of the feedback text. Both
# scores are taken from the SAME regex match: extracting relevance as "the first
# number anywhere in the text" picks up unrelated digits (e.g. the "1" of
# "1-10") whenever the reply contains more than the bare pair, while the
# suitability score would still come from the pair.
score_pair_pattern = "(\\d+),\\s*(\\d+)"
predicted_and_true_scores = (
    jobs_with_code_questions
    .withColumn("relevance_score", regexp_extract(col("feedback"), score_pair_pattern, 1).cast("float"))
    .withColumn("suitability_score", regexp_extract(col("feedback"), score_pair_pattern, 2).cast("float"))
    .withColumn("true_score", (col("relevance_score") + col("suitability_score")) / 2)
)

# Load the data. Rows without a parsable score pair (or without a heuristic
# score) are dropped: NaN would make LinearRegression.fit fail and would turn
# the correlation coefficients below into NaN.
predicted_and_true_scores_df = (
    predicted_and_true_scores.toPandas()
    .dropna(subset=["heuristic_score", "true_score"])
    .reset_index(drop=True)
)

# True scores are on a 1-10 scale; bring them to [0, 1] for comparison
normalized_true = predicted_and_true_scores_df["true_score"] / 10

# Logistic Transformation
predicted_and_true_scores_df["transformed_pred"] = 1 / (1 + np.exp(-predicted_and_true_scores_df["heuristic_score"]))

# Fit Linear Regression
# (fitted and evaluated on the same rows, so the errors below are in-sample)
reg = LinearRegression()
reg.fit(predicted_and_true_scores_df[["transformed_pred"]], normalized_true)  # Normalize true score
predicted_and_true_scores_df["adjusted_pred"] = reg.predict(predicted_and_true_scores_df[["transformed_pred"]])

# Optional: Power Transformation (Box-Cox / Yeo-Johnson)
# (stored as a column only; the metrics and plots below use "adjusted_pred")
pt = PowerTransformer(method='yeo-johnson')
predicted_and_true_scores_df["power_transformed_pred"] = pt.fit_transform(predicted_and_true_scores_df[["adjusted_pred"]])

# Evaluate Improvements
mse = ((normalized_true - predicted_and_true_scores_df["adjusted_pred"]) ** 2).mean()
mae = abs(normalized_true - predicted_and_true_scores_df["adjusted_pred"]).mean()
rmse = mse ** 0.5

print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")

# Scatter Plot
plt.figure(figsize=(8,6))
sns.scatterplot(x=predicted_and_true_scores_df["adjusted_pred"], y=normalized_true, alpha=0.6)
plt.xlabel("Transformed Predicted Score")
plt.ylabel("Normalized True Score")
plt.title("Scatter Plot of Transformed Predicted vs True Scores (Code Questions)")
plt.show()

# Histogram
plt.figure(figsize=(8,6))
sns.histplot(predicted_and_true_scores_df["adjusted_pred"], kde=True, color='blue', label='Adjusted Predicted Scores', alpha=0.6)
sns.histplot(normalized_true, kde=True, color='red', label='Normalized True Scores', alpha=0.6)
plt.legend()
plt.xlabel("Score")
plt.title("Distribution of Transformed Predicted and True Scores (Code Questions)")
plt.show()

# Residuals Distribution
residuals = normalized_true - predicted_and_true_scores_df["adjusted_pred"]
plt.figure(figsize=(8,6))
sns.histplot(residuals, kde=True, bins=30, color="purple", alpha=0.6)
plt.axvline(0, color='red', linestyle='dashed')
plt.xlabel("Residual (True - Predicted)")
plt.ylabel("Count")
plt.title("Residual Distribution (Code Questions)")
plt.show()

# Correlation Analysis
spearman_corr, spearman_p = spearmanr(predicted_and_true_scores_df["adjusted_pred"], normalized_true)
pearson_corr, pearson_p = pearsonr(predicted_and_true_scores_df["adjusted_pred"], normalized_true)
kendall_corr, kendall_p = kendalltau(predicted_and_true_scores_df["adjusted_pred"], normalized_true)

print(f"Spearman Correlation: {spearman_corr:.4f} (p-value: {spearman_p:.4f})")
print(f"Pearson Correlation: {pearson_corr:.4f} (p-value: {pearson_p:.4f})")
print(f"Kendall's Tau: {kendall_corr:.4f} (p-value: {kendall_p:.4f})")


In [0]:
from sklearn.metrics import mean_squared_error, mean_absolute_error
from scipy.stats import spearmanr, pearsonr
import numpy as np

def compare_model_to_baseline(predicted_and_true_scores_df, model_metrics=None, seed=None):
    """Print the model's metrics next to a mean baseline and a random baseline.

    Args:
        predicted_and_true_scores_df: DataFrame with a "true_score" column on a
            0-10 scale; rows whose true score is NaN are ignored.
        model_metrics: Optional dict with the model's "MSE", "MAE", "RMSE",
            "Spearman" and "Pearson" values. When omitted, the notebook-level
            variables ``mse``, ``mae``, ``rmse``, ``spearman_corr`` and
            ``pearson_corr`` computed by the preceding analysis cell are used.
        seed: Optional seed for the random baseline. When omitted, the draw
            comes from NumPy's global random state and differs between runs.

    Returns:
        None. A table with one row per metric is printed.

    Raises:
        ValueError: If fewer than two usable true scores are available.
    """
    def _format_metric(value):
        # Numbers get four decimals; placeholders such as "N/A" are printed as-is.
        if isinstance(value, (int, float, np.integer, np.floating)):
            return f"{value:.4f}"
        return str(value)

    # Extract the true normalized scores for comparison
    true_scores = (predicted_and_true_scores_df["true_score"] / 10).dropna().to_numpy(dtype=float)
    if true_scores.size < 2:
        raise ValueError("at least two non-missing true scores are required")

    # Baseline 1: Mean Prediction
    mean_predictions = np.full(true_scores.shape, true_scores.mean())
    mse_mean = float(np.mean((true_scores - mean_predictions) ** 2))
    mae_mean = float(np.mean(np.abs(true_scores - mean_predictions)))
    rmse_mean = np.sqrt(mse_mean)
    # Correlation with a constant predictor is undefined; report NaN directly
    # rather than calling scipy and triggering its constant-input warning.
    spearman_mean = float("nan")
    pearson_mean = float("nan")

    # Baseline 2: Random Prediction
    if seed is None:
        random_predictions = np.random.uniform(low=0.0, high=1.0, size=true_scores.shape)
    else:
        random_predictions = np.random.default_rng(seed).uniform(low=0.0, high=1.0, size=true_scores.shape)

    # Metrics for Random Prediction
    spearman_random, _ = spearmanr(random_predictions, true_scores)
    pearson_random, _ = pearsonr(random_predictions, true_scores)
    mse_random = float(np.mean((true_scores - random_predictions) ** 2))
    mae_random = float(np.mean(np.abs(true_scores - random_predictions)))
    rmse_random = np.sqrt(mse_random)

    # Store Baseline Metrics
    baseline_metrics = {
        "Mean Prediction": {"MSE": mse_mean, "MAE": mae_mean, "RMSE": rmse_mean, "Spearman": spearman_mean, "Pearson": pearson_mean},
        "Random Prediction": {"MSE": mse_random, "MAE": mae_random, "RMSE": rmse_random, "Spearman": spearman_random, "Pearson": pearson_random}
    }

    # Model's Metrics: fall back to the values left in the notebook namespace by
    # the analysis cell when the caller does not pass them explicitly.
    if model_metrics is None:
        model_metrics = {
            "MSE": mse,
            "MAE": mae,
            "RMSE": rmse,
            "Spearman": spearman_corr,
            "Pearson": pearson_corr
        }

    # Print Results
    print(f"{'Metric':<15} {'Model':<15} {'Mean Baseline':<15} {'Random Baseline':<15}")
    for metric, model_value in model_metrics.items():
        mean_baseline_value = baseline_metrics["Mean Prediction"].get(metric, "N/A")
        random_baseline_value = baseline_metrics["Random Prediction"].get(metric, "N/A")
        print(f"{metric:<15} {_format_metric(model_value):<15} {_format_metric(mean_baseline_value):<15} {_format_metric(random_baseline_value):<15}")

# Print the model-vs-baseline table for the code questions. The function reads
# mse, mae, rmse, spearman_corr and pearson_corr from the notebook namespace,
# i.e. the values computed by the code-question analysis cell above.
compare_model_to_baseline(predicted_and_true_scores_df)

Visualisations and metric calculations of open questions' true vs. predicted results.

In [0]:
import os
from pyspark.sql import SparkSession
import pandas as pd
from consts import GEMINI_SIMULATION_DATA_PATH, open_csv_file

# Get (or create) the Spark session used for the evaluation.
spark = SparkSession.builder.appName("MatchingEvaluation").getOrCreate()

# Load the saved open-question feedback and keep only the two columns the
# analysis needs: the heuristic score and the raw model feedback.
open_feedback_sdf = open_csv_file(spark, GEMINI_SIMULATION_DATA_PATH, 'open_questions_feedback.csv')
jobs_with_open_questions = open_feedback_sdf.select('heuristic_score', 'feedback')

In [0]:
from pyspark.sql.functions import col, avg, stddev, count, split, regexp_extract
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy.stats import spearmanr, pearsonr, kendalltau, ttest_rel
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

# Parse the "<relevance>,<suitability>" pair out of the feedback text. Both
# scores are taken from the SAME regex match: extracting relevance as "the first
# number anywhere in the text" picks up unrelated digits (e.g. the "1" of
# "1-10") whenever the reply contains more than the bare pair, while the
# suitability score would still come from the pair.
score_pair_pattern = "(\\d+),\\s*(\\d+)"
predicted_and_true_scores = (
    jobs_with_open_questions.select('heuristic_score', 'feedback')
    .withColumn("relevance_score", regexp_extract(col("feedback"), score_pair_pattern, 1).cast("float"))
    .withColumn("suitability_score", regexp_extract(col("feedback"), score_pair_pattern, 2).cast("float"))
    .withColumn("true_score", (col("relevance_score") + col("suitability_score")) / 2)
)

# Convert to Pandas for visualization and analysis. Rows without a parsable
# score pair (or without a heuristic score) are dropped so that the error
# metrics and the correlations are computed on the same rows; otherwise the
# pandas means skip NaN while the scipy correlations come out as NaN.
predicted_and_true_scores_df = (
    predicted_and_true_scores.toPandas()
    .dropna(subset=["heuristic_score", "true_score"])
    .reset_index(drop=True)
)

# Normalize true scores (range [0,1])
predicted_and_true_scores_df["true_score_normalized"] = predicted_and_true_scores_df["true_score"] / 10
# The heuristic score is used as the prediction as-is (no transformation here)
predicted_and_true_scores_df["final_predicted_score"] = predicted_and_true_scores_df["heuristic_score"]

# Error Metrics
mse = ((predicted_and_true_scores_df["true_score_normalized"] - predicted_and_true_scores_df["final_predicted_score"]) ** 2).mean()
mae = abs(predicted_and_true_scores_df["true_score_normalized"] - predicted_and_true_scores_df["final_predicted_score"]).mean()
rmse = mse ** 0.5

print(f"MSE: {mse:.4f}")
print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")

# Scatter Plot
plt.figure(figsize=(8,6))
sns.scatterplot(x=predicted_and_true_scores_df["final_predicted_score"], y=predicted_and_true_scores_df["true_score_normalized"], alpha=0.6)
plt.xlabel("Transformed Predicted Score")
plt.ylabel("Normalized True Score")
plt.title("Scatter Plot of Transformed Predicted vs True Scores (Open Questions)")
plt.show()

# Histogram
plt.figure(figsize=(8,6))
sns.histplot(predicted_and_true_scores_df["final_predicted_score"], kde=True, color='blue', label='Final Predicted Scores', alpha=0.6)
sns.histplot(predicted_and_true_scores_df["true_score_normalized"], kde=True, color='red', label='Normalized True Scores', alpha=0.6)
plt.legend()
plt.xlabel("Score")
plt.title("Distribution of Transformed Predicted and True Scores (Open Questions)")
plt.show()

# Residual Distribution
residuals = predicted_and_true_scores_df["true_score_normalized"] - predicted_and_true_scores_df["final_predicted_score"]
plt.figure(figsize=(8,6))
sns.histplot(residuals, kde=True, bins=30, color="purple", alpha=0.6)
plt.axvline(0, color='red', linestyle='dashed')
plt.xlabel("Residual (True - Predicted)")
plt.ylabel("Count")
plt.title("Residual Distribution (Open Questions)")
plt.show()

# Correlation Analysis
spearman_corr, _ = spearmanr(predicted_and_true_scores_df["final_predicted_score"], predicted_and_true_scores_df["true_score_normalized"])
pearson_corr, _ = pearsonr(predicted_and_true_scores_df["final_predicted_score"], predicted_and_true_scores_df["true_score_normalized"])
kendall_corr, _ = kendalltau(predicted_and_true_scores_df["final_predicted_score"], predicted_and_true_scores_df["true_score_normalized"])

print(f"Spearman Correlation: {spearman_corr:.3f}")
print(f"Pearson Correlation: {pearson_corr:.3f}")
print(f"Kendall's Tau: {kendall_corr:.3f}")


In [0]:
# Print the model-vs-baseline table for the open questions. The function reads
# mse, mae, rmse, spearman_corr and pearson_corr from the notebook namespace,
# i.e. the values computed by the open-question analysis cell above.
compare_model_to_baseline(predicted_and_true_scores_df)