In [None]:
import os
import random

import numpy as np
import pandas as pd

# (parameter column, delta column) pairs, listed in the order in which the
# random draws are made for every row. Keeping this order fixed keeps seeded
# runs reproducible.
_DELTA_TARGETS = (
    ('Age', 'Delta_Model_Age'),
    ('Gender', 'Delta_Model_Gender'),
    ('Country', 'Delta_Model_Nationality'),
    ('Landmark', 'Delta_Model_Landmark'),
    ('Language', 'Delta_Model_Language'),
)


def calculate_deltas(df):
    """
    Add one delta column per parameter (Age, Gender, Country, Landmark, Language).

    For every row and every parameter, one *other* value of that parameter is
    drawn with ``random.choice`` from the distinct values of its column. The
    first row of ``df`` that carries this drawn value and agrees with the
    current row on the four remaining parameters is looked up, and the
    absolute difference of the two ``Similarity_Score`` values is stored in
    the parameter's delta column. The delta stays NaN when the column has no
    other value or when no row matches the drawn combination.

    Args:
        df: DataFrame with the columns 'Age', 'Gender', 'Country',
            'Landmark', 'Language' and 'Similarity_Score'.

    Returns:
        The same DataFrame object, modified in place, with the columns
        'Delta_Model_Age', 'Delta_Model_Gender', 'Delta_Model_Nationality',
        'Delta_Model_Landmark' and 'Delta_Model_Language' added (or
        overwritten if already present).

    Raises:
        KeyError: If a required column is missing. The check runs before any
            column is added, so ``df`` is left untouched in that case.
    """
    parameters = [parameter for parameter, _ in _DELTA_TARGETS]
    missing = [
        column for column in parameters + ['Similarity_Score']
        if column not in df.columns
    ]
    if missing:
        raise KeyError(f"Missing required column(s): {missing}")

    # Distinct values per parameter, in order of first appearance.
    candidates = {parameter: list(df[parameter].unique()) for parameter in parameters}

    # One tuple of parameter values per row, aligned with `scores` by position.
    combos = list(zip(*(df[parameter].tolist() for parameter in parameters)))
    scores = df['Similarity_Score'].tolist()

    # Score of the first row for each parameter combination. Rows with a
    # missing parameter value are left out: a missing value never compares
    # equal to anything, so such a row can never be the matching row.
    complete = df[parameters].notna().all(axis=1).tolist()
    first_score = {}
    for combo, score, is_complete in zip(combos, scores, complete):
        if is_complete:
            first_score.setdefault(combo, score)

    # Results are collected by row position, so they do not depend on the
    # index labels of `df` being unique.
    deltas = {delta_column: [np.nan] * len(combos) for _, delta_column in _DELTA_TARGETS}
    for position, (combo, score) in enumerate(zip(combos, scores)):
        for slot, (parameter, delta_column) in enumerate(_DELTA_TARGETS):
            alternatives = [
                value for value in candidates[parameter] if value != combo[slot]
            ]
            if not alternatives:
                continue
            # Same row, but with this one parameter swapped for the drawn value.
            variant = combo[:slot] + (random.choice(alternatives),) + combo[slot + 1:]
            if variant in first_score:
                deltas[delta_column][position] = abs(score - first_score[variant])

    for _, delta_column in _DELTA_TARGETS:
        df[delta_column] = np.asarray(deltas[delta_column], dtype=float)

    return df

def process_file(input_file):
    """
    Read one Excel file, add the delta columns and save the result.

    The result is written next to the input as ``<name>_with_deltas.xlsx``,
    where ``<name>`` is the input path without its extension. Any exception
    raised while reading, computing or writing is caught and reported on
    stdout, so a failing file does not stop the caller from processing the
    next one.

    Args:
        input_file: Path of the Excel file to process (string or path-like).
    """
    try:
        # Read the input file
        df = pd.read_excel(input_file)

        # Calculate deltas
        df_with_deltas = calculate_deltas(df)

        # Build the output name from the path without its extension. A plain
        # str.replace('.xlsx', ...) would leave the name unchanged for any
        # other extension (and so overwrite the input file), and would also
        # rewrite '.xlsx' occurring inside a directory name.
        stem, _ = os.path.splitext(os.fspath(input_file))
        output_file = f"{stem}_with_deltas.xlsx"
        df_with_deltas.to_excel(output_file, index=False)
        print(f"Successfully processed {input_file} -> {output_file}")

    except Exception as e:
        # Top-level boundary for a single file: report the error and carry on.
        print(f"Error processing {input_file}: {str(e)}")

def main():
    """Run process_file on each of the hard-coded Excel workbooks."""
    # Replace for other files.
    for workbook in ('Alt_Single.xlsx', 'Alt_Multi_V2.xlsx'):
        process_file(workbook)

if __name__ == "__main__":
    # Seed the `random` module with a fixed value before processing, so the
    # random.choice draws made in calculate_deltas are the same on every run.
    random.seed(42)
    main()

Successfully processed Alt_Single.xlsx -> Alt_Single_with_deltas.xlsx
Successfully processed Alt_Multi_V2.xlsx -> Alt_Multi_V2_with_deltas.xlsx
