# **css calculation**

In [None]:
import pandas as pd
import numpy as np
from scipy.spatial.distance import jensenshannon

def load_and_clean_data(filepath):
    """Load the course table from an Excel workbook.

    Only the 'Course' and 'Credits' columns are read. Course names are
    lower-cased and then stripped of surrounding whitespace so the same
    course compares equal across workbooks.

    Args:
        filepath: Path of the Excel file to read.

    Returns:
        DataFrame with the normalised 'Course' column and the 'Credits' column.
    """
    wanted_columns = ['Course', 'Credits']
    table = pd.read_excel(filepath, usecols=wanted_columns)
    lowered = table['Course'].str.lower()
    table['Course'] = lowered.str.strip()
    return table

def create_probability_distribution(df):
    """Build a credit-weighted probability distribution over courses.

    Each course's weight is its share of the total credits in ``df``, so the
    returned values sum to 1 whenever the credit total is positive.

    Args:
        df: DataFrame with a 'Course' column and a numeric 'Credits' column.
            It is left unmodified.

    Returns:
        A Series named 'Probability', indexed by 'Course'. When the credit
        total is not positive (for example an empty frame) the credits are
        returned unscaled, because no distribution can be formed from them.
    """
    # Build a new Series instead of adding a column to the caller's frame.
    weights = df.set_index('Course')['Credits'].rename('Probability')
    total = weights.sum()
    # Dividing by a zero total would only fill the result with NaN/inf.
    if total > 0:
        weights = weights / total
    return weights

def calculate_jsd(p1, p2):
    """Return the base-2 Jensen-Shannon divergence between p1 and p2.

    SciPy's ``jensenshannon`` yields the Jensen-Shannon *distance*, i.e. the
    square root of the divergence, so the result is squared before it is
    returned.
    """
    js_distance = jensenshannon(p1, p2, base=2)
    js_divergence = js_distance ** 2
    return js_divergence


def analyze_multidisciplinary_interdisciplinary(path_double_major, path_half_major_one, path_half_major_two):
    """Score a double major against the two single majors it combines.

    Args:
        path_double_major: Excel file listing the double major's courses.
        path_half_major_one: Excel file listing the first single major's courses.
        path_half_major_two: Excel file listing the second single major's courses.

    Returns:
        dict with three entries:
            'ω Score': Jensen-Shannon divergence between the credit-weighted
                course distribution of the combined single majors and that
                of the double major.
            'Similarity Score': Jaccard index of the two single majors'
                course sets (0.0 when both sets are empty).
            'Interdisciplinary Strength Score (css)': product of the two
                scores above.
    """
    # Load and normalise the three course lists
    df_double = load_and_clean_data(path_double_major)
    df_half_one = load_and_clean_data(path_half_major_one)
    df_half_two = load_and_clean_data(path_half_major_two)

    set_half_one = set(df_half_one['Course'])
    set_half_two = set(df_half_two['Course'])

    # A course listed twice would make the index non-unique and reindex()
    # below would raise, so keep only the first occurrence of each course --
    # the same rule already applied to the single-major union.
    p_double_major = create_probability_distribution(
        df_double.drop_duplicates(subset='Course')
    )
    p_half_union = create_probability_distribution(
        pd.concat([df_half_one, df_half_two]).drop_duplicates(subset='Course')
    )

    # Align both distributions on the same course index so they are comparable;
    # a course missing from one side gets weight 0 there.
    all_courses = p_double_major.index.union(p_half_union.index)
    p_double_major = p_double_major.reindex(all_courses, fill_value=0)
    p_half_union = p_half_union.reindex(all_courses, fill_value=0)

    # Divergence between the single-major union and the double major
    jsd_score = calculate_jsd(p_half_union, p_double_major)

    # Jaccard similarity between the two single majors; guard the empty case
    # so two empty course lists do not raise ZeroDivisionError.
    union_size = len(set_half_one | set_half_two)
    if union_size:
        similarity_score = len(set_half_one & set_half_two) / union_size
    else:
        similarity_score = 0.0

    # css is defined as similarity weighted by divergence
    css = similarity_score * jsd_score

    return {
        'ω Score': jsd_score,
        'Similarity Score': similarity_score,
        'Interdisciplinary Strength Score (css)': css
    }

In [None]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def _find_single_major_paths(major_name, single_folder):
    """Return the two single-major workbook paths for major_name, or None.

    Every underscore in ``major_name`` is tried in turn as the separator
    between the two majors, so a major whose own name contains an underscore
    can still be resolved. The first split for which both
    ``<major>_single_major.xlsx`` files exist in ``single_folder`` wins.
    """
    parts = major_name.split('_')
    for cut in range(1, len(parts)):
        major_one = '_'.join(parts[:cut])
        major_two = '_'.join(parts[cut:])
        path_one = os.path.join(single_folder, f"{major_one}_single_major.xlsx")
        path_two = os.path.join(single_folder, f"{major_two}_single_major.xlsx")
        if os.path.exists(path_one) and os.path.exists(path_two):
            return path_one, path_two
    return None


def process_all_combinations(double_folder, single_folder, output_path):
    """Score every double major in a folder and write the results to Excel.

    Args:
        double_folder: Folder holding ``<one>_<two>_double_major.xlsx`` files.
        single_folder: Folder holding ``<major>_single_major.xlsx`` files.
        output_path: Path of the Excel file the result table is written to.

    Returns:
        DataFrame with columns 'Combination', 'ω', 'Similarity' and 'css',
        one row per double major whose two single-major files were found.
        Double majors without matching single-major files are skipped.
    """
    suffix = '_double_major.xlsx'
    results = []

    # Sort so the row order does not depend on the file system's listing order.
    for filename in sorted(os.listdir(double_folder)):
        if not filename.endswith(suffix):
            continue

        # Strip only the trailing suffix; the stem names the two majors.
        major_name = filename[:-len(suffix)]
        single_paths = _find_single_major_paths(major_name, single_folder)
        if single_paths is None:
            continue

        double_major_path = os.path.join(double_folder, filename)
        analysis_results = analyze_multidisciplinary_interdisciplinary(
            double_major_path, single_paths[0], single_paths[1]
        )
        results.append({
            'Combination': major_name,
            'ω': analysis_results['ω Score'],
            'Similarity': analysis_results['Similarity Score'],
            'css': analysis_results['Interdisciplinary Strength Score (css)']
        })

    # Explicit columns keep the header row even when nothing was processed.
    results_df = pd.DataFrame(results, columns=['Combination', 'ω', 'Similarity', 'css'])

    results_df.to_excel(output_path, index=False)
    return results_df
