In [14]:
def do_ranking(melted_df, higher_is_better=False):
    """Rank classifiers within each dataset and average the ranks per classifier.

    Parameters
    ----------
    melted_df : pandas.DataFrame
        Long-format results. The columns 'dataset_name', 'classifier_name'
        and 'accuracy' are read; the frame itself is not modified.
    higher_is_better : bool, default False
        Direction of the score stored in 'accuracy'. With the default, the
        smallest value in a dataset receives rank 1 (suitable for error
        metrics); pass True when larger values are better.

    Returns
    -------
    pandas.DataFrame
        Columns 'classifier_name' and 'rank' (mean rank over datasets),
        sorted by ascending mean rank with a fresh 0..n-1 index.

    Side effects
    ------------
    Prints the classifier names and their average ranks as two lists.
    """
    # Rank inside each dataset directly; ties share the average of their ranks.
    # The grouped rank is aligned on the original index, so no sorting or
    # groupby.apply round-trip is needed.
    per_dataset_rank = melted_df.groupby('dataset_name')['accuracy'].rank(
        method='average', ascending=not higher_is_better
    )
    # assign() returns a new frame, leaving the caller's DataFrame untouched
    ranked_df = melted_df.assign(rank=per_dataset_rank)

    # Compute average rank per classifier, best (lowest) average rank first
    average_rank_per_classifier = ranked_df.groupby('classifier_name')['rank'].mean().reset_index()
    average_rank_per_classifier = average_rank_per_classifier.sort_values(by='rank', ascending=True).reset_index(drop=True)

    # Extract lists
    classifier_names = average_rank_per_classifier['classifier_name'].tolist()
    average_ranks = average_rank_per_classifier['rank'].tolist()

    # Display the lists
    print("Classifier Names:", classifier_names)
    print("Average Ranks:", average_ranks)

    # Return the DataFrame sorted by average rank
    return average_rank_per_classifier


In [15]:
import pandas as pd

# Location of the spreadsheet holding the benchmark results.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
file_path = '/home/sim/Desktop/TS Extrinsic Regression/AutoML-Results.ods'

# Read the 'result' sheet using the 'odf' engine
result_df = pd.read_excel(file_path, engine='odf', sheet_name='result')

# Reshape wide -> long: one row per (dataset, classifier) pair, then
# normalise the dataset column name and discard pairs without a score.
# NOTE(review): the printed values look like an error metric rather than an
# accuracy in [0, 1] — confirm what the 'accuracy' column actually holds.
melted_df = (
    result_df
    .melt(id_vars=['Dataset Name'], var_name='classifier_name', value_name='accuracy')
    .rename(columns={'Dataset Name': 'dataset_name'})
    .dropna(subset=['accuracy'])
)

# Lift the row limit so printed DataFrames are shown in full from here on
pd.set_option('display.max_rows', None)

print(melted_df)


                   dataset_name classifier_name    accuracy
0              AppliancesEnergy       AutoGluon    2.155978
1            BeijingPM10Quality       AutoGluon  108.381598
2            BeijingPM25Quality       AutoGluon   75.187533
3          BenzeneConcentration       AutoGluon    3.874767
4                   Covid3Month       AutoGluon    0.040439
5                FloodModeling1       AutoGluon    0.013680
6                FloodModeling2       AutoGluon    0.019079
7    HouseholdPowerConsumption1       AutoGluon  145.957598
8    HouseholdPowerConsumption2       AutoGluon   46.925185
9                       IEEEPPG       AutoGluon   25.574776
10           NewsTitleSentiment       AutoGluon    0.138126
11             AppliancesEnergy           FLAML    2.516904
12           BeijingPM10Quality           FLAML  108.381598
13           BeijingPM25Quality           FLAML   76.546076
14         BenzeneConcentration           FLAML    4.001022
15                  Covid3Month         

In [16]:
# Rank the classifiers on a copy of melted_df; the returned average-rank
# table is the last expression of the cell and is therefore displayed.
do_ranking(melted_df=melted_df.copy())

Classifier Names: ['Rocket', 'AutoGluon', 'FCN', 'XGBoost', 'FLAML', 'Random Forest', 'ResNet', 'Inception', 'FPCR', 'FPCR-Bspline', '5-NN-DTWD', '5-NN-ED', 'SVR Optimised', 'SVR', '1-NN-DTWD', '1-NN-ED']
Average Ranks: [4.454545454545454, 5.7272727272727275, 6.090909090909091, 6.090909090909091, 6.2272727272727275, 6.636363636363637, 6.7272727272727275, 6.818181818181818, 9.090909090909092, 9.318181818181818, 9.363636363636363, 10.545454545454545, 10.954545454545455, 12.409090909090908, 12.545454545454545, 13.0]


Unnamed: 0,classifier_name,rank
0,Rocket,4.454545
1,AutoGluon,5.727273
2,FCN,6.090909
3,XGBoost,6.090909
4,FLAML,6.227273
5,Random Forest,6.636364
6,ResNet,6.727273
7,Inception,6.818182
8,FPCR,9.090909
9,FPCR-Bspline,9.318182


In [17]:
# Build ranked_df: the rank of every classifier within each dataset

# Per-group helper: order the rows by accuracy and attach their ranks
def rank_accuracies(group):
    """Return `group` sorted by 'accuracy' with an added 'rank' column.

    Ranks are ascending (the smallest accuracy value gets rank 1) and tied
    values share the average of the ranks they span. The input frame is
    left unchanged; a new DataFrame is returned.
    """
    by_accuracy = group.sort_values('accuracy')
    return by_accuracy.assign(rank=by_accuracy['accuracy'].rank(method='average'))

# Rank every classifier within its dataset (ties share the average rank).
# The grouped rank is computed directly instead of through groupby.apply:
# apply's handling of the grouping column is deprecated in recent pandas,
# and relying on it risks losing 'dataset_name' from the result.
ranked_df = (
    melted_df
    .assign(rank=melted_df.groupby('dataset_name')['accuracy'].rank(method='average'))
    # Same presentation as before: datasets in sorted order, rows by accuracy
    .sort_values(['dataset_name', 'accuracy'])
    .reset_index(drop=True)
)

# Display the ranked DataFrame
print(ranked_df)

                   dataset_name classifier_name    accuracy  rank
0              AppliancesEnergy       AutoGluon    2.155978   1.0
1              AppliancesEnergy          Rocket    2.299031   2.0
2              AppliancesEnergy           FLAML    2.516904   3.0
3              AppliancesEnergy             FCN    2.865684   4.0
4              AppliancesEnergy          ResNet    3.065047   5.0
5              AppliancesEnergy   SVR Optimised    3.454574   6.0
6              AppliancesEnergy   Random Forest    3.455120   7.0
7              AppliancesEnergy             SVR    3.457631   8.0
8              AppliancesEnergy         XGBoost    3.489024   9.0
9              AppliancesEnergy       5-NN-DTWD    4.019873  10.0
10             AppliancesEnergy         5-NN-ED    4.227438  11.0
11             AppliancesEnergy       Inception    4.435330  12.0
12             AppliancesEnergy         1-NN-ED    5.231953  13.0
13             AppliancesEnergy            FPCR    5.405052  14.5
14        

In [18]:
# Mean rank of each classifier over all datasets, lowest (best) mean first
average_rank_per_classifier = (
    ranked_df
    .groupby('classifier_name')['rank']
    .mean()
    .reset_index()
    .sort_values(by='rank', ascending=True)
    .reset_index(drop=True)
)

# Parallel lists in the same (ascending average rank) order
average_ranks = average_rank_per_classifier['rank'].tolist()
classifier_names = average_rank_per_classifier['classifier_name'].tolist()

# Show both lists
print("Classifier Names:", classifier_names)
print("Average Ranks:", average_ranks)

# Last expression of the cell: the table of average ranks is displayed
average_rank_per_classifier

Classifier Names: ['Rocket', 'AutoGluon', 'FCN', 'XGBoost', 'FLAML', 'Random Forest', 'ResNet', 'Inception', 'FPCR', 'FPCR-Bspline', '5-NN-DTWD', '5-NN-ED', 'SVR Optimised', 'SVR', '1-NN-DTWD', '1-NN-ED']
Average Ranks: [4.454545454545454, 5.7272727272727275, 6.090909090909091, 6.090909090909091, 6.2272727272727275, 6.636363636363637, 6.7272727272727275, 6.818181818181818, 9.090909090909092, 9.318181818181818, 9.363636363636363, 10.545454545454545, 10.954545454545455, 12.409090909090908, 12.545454545454545, 13.0]


Unnamed: 0,classifier_name,rank
0,Rocket,4.454545
1,AutoGluon,5.727273
2,FCN,6.090909
3,XGBoost,6.090909
4,FLAML,6.227273
5,Random Forest,6.636364
6,ResNet,6.727273
7,Inception,6.818182
8,FPCR,9.090909
9,FPCR-Bspline,9.318182
