In [1]:
# Import necessary libraries
import pandas as pd  # For data manipulation and analysis
import numpy as np  # For numerical operations

# Import metrics from scikit-learn for model evaluation
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score


## Task Initialization and Feature Selection

In [2]:
# Read in the data
train_set = pd.read_csv('Data/train_processed.csv')
dev_set = pd.read_csv('Data/dev_processed.csv')
test_set = pd.read_csv('Data/test_processed.csv')

# Feature columns
columns_1 = ['issue_area_Attorneys', 'issue_area_Civil Rights', 'issue_area_Criminal Procedure', 'issue_area_Due Process', 'issue_area_Economic Activity', 'issue_area_Federal Taxation', 'issue_area_Federalism', 'issue_area_First Amendment', 'issue_area_Interstate Relations', 'issue_area_Judicial Power', 'issue_area_Miscellaneous', 'issue_area_Privacy', 'issue_area_Private Action', 'issue_area_UNKNOWN', 'issue_area_Unions',
            'file_year_2000_2010', 'file_year_2010_2020', 'file_year_before_2000', 'argument_quarter_1.0', 'argument_quarter_2.0', 'argument_quarter_3.0', 'argument_quarter_4.0', 'argument_quarter_nan',
            'court_hearing_length', 'utterances_number','lagged_time', 'speech_rate',]
columns_2 = ['successful_appeal']

# Filter the columns for the train and dev sets
final_columns = columns_1 + [f'embedding_{i}' for i in range(1, 385)] + columns_2
train_set = train_set[final_columns]
dev_set = dev_set[final_columns]

# Filter the columns for the test set
final_columns.remove('successful_appeal')
test_set = test_set[final_columns]

## Baseline Model

### Majority Class Baseline

In [3]:
# Step 1: Find the majority class in the training set
majority_class = train_set['successful_appeal'].value_counts().idxmax()

# Step 2: Predict the majority class for all samples in the development set
majority_preds = [majority_class] * len(dev_set)

# Step 3: Evaluate with accuracy, precision, recall, F1-score, and ROC-AUC
print("Majority Class Baseline Performance:")
print(classification_report(dev_set['successful_appeal'], majority_preds, target_names=['Affirm', 'Reverse']))
try:
    roc_auc = roc_auc_score(dev_set['successful_appeal'], majority_preds)
    print(f"ROC-AUC: {roc_auc:.4f}")
except ValueError:
    print("ROC-AUC: Not applicable (only one class predicted)")


Majority Class Baseline Performance:
              precision    recall  f1-score   support

      Affirm       0.00      0.00      0.00       210
     Reverse       0.64      1.00      0.78       367

    accuracy                           0.64       577
   macro avg       0.32      0.50      0.39       577
weighted avg       0.40      0.64      0.49       577

ROC-AUC: 0.5000


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


### Random Guessing Baseline

In [4]:
# Step 1: Predict randomly (0 or 1) for each sample in the development set
random_preds = np.random.randint(0, 2, len(dev_set))

# Step 2: Evaluate with accuracy, precision, recall, F1-score, and ROC-AUC
print("Random Guessing Baseline Performance:")
print(classification_report(dev_set['successful_appeal'], random_preds, target_names=['Affirm', 'Reverse']))
roc_auc = roc_auc_score(dev_set['successful_appeal'], random_preds)
print(f"ROC-AUC: {roc_auc:.4f}")


Random Guessing Baseline Performance:
              precision    recall  f1-score   support

      Affirm       0.39      0.58      0.46       210
     Reverse       0.66      0.48      0.56       367

    accuracy                           0.51       577
   macro avg       0.53      0.53      0.51       577
weighted avg       0.56      0.51      0.52       577

ROC-AUC: 0.5279
