In [2]:
# Select which dataset / subset of saved model predictions this run combines.
# Both numbers are interpolated into the submission file paths used below.
dataset_num, subset_num = 2, 1

# Voting Classifier

## Contents

- [Imports](#imports)
- [Load DataFrames](#load-dataframes)
- [Decide Majority Vote](#decide-majority-vote)
- [Create Submission DF](#create-submission)
- [Save Submission](#save-submission)

**This notebook loads every model's predictions and writes a new CSV containing the majority-vote prediction for each passenger ID.**

# Imports

In [3]:
import pandas as pd
import numpy as np

# Load DataFrames

In [4]:
# Models whose saved predictions take part in the vote.
model_names = ['RF', 'XGB', 'LGBM', 'GB']

# Read each model's submission CSV into a DataFrame, keyed by model name.
submission_dict = {
    name: pd.read_csv(
        f'../../data/submissions/data_{dataset_num}/{dataset_num}_{subset_num}/{name}_train_test_data_{dataset_num}_{subset_num}.csv'
    )
    for name in model_names
}

In [6]:
# Turn each model's predictions into a Series indexed by PassengerId and named
# after the model. Aligning on PassengerId (instead of on row position) keeps
# every vote attached to the right passenger even if the files are ordered
# differently.
vote_series = [
    predictions.set_index('PassengerId')['Transported'].rename(name)
    for name, predictions in submission_dict.items()
]

# One row per passenger, one column per model; PassengerId becomes a regular
# column again so it can be carried into the submission.
merged_df = (
    pd.concat(vote_series, axis=1)
    .rename_axis('PassengerId')
    .reset_index()
)

# A missing value here means some model has no prediction for a passenger;
# fail loudly rather than let the vote silently ignore that model.
if merged_df[list(submission_dict)].isna().any().any():
    raise ValueError('At least one passenger is missing a prediction from one or more models.')

# Take an explicit copy so that adding columns to `df` later neither touches
# `merged_df` nor triggers pandas' SettingWithCopyWarning.
df = merged_df[['PassengerId', *submission_dict]].copy()

In [7]:
# Preview the first three rows: PassengerId plus one vote column per model.
df.head(3)

Unnamed: 0,PassengerId,RF,XGB,LGBM,GB
0,0013_01,False,True,True,True
1,0018_01,False,False,False,False
2,0019_01,True,True,True,True


# Decide Majority Vote

In [8]:
# Every column except the passenger key and the result column is a model vote.
# Excluding 'Transported' keeps this cell idempotent: re-running it does not
# count a previously computed result as an extra vote.
vote_columns = [col for col in df.columns if col not in ('PassengerId', 'Transported')]
votes = df[vote_columns]

# Count the True / False votes per passenger (missing values count for neither).
true_votes = votes.eq(True).sum(axis=1)
false_votes = votes.eq(False).sum(axis=1)

# Ties (possible with an even number of models) are broken by the first vote
# column, so the outcome of a split vote is explicit and deterministic.
tie_breaker = votes.iloc[:, 0].eq(True)

# Build a new frame instead of assigning into `df` in place, so this works
# without warnings even when `df` is a slice of another DataFrame.
df = df.assign(
    Transported=(true_votes > false_votes) | ((true_votes == false_votes) & tie_breaker)
)

In [9]:
# Preview the first three rows again, now including the 'Transported' column.
df.head(3)

Unnamed: 0,PassengerId,RF,XGB,LGBM,GB,Transported
0,0013_01,False,True,True,True,True
1,0018_01,False,False,False,False,False
2,0019_01,True,True,True,True,True


# Create Submission

In [10]:
# Keep only the passenger key and the voted prediction for the submission file.
submission = df.loc[:, ['PassengerId', 'Transported']]
submission.head(3)

Unnamed: 0,PassengerId,Transported
0,0013_01,True
1,0018_01,False
2,0019_01,True


# Save Submission

In [12]:
# Write the voted submission into the same directory the per-model files were read from.
output_path = f'../../data/submissions/data_{dataset_num}/{dataset_num}_{subset_num}/VC_train_test_data_{dataset_num}_{subset_num}.csv'
submission.to_csv(output_path, index=False)
display("Submission file generated successfully.")

'Submission file generated successfully.'