# Mixed-Bag Model for Text Classification

## Import necessary libraries

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np

## Load the saved model probabilities (LightGBM and SVC)

In [2]:
def _read_probs(csv_path):
    """Return the 'probs' column of the CSV at *csv_path* as a NumPy array."""
    frame = pd.read_csv(csv_path)
    return frame['probs'].to_numpy()


# One probability vector per model, both taken from the 'probs' column.
lgbm_preds = _read_probs('Outputs/lgbm_probs.csv')
svc_preds = _read_probs('Outputs/svc_probs.csv')

In [3]:
lgbm_preds

array([0.55802891, 0.38647983, 0.55749522, 0.26699867, 0.25808114,
       0.67450369, 0.33166712, 0.56364065, 0.62239109, 0.27338185,
       0.25808114, 0.25808114, 0.3525049 , 0.30840639, 0.44403226,
       0.48201303, 0.38938914, 0.66983997, 0.58054353, 0.68393333,
       0.36128527, 0.49474211, 0.7051438 , 0.71409666, 0.25808114,
       0.26703429, 0.25808114, 0.51920893, 0.71667573, 0.72035231,
       0.63379587, 0.39555719, 0.28039396, 0.28648703, 0.71409666,
       0.63280224, 0.720729  , 0.61469382, 0.66983997, 0.26703429,
       0.70893603, 0.66704769, 0.49335709, 0.68890004, 0.5055468 ,
       0.66704769, 0.35552054, 0.28097454, 0.60590107, 0.34747906,
       0.25808114, 0.6252036 , 0.49024257, 0.39789591, 0.71667573,
       0.68393333, 0.63138134, 0.52855982, 0.50680306, 0.30174685,
       0.72035231, 0.41658152, 0.46177893, 0.52046462, 0.4377836 ,
       0.54952326, 0.39786679, 0.25808114, 0.59864149, 0.42214598,
       0.62538671, 0.51852737, 0.49335709, 0.49335709, 0.70514

In [4]:
svc_preds

array([9.25517524e-01, 1.69166237e-03, 8.26578294e-01, 4.46103661e-04,
       6.73242387e-05, 9.55156435e-01, 7.20422820e-01, 1.73278841e-01,
       9.12820693e-01, 4.92996564e-02, 2.76577843e-02, 2.48444050e-02,
       2.06607780e-01, 3.88027751e-02, 7.09639946e-01, 5.85800580e-01,
       1.56481184e-01, 6.32832782e-01, 9.78220095e-01, 9.34870092e-01,
       2.53003282e-02, 5.41301173e-01, 9.38823852e-01, 9.03887727e-01,
       1.59753352e-01, 1.74498292e-04, 3.36977532e-05, 7.61426863e-01,
       9.15502837e-01, 9.43816136e-01, 7.91094670e-01, 6.33276870e-01,
       7.13691980e-07, 3.49975424e-01, 9.37296957e-01, 6.13959753e-01,
       9.83716559e-01, 8.45882562e-01, 8.82796465e-01, 4.63530567e-02,
       9.78050275e-01, 9.35124868e-01, 1.08918910e-01, 9.78074899e-01,
       4.22039311e-01, 7.29173293e-01, 7.07597238e-01, 7.86946783e-01,
       6.42940125e-01, 4.29373746e-03, 5.59710365e-07, 7.43626251e-01,
       4.11109166e-01, 1.40094852e-01, 9.67165700e-01, 8.52642263e-01,
      

## Combine the two models' probabilities into final class predictions

In [5]:
# Collects one hard 0/1 label per row; populated by the combination step that follows.
final_preds = []

In [6]:
# Blend the LightGBM and SVC scores into hard 0/1 labels, one per row.
#
# Rules, checked in this order (the first match wins):
#   1. either model scores below LOW_CONFIDENCE_CUTOFF  -> 0
#   2. either model scores above HIGH_CONFIDENCE_CUTOFF -> 1
#   3. otherwise                                        -> 1 if the mean score
#      is at least 0.5, else 0
#
# NOTE(review): because rule 1 is checked first, a row where one model is below
# the low cut-off and the other is above the high cut-off is labelled 0.
LOW_CONFIDENCE_CUTOFF = 0.3
HIGH_CONFIDENCE_CUTOFF = 0.7

lgbm_scores = np.asarray(lgbm_preds, dtype=float)
svc_scores = np.asarray(svc_preds, dtype=float)

# Pairing the two vectors element by element only makes sense when they line
# up; refuse mismatched inputs instead of silently dropping the surplus rows.
if lgbm_scores.shape != svc_scores.shape:
    raise ValueError(
        f"prediction vectors differ in shape: "
        f"lgbm {lgbm_scores.shape} vs svc {svc_scores.shape}"
    )

# Every comparison against NaN is False, so a NaN score would slip through the
# threshold rules unnoticed; fail loudly instead.
if np.isnan(lgbm_scores).any() or np.isnan(svc_scores).any():
    raise ValueError("prediction vectors must not contain NaN")

# np.select takes the first condition that holds for each row, which keeps
# the rule priority listed above.
blended_labels = np.select(
    [
        np.minimum(lgbm_scores, svc_scores) < LOW_CONFIDENCE_CUTOFF,
        np.maximum(lgbm_scores, svc_scores) > HIGH_CONFIDENCE_CUTOFF,
    ],
    [0, 1],
    default=((lgbm_scores + svc_scores) / 2 >= 0.5).astype(int),
)

# .tolist() converts to plain Python ints so final_preds stays a list of int.
final_preds.extend(blended_labels.tolist())

In [7]:
final_preds

[1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 0,


In [8]:
# Submission table: a 0-based running 'id' next to each predicted 'class'.
submission_columns = {
    'id': range(len(final_preds)),
    'class': final_preds,
}
final_output = pd.DataFrame(submission_columns)

# index=False keeps the DataFrame index out of the file; 'id' already numbers the rows.
final_output.to_csv('Outputs/mixed_output_1.csv', index=False)

In [9]:
final_output['class'].value_counts()

class
0    522
1    478
Name: count, dtype: int64