# Training

In [18]:
from fraudetect.dataset import load_data
from fraudetect.preprocessing.preprocessing import  load_workflow
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline


In [68]:
import torch.nn as nn
import torch
from unet.unet import UNet
import numpy as np
from skorch import NeuralNetClassifier

class UNet1DFraudClassifier(nn.Module):
    """1-D U-Net followed by a log-softmax over the channel axis (``dim=1``).

    Parameters
    ----------
    in_channels : int
        Number of input channels, forwarded to ``UNet``.
    out_classes : int, default 1
        Number of output channels, forwarded to ``UNet``. Because the output
        goes through ``nn.LogSoftmax(dim=1)``, a value of 1 normalises over a
        single channel and yields ``log(1) == 0`` everywhere; use at least 2
        (e.g. fraud / non-fraud) to obtain usable log-probabilities. The
        default is kept at 1 for backward compatibility and triggers a warning.
    num_encoding_blocks : int, default 3
        Forwarded to ``UNet``.
    """

    def __init__(self, in_channels, out_classes=1, num_encoding_blocks=3):
        super().__init__()

        if out_classes < 2:
            # Local import so the cell does not depend on another import cell.
            import warnings

            warnings.warn(
                f"out_classes={out_classes}: LogSoftmax(dim=1) over a single "
                "channel is identically 0, so the model output carries no "
                "information. Use out_classes>=2 (e.g. 2 with nn.NLLLoss).",
                UserWarning,
                stacklevel=2,
            )

        self.unet = UNet(
            in_channels=in_channels,
            out_classes=out_classes,
            dimensions=1,
            num_encoding_blocks=num_encoding_blocks,
            out_channels_first_layer=12,
            dropout=0.2,
            normalization='batch',
            pooling_type='max',
            upsampling_type='conv',
            padding=1,
            activation='ReLU'
        )
        # Normalise across dim=1 so the result can be fed to nn.NLLLoss.
        self.softmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Run the U-Net on ``x`` and return log-softmax scores over ``dim=1``.

        NOTE(review): presumably ``x`` is (batch, in_channels, length) — confirm
        against the windows produced by the dataset, which are (seq_len, features).
        """
        return self.softmax(self.unet(x))

In [69]:
# Name of the synthetic customer identifier column; it is also the first
# entry of the interaction columns below.
uid_col_name = "CustomerUID"

# Source columns combined into the identifier above (alternative: [None,]).
uid_cols = ['AccountId', 'CUSTOMER_ID']

# Columns handed to the workflow as `cols_to_drop`.
cols_to_drop = [
    'CurrencyCode', 'CountryCode', 'BatchId',
    'CUSTOMER_ID', 'TRANSACTION_ID',
    'TX_DATETIME', 'TX_TIME_DAYS',
    'SubscriptionId', 'AccountId',
]

# Categorical columns handed to the workflow as `interaction_cat_cols`.
interaction_cat_cols = [
    uid_col_name,
    'ChannelId',
    'PricingStrategy',
    'ProductId',
    'ProviderId',
]

# Similarity encoding is disabled (alternative: ['ProductCategory',]).
cat_similarity_encode = None

In [109]:
# Build the preprocessing workflow (no classifier attached) from the column
# lists defined above. Keywords are grouped by theme; the values are unchanged.
workflow = load_workflow(
    # --- estimator / cross-validation keywords
    classifier=None,
    detector_list=None,
    n_splits=5,
    cv_gap=5000,
    scoring="f1",
    n_jobs=2,
    # --- column / identifier keywords
    cols_to_drop=cols_to_drop,
    uid_cols=uid_cols,
    uid_col_name=uid_col_name,
    reorder_by=['TX_DATETIME'],  # alternative: ['TX_DATETIME', 'AccountId']
    behavioral_drift_cols=["AccountId"],
    cluster_on_feature="AccountId",  # not used
    # --- engineered-feature keywords
    session_gap_minutes=3 * 60,
    windows_size_in_days=[1, 7, 30],
    add_fraud_rate_features=True,
    add_cum_features=True,
    add_fft=False,
    add_seasonal_features=False,
    use_sincos=True,
    use_spline=False,
    # --- categorical-encoding keywords
    onehot_threshold=9,
    cat_encoding_method="binary",
    cat_similarity_encode=cat_similarity_encode,
    nlp_model_name='en_core_web_md',
    # --- interaction keywords
    add_poly_interactions=True,
    interaction_cat_cols=interaction_cat_cols,
    poly_degree=1,
    poly_cat_encoder_name="binary",
    # --- feature-selection keywords (selector options: None, selectkbest, smartcorrelated)
    feature_selector_name='smartcorrelated',
    feature_select_estimator=None,
    corr_method="spearman",
    corr_threshold=0.81,
    top_k_best=60,
    # --- dimensionality-reduction / imputation keywords
    do_pca=False,
    pca_n_components=20,
    use_nystrom=False,
    nystroem_components=20,
    add_imputer=False,
)
workflow

In [138]:
import torch
from torch.utils.data import Dataset
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from tqdm import tqdm

class GroupedTimeSeriesDataset(Dataset):
    """Fixed-length sliding windows cut from each group's time-ordered rows.

    The preprocessor is fitted on the whole of ``df``; the group, time and
    target columns are then re-attached to its output, rows are sorted by
    ``time_col`` and grouped by ``group_col``, and each group is cut into
    windows of ``seq_len`` rows. Groups shorter than ``seq_len`` yield nothing.

    Parameters
    ----------
    df : pd.DataFrame
        Raw frame containing ``group_col``, ``time_col`` and ``target_col``.
    preprocessor : Pipeline
        Object exposing ``fit_transform(X, y)``. Its output must accept column
        assignment (e.g. a DataFrame).
    group_col : str, default 'AccountId'
        Column whose values define the groups.
    seq_len : int, default 100
        Number of rows per window; must be >= 1.
    overlap : float, default 0.2
        Fraction of a window shared with the next one. The stride is
        ``max(1, int(seq_len * (1 - overlap)))``.
    target_col : str, default 'TX_FRAUD'
        Label column.
    time_col : str, default 'TX_DATETIME'
        Column used to order rows before grouping.

    Raises
    ------
    ValueError
        If ``seq_len`` is smaller than 1.
    """

    def __init__(self,
                 df: pd.DataFrame,
                 preprocessor: "Pipeline",
                 group_col='AccountId',
                 seq_len=100,
                 overlap=0.2,
                 target_col='TX_FRAUD',
                 time_col='TX_DATETIME'):

        if seq_len < 1:
            raise ValueError(f"seq_len must be >= 1, got {seq_len}")

        self.seq_len = seq_len
        self.samples = []
        self.preprocessor = preprocessor

        # Stride between consecutive windows; never smaller than one row.
        step_size = max(1, int(seq_len * (1 - overlap)))

        # Use the frame supplied by the caller. It used to be overwritten by a
        # reload from a hard-coded absolute path, which ignored `df` entirely.
        print('Fitting preprocessor on the entire dataset...')
        X_train = df.drop(columns=[target_col])
        y_train = df[target_col]
        X_preprocessed = self.preprocessor.fit_transform(X_train, y_train)

        # Re-attach the bookkeeping columns needed for sorting and grouping.
        # NOTE(review): assignment aligns on the index, so this assumes the
        # preprocessor keeps `df`'s index — confirm for the real workflow.
        X_preprocessed[group_col] = df[group_col]
        X_preprocessed[time_col] = df[time_col]
        X_preprocessed[target_col] = df[target_col]

        # Order rows by time, then split per group.
        grouped = (X_preprocessed
                   .sort_values(by=[time_col])
                   .drop(columns=[time_col])
                   .groupby(group_col))

        for _, group in tqdm(grouped, desc="Processing groups"):

            features = (group
                        .drop(columns=[target_col, group_col])
                        .values.astype(np.float32))
            targets = group[target_col].values.astype(np.int8)

            # One sample per window: (seq_len, n_features) features and the
            # label of every row in the window, shape (seq_len,).
            for start in range(0, len(group) - seq_len + 1, step_size):
                stop = start + seq_len
                self.samples.append((features[start:stop, :], targets[start:stop]))

    def __len__(self):
        """Number of windows collected across all groups."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return window ``idx`` as ``(float32 features, int64 labels)`` tensors."""
        x_seq, y_seq = self.samples[idx]

        x_seq = torch.tensor(x_seq, dtype=torch.float32)
        y_seq = torch.tensor(y_seq, dtype=torch.long)

        return x_seq, y_seq


In [139]:
# Instantiate the bare module and count its trainable parameters.
# NOTE(review): the windows shown further down have 132 feature columns while
# in_channels is 131 here — confirm which one is current.
unet = UNet1DFraudClassifier(out_classes=1, in_channels=131)

sum(map(torch.Tensor.numel,
        filter(lambda param: param.requires_grad, unet.parameters())))

33061

In [140]:
# Load the labelled training set through the same relative path the later
# sections of this notebook use, rather than a machine-specific absolute path.
train_data = load_data("../data/training.csv")

# Split into features and the fraud label.
X_train = train_data.drop(columns=['TX_FRAUD'])
y_train = train_data['TX_FRAUD']

In [None]:
X_t = workflow.fit_transform(X_train, y_train)

In [None]:
X_t = X_t.convert_dtypes()
X_t.info()

In [142]:
dataset = GroupedTimeSeriesDataset(df=train_data,
                                   preprocessor=workflow,
                                   group_col='AccountId',
                                   seq_len=100,
                                   overlap=0.2
                                  )

Fitting preprocessor on the entire dataset...


Processing groups: 100%|██████████| 3633/3633 [00:11<00:00, 304.94it/s]


In [144]:
dataset.samples[0]

(array([[1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 0., 1., 1.],
        [0., 0., 1., ..., 0., 1., 1.],
        [0., 1., 0., ..., 1., 0., 0.]], shape=(100, 132), dtype=float32),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8))

In [146]:
# Fetch one window explicitly so this cell does not rely on `x` / `y` left
# over from an earlier kernel session.
x, y = dataset[0]
x.shape, y.shape

(torch.Size([100, 132]), torch.Size([100]))

In [61]:
# Wrap the U-Net classifier in skorch's NeuralNetClassifier: NLLLoss criterion,
# Adam optimiser (lr=3e-4), 20 epochs, batches of 64, shuffled training
# iterator, and CUDA when available.
# NOTE(review): module__in_channels=50 differs from the in_channels=131 used
# when the bare module is instantiated earlier in this notebook — confirm.
# NOTE(review): out_classes is left at the module default (1), for which
# LogSoftmax(dim=1) is identically 0 — presumably module__out_classes=2 is
# wanted with NLLLoss; verify.
sk_unet = NeuralNetClassifier(
            UNet1DFraudClassifier,
            module__in_channels=50,
            criterion=nn.NLLLoss,
            optimizer=torch.optim.Adam,
            lr=3e-4,
            max_epochs=20,
            batch_size=64,
            iterator_train__shuffle=True,
            device='cuda' if torch.cuda.is_available() else 'cpu',
        )

In [64]:
pred = unet(torch.Tensor(x_in))
pred.shape

torch.Size([1, 1, 500])

### Dimensionality reduction

There are interesting methods to try:
- AlignedUMAP -> https://umap-learn.readthedocs.io/en/latest/aligned_umap_politics_demo.html
- autoencoders (e.g. VAE)


In [None]:
from pyod.models.vae import VAE
import umap
from fraudetect.dataset import load_data
from fraudetect.preprocessing import load_workflow
from fraudetect.config import COLUMNS_TO_DROP
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import minmax_scale
import joblib
import numpy as np

In [None]:
# load data

raw_data_train = load_data("../data/training.csv")

raw_data_pred = load_data("../data/test.csv")

In [None]:
# Preprocessing workflow for the dimensionality-reduction experiments: built
# without a classifier or detectors, with PCA, FFT, seasonal, Nystroem and
# sin/cos options switched off and the spline option switched on.
data_preprocessor = load_workflow(
    classifier=None,
    cols_to_drop=COLUMNS_TO_DROP,
    pca_n_components=80,
    detector_list=None,  # model_list,
    session_gap_minutes=60 * 3,
    # NOTE(review): a one-element list holding None — presumably this disables
    # the synthetic customer id; confirm against load_workflow.
    uid_cols=[
        None,
    ],
    add_imputer=False,
    reorder_by=['TX_DATETIME'],
    # NOTE(review): this is the string 'None', not the None object — verify
    # that load_workflow treats it as "no feature selector".
    feature_selector_name='None',  # "selectkbest",
    top_k_best=50,
    windows_size_in_days=[1, 7, 30],
    cat_encoding_method='binary',
    imputer_n_neighbors=9,
    n_clusters=0,
    do_pca=False,
    verbose=True,
    n_jobs=1,
    add_fft=False,
    add_seasonal_features=False,
    use_nystrom=False,
    nystroem_components=20,
    nystroem_kernel="poly",
    use_sincos=False,
    use_spline=True,
    spline_degree=3,
    spline_n_knots=6,
)

# Data: split the raw training frame into the fraud label and the features.
y_train = raw_data_train["TX_FRAUD"]
X_train = raw_data_train.drop(columns=['TX_FRAUD'])


In [None]:
y_train

In [None]:
X_train.head()

In [None]:
data_preprocessor

In [None]:
X_t = data_preprocessor.fit_transform(X=X_train,y=y_train)

In [None]:
from sklearn.base import TransformerMixin, BaseEstimator

In [None]:
def get_params(estimator: "BaseEstimator | TransformerMixin"):
    """Return the parameter dictionary of ``estimator``.

    ``estimator.get_params()`` is returned as soon as at least one value is
    not a ``BaseEstimator`` or not a ``TransformerMixin``. If every value is
    both (or the dictionary is empty), the first estimator found under a
    ``'steps'`` entry is inspected recursively. When there is no such entry,
    the dictionary itself is returned, so the function never yields ``None``.

    Parameters
    ----------
    estimator : BaseEstimator | TransformerMixin
        Any object exposing ``get_params()``.

    Returns
    -------
    dict
        The parameter dictionary selected as described above.
    """
    params = estimator.get_params()
    values = list(params.values())

    if not all(isinstance(value, BaseEstimator) for value in values):
        return params

    if not all(isinstance(value, TransformerMixin) for value in values):
        return params

    # Membership test on the dict itself (it was `params.key()`, which raised
    # AttributeError whenever this branch was reached).
    steps = params.get('steps', ())
    if isinstance(steps, (list, tuple)):
        for step in steps:
            # Pipeline steps are (name, estimator) pairs; unwrap before testing.
            candidate = step[1] if isinstance(step, tuple) else step
            if isinstance(candidate, (BaseEstimator, TransformerMixin)):
                return get_params(candidate)

    return params

In [None]:
data_preprocessor[0].get_params()

In [None]:
data_preprocessor.get_params()

In [None]:
# non - fraudulent
X_normal = X_train.loc[y_train<1.,:].reset_index(drop=True)
X_normal.head()

In [None]:
y_normal = y_train.loc[y_train<1].reset_index(drop=True)
y_normal

In [None]:
X_normal_preprocessed = data_preprocessor.fit_transform(X=X_normal,y=y_normal)


X_normal_preprocessed


In [None]:
X_train_preprocessed = data_preprocessor.transform(X=X_train)
X_train_preprocessed

In [None]:
np.isnan(X_normal_preprocessed).sum()

#### VAE

In [None]:
# VAE model
vae_autoencoder = VAE(contamination=1e-3,verbose=2,epoch_num=30,
                      batch_norm=True,
                      latent_dim=10,
                      optimizer_params={'weight_decay': 1e-04},
                      output_activation_name='relu',
                      random_state=41
                      )
vae_autoencoder

In [None]:
vae_autoencoder.load('../models/vae_autoencoder.joblib')

In [None]:
# fit and save
X_normal_preprocessed_scaled = minmax_scale(X_normal_preprocessed,feature_range=(0,1))

vae_autoencoder.fit(X_normal_preprocessed_scaled)

In [None]:
# save
vae_autoencoder.save('../models/vae_autoencoder.joblib')

#### UMAP

In [None]:
# umap
import pandas as pd
from umap import AlignedUMAP

In [None]:


# Template: embed two consecutive time slices with AlignedUMAP, tying together
# rows that belong to the same account.
# NOTE(review): `df` is not defined anywhere in the visible notebook; it must
# be a DataFrame with 'AccountId', 'TransactionStartTime' and the feature
# columns below before this cell can run.

# 1) Engineered feature columns used for the embedding
feature_cols = ['Amount', 'TimeSinceLastTxn', 'Txn1hCount']

# 2) Create two time slices
df['ts'] = pd.to_datetime(df['TransactionStartTime'])
slice1 = df[(df.ts >= '2024-01-01') & (df.ts < '2024-04-01')]
slice2 = df[(df.ts >= '2024-04-01') & (df.ts < '2024-07-01')]

# 3) Extract feature matrices and account labels
X1, ids1 = slice1[feature_cols].values, slice1['AccountId'].values
X2, ids2 = slice2[feature_cols].values, slice2['AccountId'].values

# 4) Build the relation {row_in_X1: row_in_X2} for accounts present in both
#    slices. A single pass over ids2 records the first row of each account,
#    which replaces the quadratic `list(ids2).index(acct)` lookup.
first_row_in_slice2 = {}
for j, acct in enumerate(ids2):
    first_row_in_slice2.setdefault(acct, j)

alignment = {
    i: first_row_in_slice2[acct]
    for i, acct in enumerate(ids1)
    if acct in first_row_in_slice2
}

# 5) Run AlignedUMAP. The links between consecutive slices are passed through
#    the `relations` keyword as one dict per pair of slices.
au = AlignedUMAP(
    n_neighbors=15,
    n_components=2,
    alignment_window_size=1
)
embeddings = au.fit_transform([X1, X2], relations=[alignment])

# `embeddings` is a list of two (N1×2) and (N2×2) arrays
emb1, emb2 = embeddings

# 6) (Optional) merge back for plotting
out1 = pd.DataFrame(emb1, columns=['x', 'y'], index=slice1.index)
out2 = pd.DataFrame(emb2, columns=['x', 'y'], index=slice2.index)
viz1 = slice1.join(out1)
viz2 = slice2.join(out2)


# Inference

**TODO**: calibrate the classifier
- https://scikit-learn.org/stable/modules/calibration.html#calibration

In [None]:
import joblib
from fraudetect.config import load_args_from_json
from fraudetect.dataset import load_data
from pathlib import Path
# from fraudetect.preprocessing import FraudFeatureEngineer, FeatureEncoding
# from fraudetect.dataset import MyDatamodule, load_data
# from fraudetect.config import Arguments
import pandas as pd
from datetime import datetime, date
from sklearn.model_selection import (TimeSeriesSplit,
                                     TunedThresholdClassifierCV)
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import (
    StackingClassifier
)
from sklearn.frozen import FrozenEstimator
import os
import json

from sklearn.linear_model import LogisticRegression, LogisticRegressionCV


In [None]:

clf_path = r"D:\fraud-detection-galsen\runs-optuna\ensemble-trees-1_2025-04-21_20-00_best-run.joblib"
    
run = joblib.load(clf_path)

run

[[Pipeline(steps=[('feature_engineer',
                   FraudFeatureEngineer(cluster_on_feature='AccountId',
                                        reorder_by=['TX_DATETIME', 'AccountId'],
                                        session_gap_minutes=1140,
                                        uid_cols=('AccountId', 'CUSTOMER_ID'),
                                        windows_size_in_days=(1, 7, 30))),
                  ('dropper',
                   DropFeatures(features_to_drop=['CurrencyCode', 'CountryCode',
                                                  'SubscriptionId', 'BatchId',
                                                  'CUSTOMER_ID', 'AccountId',
                                                  'TRAN...
                   SmartCorrelatedSelection(cv=TimeSeriesSplit(gap=5255, max_train_size=None, n_splits=5, test_size=None),
                                            estimator=DecisionTreeClassifier(class_weight='balanced',
                                    

In [None]:
# joblib.load(r"D:\fraud-detection-galsen\runs-optuna\decisionTree_2025-04-20_03-41.joblib")

[FrozenTrial(number=119, state=1, values=[0.8018810144016373], datetime_start=datetime.datetime(2025, 4, 20, 5, 43, 40, 711393), datetime_complete=datetime.datetime(2025, 4, 20, 5, 44, 37, 15282), params={'classifier': 'randomForest', 'cat_encoding_method': 'binary', 'pca': False, 'disable_pyod': True, 'select_features': True, 'feature_selector_name': 'smartcorrelated', 'smartcorrelated__method': 'spearman', 'smartcorrelated__threshold': 0.77, 'smartcorrelated__scoring': 'f1', 'add_fft': False, 'add_seasonal_features': False, 'use_nystrom': False, 'use_sincos': False, 'use_spline': False, 'session_gap_minutes': 1140, 'poly_degree_interact': 1, 'randomForest_model__n_estimators': 7, 'randomForest_model__criterion': 'entropy', 'randomForest_model__max_depth': 4, 'randomForest_model__min_samples_split': 2, 'randomForest_model__min_samples_leaf': 1, 'randomForest_model__class_weight': 'balanced', 'randomForest_model__max_features': None, 'randomForest_model__random_state': None, 'tune_thre

In [None]:
args, cfg = load_args_from_json(
    r"D:\fraud-detection-galsen\runs-optuna\ensemble-trees-1_2025-04-21_20-00.json"
)

In [62]:
clf = run[0][0]
clf

In [None]:
# args.__dict__

{'data_path': 'D:\\fraud-detection-galsen\\tools\\..\\data\\training.csv',
 'study_name': 'decisionTree_2025-04-20_03-41',
 'work_dir': 'D:\\fraud-detection-galsen\\tools\\..\\runs-optuna',
 'run_name': 'debug',
 'reorder_by': ['TX_DATETIME', 'AccountId'],
 'delta_train': 50,
 'delta_delay': 7,
 'delta_test': 20,
 'random_state': 41,
 'windows_size_in_days': [1, 7, 30],
 'sampler_names': None,
 'sampler_cfgs': None,
 'model_names': ['decisionTree', 'randomForest'],
 'session_gap_minutes': 180,
 'onehot_threshold': 6,
 'pyod_detectors': ['abod', 'cblof', 'hbos', 'iforest', 'knn', 'loda', 'mcd'],
 'disable_pyod_outliers': True,
 'disable_samplers': True,
 'do_pca': False,
 'do_poly_expansion': False,
 'do_feature_selection': True,
 'cv_n_iter': 200,
 'cv_gap': 5255,
 'cv_method': 'optuna',
 'n_splits': 5,
 'n_jobs': 10,
 'scoring': ['f1', 'average_precision'],
 'cat_encoding_method': 'binary',
 'cat_encoding_methods': ['binary', 'catboost', 'count', 'target_enc', 'woe'],
 'cat_encoding_b

In [64]:
raw_data_train = load_data("../data/training.csv")

raw_data_pred = load_data("../data/test.csv")

In [65]:
raw_data_train.columns

Index(['TRANSACTION_ID', 'BatchId', 'AccountId', 'SubscriptionId',
       'CUSTOMER_ID', 'CurrencyCode', 'CountryCode', 'ProviderId', 'ProductId',
       'ProductCategory', 'ChannelId', 'TX_AMOUNT', 'Value', 'TX_DATETIME',
       'PricingStrategy', 'TX_FRAUD', 'TX_TIME_DAYS'],
      dtype='object')

In [66]:
raw_data_pred.shape

(45019, 16)

In [67]:
y_pred_origin = clf.predict(raw_data_pred)

y_pred_origin.sum()

np.float64(96.0)

In [None]:
X = raw_data_train.drop(columns=['TX_FRAUD'])
y = raw_data_train['TX_FRAUD']

In [None]:
# Time-ordered folds used to tune the meta-learner's regularisation strength.
cv = TimeSeriesSplit(n_splits=4, gap=5000)

# Stacking: the already-fitted pipelines are the base estimators
# (cv='prefit'); only the logistic-regression meta-learner is trained here.
# NOTE(review): `np` is not imported by this section's import cell; it relies
# on an earlier `import numpy as np` in the notebook.
final_estimator = LogisticRegressionCV(
    Cs=np.logspace(1, 4, 5),
    cv=cv,
    scoring='average_precision',
    solver='liblinear',
)
# NOTE(review): `run` is indexed as run[0][0] above to obtain a pipeline; if
# it is a list of lists, enumerate(run) yields lists rather than estimators —
# confirm and flatten if needed.
clf_stacking = StackingClassifier(
    [(str(i), pipe) for i, pipe in enumerate(run)],
    final_estimator=final_estimator,
    n_jobs=5,
    cv='prefit',
)

# Fit the meta-learner on the labelled training data (X, y from the previous
# cell). The earlier call passed the unlabeled test frame and no target.
clf_stacking.fit(X, y)
y_pred_stacked = clf_stacking.predict(raw_data_pred)

In [None]:
y_pred_stacked.sum(), y_pred_stacked.sum()/y_pred_stacked.shape[0]

In [None]:
# calibrated model
# clf_calibrated = CalibratedClassifierCV(FrozenEstimator(clf_stacking),
#                                  method='sigmoid',
#                                  n_jobs=6,
#                                  ensemble=True,
#                                  cv=TimeSeriesSplit(n_splits=3,gap=5000),
#                               )

# X = raw_data_train.drop(columns=['TX_FRAUD'])
# y = raw_data_train['TX_FRAUD']

# clf_calibrated.fit(X,y)
# y_pred_calibrated = clf_calibrated.predict(raw_data_pred)
# y_pred_calibrated.sum()

np.float64(87.0)

In [70]:
test_data = pd.read_csv("../data/test.csv")
test_data.head()

Unnamed: 0,TransactionId,BatchId,AccountId,SubscriptionId,CustomerId,CurrencyCode,CountryCode,ProviderId,ProductId,ProductCategory,ChannelId,Amount,Value,TransactionStartTime,PricingStrategy
0,TransactionId_50600,BatchId_35028,AccountId_2441,SubscriptionId_4426,CustomerId_2857,UGX,256,ProviderId_5,ProductId_3,airtime,ChannelId_3,1000.0,1000,2019-02-13T10:01:40Z,4
1,TransactionId_95109,BatchId_45139,AccountId_3439,SubscriptionId_2643,CustomerId_3874,UGX,256,ProviderId_5,ProductId_15,financial_services,ChannelId_3,2000.0,2000,2019-02-13T10:02:12Z,2
2,TransactionId_47357,BatchId_74887,AccountId_4841,SubscriptionId_3829,CustomerId_2857,UGX,256,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-50.0,50,2019-02-13T10:02:30Z,2
3,TransactionId_28185,BatchId_11025,AccountId_2685,SubscriptionId_4626,CustomerId_3105,UGX,256,ProviderId_5,ProductId_10,airtime,ChannelId_3,3000.0,3000,2019-02-13T10:02:38Z,4
4,TransactionId_22140,BatchId_29804,AccountId_4841,SubscriptionId_3829,CustomerId_3105,UGX,256,ProviderId_4,ProductId_6,financial_services,ChannelId_2,-60.0,60,2019-02-13T10:02:58Z,2


In [71]:
# make submission
submission = pd.read_csv("../data/sample_submission.csv")
submission.head()

Unnamed: 0,TransactionId,FraudResult
0,TransactionId_50600,
1,TransactionId_95109,
2,TransactionId_47357,
3,TransactionId_28185,
4,TransactionId_22140,


In [72]:
## Great same...
(test_data['TransactionId'] == submission['TransactionId']).sum()

np.int64(45019)

In [None]:
# `tag` becomes part of the submission file name, so it must name the
# predictions actually written below (the stacked ensemble, not `clf` alone).
tag = 'stacked'
submission['FraudResult'] = y_pred_stacked

# The predictions are floats; the submission column is stored as integers.
submission['FraudResult'] = submission['FraudResult'].astype('int')
submission.head()

Unnamed: 0,TransactionId,FraudResult
0,TransactionId_50600,0
1,TransactionId_95109,0
2,TransactionId_47357,0
3,TransactionId_28185,0
4,TransactionId_22140,0


In [82]:
submission['FraudResult'].sum()

np.int64(87)

In [None]:
# Hour-minute stamp, only needed by the alternative date-based name below.
current_time = f"{datetime.now():%H-%M}"
# filename = f"submission_{str(date.today())}_{current_time}.csv"

# Name the submission after the model file and the prediction tag.
filename = os.path.join(
    "../submissions",
    "{}_{}.csv".format(Path(clf_path).stem, tag),
)

filename

'../submissions\\decisionTree_2025-04-20_03-41_best-run_calibrated.csv'

In [84]:
submission.to_csv(filename,index=False)

# Performance estimation
Estimate the hidden public-test labels from the leaderboard F1 scores of earlier submissions.

In [None]:
from scipy.optimize import minimize, Bounds
import pandas as pd
import numpy as np

In [None]:
# load data
# Predicted labels of five earlier submissions, read in the order y1..y5.
y1, y2, y3, y4, y5 = (
    pd.read_csv(csv_path)['FraudResult'].to_numpy()
    for csv_path in (
        r'..\submissions\submission_2025-04-18_18-29_EQiuFghN.csv',
        r'..\submissions\submission_2025-04-16_15-03_ZnyWKEKm.csv',
        r'..\submissions\submission_2025-04-14_23-29_roKRCvYs.csv',
        r'..\submissions\submission_2025-04-16_15-06_nDW2jSbL.csv',
        r'..\submissions\submission_2025-04-18_18-24_mTnW2tLv.csv',
    )
)

# Submission id -> (F1 score recorded for it, its predicted labels).
f1_scores = {
    'roKRCvYs': (0, y3),
    'ZnyWKEKm': (0.005135337, y2),
    'nDW2jSbL': (0.26519337, y4),
    'mTnW2tLv': (0.144092219, y5),
    'EQiuFghN': (0.666666666, y1),
}

In [None]:
def f1_score(y_truth: np.ndarray, y_pred: np.ndarray) -> float:
    """F1 score computed with dot products, so it also accepts soft labels.

    For 0/1 vectors this is the usual F1 = tp / (tp + 0.5 * (fp + fn)); for
    values in [0, 1] it is a continuous relaxation. The 1e-8 term avoids a
    division by zero when all counts are zero.
    """
    tp = np.dot(y_truth, y_pred)
    fp = np.dot(1 - y_truth, y_pred)
    fn = np.dot(y_truth, 1 - y_pred)

    return tp / (tp + 0.5 * (fp + fn) + 1e-8)


def objective(x_0: np.ndarray):
    """Squared mismatch between recorded F1 scores and those implied by ``x_0``.

    ``x_0`` is a candidate label vector. For every entry ``(f1, y)`` of the
    module-level ``f1_scores`` mapping, the F1 of submission ``y`` against
    ``x_0`` is compared with the recorded ``f1``.

    The residuals are squared so the value is non-negative and zero only on a
    perfect match; a signed sum could be driven below zero just by
    underestimating every score. ``x_0`` is deliberately not rounded: rounding
    makes the objective piecewise-constant, which gives the optimiser a zero
    gradient. Round the optimiser's result afterwards if hard labels are needed.
    """
    return sum((f1_score(x_0, y) - f1) ** 2 for f1, y in f1_scores.values())


In [None]:
# Starting point for the optimiser: element-wise average of the submitted
# label vectors.
x0 = sum(labels for _, labels in f1_scores.values()) / len(f1_scores)
x0

In [None]:
res = minimize(objective,
         x0=x0,
        #  method='CG',
         bounds=Bounds(0,1))

In [None]:
res

In [None]:
res.x

In [None]:
f1_score(res.x,y1)