In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt
import time

<font size=5> Load data </font>

In [None]:
JPfraud_full = pd.read_csv("data/fraud_payment_data")
JPfraud_full

In [None]:
# Converting Time_step to a datetime type
JPfraud_full = JPfraud_full.drop(columns=['Sender_lob'],axis=1)
JPfraud_full['timestamp'] = pd.to_datetime(JPfraud_full['Time_step'],format='%Y-%m-%d %H:%M:%S')
JPfraud_full = JPfraud_full.drop(columns=['Time_step'])
JPfraud_full.insert(0,'timestamp',JPfraud_full.pop('timestamp'))
JPfraud_full

In [4]:
JPfraud_full = JPfraud_full[JPfraud_full.USD_amount > 0]

In [5]:
JPfraud = JPfraud_full.reset_index(level=0,drop=True)

In [6]:
nanosecs_add = np.append(np.tile(np.arange(100000),10), np.arange(len(JPfraud)-1000000))
print(nanosecs_add)
print(len(nanosecs_add))

[     0      1      2 ... 498152 498153 498154]
1498155


In [None]:
JPfraud['timestamp_uniq'] = JPfraud['timestamp'] + pd.to_timedelta(nanosecs_add, unit='ns')
#JPfraud['timestamp_unique'] = JPfraud['timestamp'] + pd.to_timedelta(
JPfraud.insert(0,'timestamp_uniq',JPfraud.pop('timestamp_uniq'))
JPfraud

In [8]:
JPfraud.timestamp_uniq[JPfraud.timestamp_uniq.duplicated()]

Series([], Name: timestamp_uniq, dtype: datetime64[ns])

In [9]:
# Function to convert a time object to seconds after midnight
# (pandas will promote int to float due to NaNs in the columns fyi)
def convert_time_to_seconds(timeObj):
    return int((timeObj.hour)*60*60 + (timeObj.minute)*60 + (timeObj.second))

<font size=6> Time between transactions feature </font>

In [None]:
# Calculates time between previous transaction for each of seller, beneficiary
# (The mask.....etc code sets the sender_time_btwn, bene_time_btwn feature to 0 (of timedelta type)
# for the first occurrence an account appears. NaT's are still placed where a Sender_Account or
# Bene_Account entry is NaN.)

JPfraud['sender_time_btwn'] = (JPfraud.groupby('Sender_Account')['timestamp'].diff()
                                      .mask(JPfraud.groupby('Sender_Account')['timestamp'].cumcount().eq(0), dt.timedelta(0))
                              )
JPfraud['bene_time_btwn'] = (JPfraud.groupby('Bene_Account')['timestamp'].diff()
                                    .mask(JPfraud.groupby('Bene_Account')['timestamp'].cumcount().eq(0), dt.timedelta(0))
                            )

JPfraud

<font size=6> Expanding time features </font> <br>
Stats are updated at time of each transaction and represent entire history

In [11]:
# Gives minimum number of observations before assigning entries
min_obs = 3

In [None]:
start = dt.datetime.now()

# Suppresses pandas' warnings about chained assignment
pd.options.mode.chained_assignment = None

# Choosing quantile percentages for time differences between transactions
lower_time_diff_quantile = 0.15
upper_time_diff_quantile = 0.85

# Rolling quantiles for times between consecutive transactions
JPfraud['sender_time_diff_min'] = (JPfraud.groupby('Sender_Account')['sender_time_btwn']
                                          .apply(lambda x: x.dt.total_seconds().expanding(method='single',min_periods=min_obs)
                                                            .quantile(q=lower_time_diff_quantile,interpolation='midpoint'))
                                          .reset_index(level=0,drop=True)
                                  )

JPfraud['sender_time_diff_max'] = (JPfraud.groupby('Sender_Account')['sender_time_btwn']
                                          .apply(lambda x: x.dt.total_seconds().expanding(method='single',min_periods=min_obs)
                                                            .quantile(q=upper_time_diff_quantile,interpolation='midpoint'))
                                          .reset_index(level=0,drop=True)
                                  )

JPfraud['bene_time_diff_min'] = (JPfraud.groupby('Bene_Account')['bene_time_btwn']
                                        .apply(lambda x: x.dt.total_seconds().expanding(method='single',min_periods=min_obs)
                                                          .quantile(q=lower_time_diff_quantile,interpolation='midpoint'))
                                        .reset_index(level=0,drop=True)
                                )

JPfraud['bene_time_diff_max'] = (JPfraud.groupby('Bene_Account')['bene_time_btwn']
                                        .apply(lambda x: x.dt.total_seconds().expanding(method='single',min_periods=min_obs)
                                                          .quantile(q=upper_time_diff_quantile,interpolation='midpoint'))
                                        .reset_index(level=0,drop=True)
                                )

# outside_sender_time_diff_range, outside_bene_time_diff_range: Classifying whether time between consecutive
# transaction is outside of the quantile range above
JPfraud = JPfraud.assign(outside_sender_time_diff_range = lambda x: (
            (x.sender_time_btwn.dt.total_seconds() < x.sender_time_diff_min) | (x.sender_time_btwn.dt.total_seconds() > x.sender_time_diff_max))*1)
JPfraud = JPfraud.assign(outside_bene_time_diff_range = lambda x: (
            (x.bene_time_btwn.dt.total_seconds() < x.bene_time_diff_min) | (x.bene_time_btwn.dt.total_seconds() > x.bene_time_diff_max))*1)

JPfraud

In [13]:
print('Time elasped: ', dt.datetime.now() - start)

Time elasped:  0:05:29.558750


In [None]:
pd.options.mode.chained_assignment = None
start = dt.datetime.now()

# Choosing quantile percentages
lower_time_quantile = 0.15
upper_time_quantile = 0.85

# seconds_in_day: time transaction occurred in seconds after midnight
JPfraud['seconds_in_day'] = JPfraud.timestamp.transform(lambda x: float(convert_time_to_seconds(x.time())))

# time_min_out: rolling lower quantile for range of time of SENDER transactions (in seconds after midnight)
JPfraud['time_min_out'] = (JPfraud.groupby('Sender_Account')['seconds_in_day']
                                  .apply(lambda x: x.expanding(method='single',min_periods=min_obs)
                                                    .quantile(q=lower_time_quantile,interpolation='midpoint'))
                                  .reset_index(level=0,drop=True)
                          )

# time_max_out: rolling upper quantile for range of time of SENDER transactions (in seconds after midnight)
JPfraud['time_max_out'] = (JPfraud.groupby('Sender_Account')['seconds_in_day']
                                  .apply(lambda x: x.expanding(method='single',min_periods=min_obs)
                                                    .quantile(q=upper_time_quantile,interpolation='midpoint'))
                                  .reset_index(level=0,drop=True)
                          )

# time_min_in: rolling lower quantile for range of time of BENEFICIARY transactions (in seconds after midnight)
JPfraud['time_min_in'] = (JPfraud.groupby('Bene_Account')['seconds_in_day']
                                 .apply(lambda x: x.expanding(method='single',min_periods=min_obs)
                                                   .quantile(q=lower_time_quantile,interpolation='midpoint'))
                                 .reset_index(level=0,drop=True)
                         )

# time_max_in: rolling upper quantile for range of time of BENEFICIARY transactions (in seconds after midnight)
JPfraud['time_max_in'] = (JPfraud.groupby('Bene_Account')['seconds_in_day']
                                 .apply(lambda x: x.expanding(method='single',min_periods=min_obs)
                                                   .quantile(q=upper_time_quantile,interpolation='midpoint'))
                                 .reset_index(level=0,drop=True)
                         )

# outside_time_bene_range:   indicator of whether transaction is outside range for beneficiary at
#                            time of transaction, 0 no, 1 yes
JPfraud = JPfraud.assign(outside_time_bene_range = lambda x: ((x.seconds_in_day < x.time_min_in) | (x.seconds_in_day > x.time_max_in))*1)

# outside_time_sender_range: indicator of whether transaction is outside usual time range for sender
#                            at time of transaction, 0 no, 1 yes
JPfraud = JPfraud.assign(outside_time_sender_range = lambda x: ((x.seconds_in_day < x.time_min_out) | (x.seconds_in_day > x.time_max_out))*1)

JPfraud

In [15]:
print('Time elasped: ', dt.datetime.now() - start)

Time elasped:  0:02:22.124026


<font size=6> Rolling time features </font> <br>
Stats are updated at time of each transaction, but only for a rolling window of time; for instance within the last 180 days.

In [16]:
# Sets the rolling period for the rolling stat features
rolling_period = '90D' # D for days

# Gives minimum number of observations before assigning entries
min_obs = 3

<font size=6> Bursts of Transactions feature </font>

In [17]:
start = dt.datetime.now()

In [None]:
# Window within to count the number of transactions
burst_period = '1D' # previous 24 hours/1 day

# Number of transactions within a previous window of time given by burst_period
JPfraud['sender_burst_num'] = (JPfraud.set_index('timestamp')
                                      .groupby('Sender_Account',group_keys=False)['Sender_Account']
                                      .apply(lambda x: x.rolling(window=burst_period).count())
                                      .reset_index(level=0,drop=True)
                              )

JPfraud['bene_burst_num'] = (JPfraud.set_index('timestamp')
                                    .groupby('Bene_Account',group_keys=False)['Bene_Account']
                                    .apply(lambda x: x.rolling(window=burst_period).count())
                                    .reset_index(level=0,drop=True)
                            )
JPfraud

In [19]:
print('Time elasped: ', dt.datetime.now()-start)

Time elasped:  0:03:55.082504


In [20]:
# Collection of columns to drop for specific purposes
drop_cols = ['Transaction_Id','Sender_Id','Sender_Sector','Sender_Country','Bene_Id',
             'Bene_Country','Label','timestamp_uniq','timestamp']

drop_cols_fit_exp = ['timestamp','Transaction_Id','Sender_Id','Sender_Sector','Sender_Country','Bene_Id',
                 'Bene_Country','Transaction_Type','Label','seconds_in_day','sender_time_diff_min',
                 'sender_time_diff_max','bene_time_diff_min','bene_time_diff_max','time_min_out',
                 'time_max_out','time_min_in','time_max_in']

drop_cols_fit = ['timestamp','Transaction_Id','Sender_Id','Sender_Sector','Sender_Country','Bene_Id',
                 'Bene_Country','seconds_in_day','sender_time_diff_min',
                 'sender_time_diff_max','bene_time_diff_min','bene_time_diff_max','time_min_out',
                 'time_max_out','time_min_in','time_max_in','rolling_sender_time_diff_min',
                 'rolling_sender_time_diff_max','rolling_bene_time_diff_min','rolling_bene_time_diff_max',
                 'rolling_time_min_out','rolling_time_max_out','rolling_time_min_in',
                 'rolling_time_max_in']

rolling_feats = ['rolling_sender_time_diff_min','rolling_sender_time_diff_max','rolling_bene_time_diff_min',
                 'rolling_bene_time_diff_max','rolling_time_min_out','rolling_time_max_out','rolling_time_min_in',
                 'rolling_time_max_in','rolling_outside_sender_time_diff_range','rolling_outside_bene_time_diff_range',
                 'rolling_outside_time_sender_range','rolling_outside_time_bene_range']

expanding_feats = ['sender_time_diff_min','sender_time_diff_max','bene_time_diff_min','bene_time_diff_max',
                   'time_min_out','time_max_out','time_min_in','time_max_in','outside_sender_time_diff_range',
                   'outside_bene_time_diff_range','outside_time_sender_range','outside_time_bene_range']

sender_feats = ['sender_time_btwn','sender_time_diff_min','sender_time_diff_max','outside_sender_time_diff_range','time_min_out',
                'time_max_out','outside_time_sender_range','rolling_sender_time_diff_min','rolling_sender_time_diff_max',
                'rolling_outside_sender_time_diff_range','rolling_time_min_out','rolling_time_max_out',
                'rolling_outside_time_sender_range','sender_burst_num']

bene_feats = ['bene_time_btwn','bene_time_diff_min','bene_time_diff_max','outside_bene_time_diff_range','time_min_in',
              'time_max_in','outside_time_bene_range','rolling_bene_time_diff_min','rolling_bene_time_diff_max',
              'rolling_outside_bene_time_diff_range','rolling_time_min_in','rolling_time_max_in',
              'rolling_outside_time_bene_range','bene_burst_num']

In [21]:
account = JPfraud[JPfraud.index == np.random.randint(0,len(JPfraud)*0.5-1)].Sender_Account.iloc[0]
account

'ACCOUNT-529493'

In [None]:
JPfraud[JPfraud.Bene_Account == account].drop(columns=drop_cols + ['sender_time_btwn',
                                                                   'sender_time_diff_min','sender_time_diff_max',
                                                                   'outside_sender_time_diff_range','time_min_out',
                                                                   'time_max_out','outside_time_sender_range'])

In [None]:
cutoff = round(0.7*len(JPfraud)) 
JPfraud_train = JPfraud.head(cutoff)
not_train =  JPfraud.tail(len(JPfraud)-cutoff)
cutoff2 = round(0.5*len(not_train))
JPfraud_val = not_train.head(cutoff2)
JPfraud_test = not_train.tail(len(not_train)-cutoff2)
JPfraud_train

In [24]:
# Reading in Brandon's dataframes
augmented_df_train = pd.read_csv('data/augmented_training_transaction_dataframe.csv')
augmented_df_validate = pd.read_csv('data/augmented_validation_transaction_dataframe.csv')

In [25]:
augmented_df_train.columns

Index(['Transaction_Id', 'Sender_Id', 'Sender_Account', 'Sender_Country',
       'Bene_Id', 'Bene_Account', 'Bene_Country', 'USD_amount', 'Label',
       'Transaction_Type', 'timestamp', 'proximity_to_fraud',
       'community_id_sender', 'community_id_beneficiary', 'same_community',
       'community_fraud_rate_sender', 'community_fraud_rate_beneficiary',
       'Sender_in_degree', 'Sender_out_degree', 'Sender_pagerank',
       'Sender_is_known_fraud', 'Bene_in_degree', 'Bene_out_degree',
       'Bene_pagerank', 'Bene_is_known_fraud', 'First_Number', 'Sender_Sector',
       'fraud_rate_by_sector'],
      dtype='object')

In [26]:
drop_cols2 = ['timestamp', 'Transaction_Id', 'Sender_Id', 'Sender_Account',
              'Sender_Country', 'Sender_Sector', 'Bene_Id', 'Bene_Account',
              'Bene_Country', 'USD_amount', 'Label', 'Transaction_Type','timestamp_uniq']

In [27]:
augmented_with_time_df_train = pd.concat([augmented_df_train,JPfraud_train.drop(columns=drop_cols2)],axis=1)
augmented_with_time_df_train.columns
#augmented_with_time_df_train = augmented_with_time_df_train.drop(columns=drop_cols_fit)
#augmented_with_time_df_train.to_csv('aug_training_with_expanding_burst_time_features.csv')

Index(['Transaction_Id', 'Sender_Id', 'Sender_Account', 'Sender_Country',
       'Bene_Id', 'Bene_Account', 'Bene_Country', 'USD_amount', 'Label',
       'Transaction_Type', 'timestamp', 'proximity_to_fraud',
       'community_id_sender', 'community_id_beneficiary', 'same_community',
       'community_fraud_rate_sender', 'community_fraud_rate_beneficiary',
       'Sender_in_degree', 'Sender_out_degree', 'Sender_pagerank',
       'Sender_is_known_fraud', 'Bene_in_degree', 'Bene_out_degree',
       'Bene_pagerank', 'Bene_is_known_fraud', 'First_Number', 'Sender_Sector',
       'fraud_rate_by_sector', 'sender_time_btwn', 'bene_time_btwn',
       'sender_time_diff_min', 'sender_time_diff_max', 'bene_time_diff_min',
       'bene_time_diff_max', 'outside_sender_time_diff_range',
       'outside_bene_time_diff_range', 'seconds_in_day', 'time_min_out',
       'time_max_out', 'time_min_in', 'time_max_in', 'outside_time_bene_range',
       'outside_time_sender_range', 'sender_burst_num', 'bene_b

In [None]:
JPfraud_val = JPfraud_val.reset_index(level=0,drop=True)
JPfraud_val

In [None]:
augmented_df_validate

In [30]:
augmented_with_time_df_val = pd.concat([augmented_df_validate,JPfraud_val.drop(columns=drop_cols2)],axis=1)
#augmented_with_time_df_val.to_csv('aug_validation_with_expanding_burst_time_features.csv')
augmented_with_time_df_val.columns

Index(['Transaction_Id', 'Sender_Id', 'Sender_Account', 'Sender_Country',
       'Sender_Sector', 'Bene_Id', 'Bene_Account', 'Bene_Country',
       'USD_amount', 'Label', 'Transaction_Type', 'timestamp',
       'proximity_to_fraud', 'community_id_sender', 'community_id_beneficiary',
       'same_community', 'community_fraud_rate_sender',
       'community_fraud_rate_beneficiary', 'Sender_in_degree',
       'Sender_out_degree', 'Sender_pagerank', 'Sender_is_known_fraud',
       'Bene_in_degree', 'Bene_out_degree', 'Bene_pagerank',
       'Bene_is_known_fraud', 'First_Number', 'fraud_rate_by_sector',
       'sender_time_btwn', 'bene_time_btwn', 'sender_time_diff_min',
       'sender_time_diff_max', 'bene_time_diff_min', 'bene_time_diff_max',
       'outside_sender_time_diff_range', 'outside_bene_time_diff_range',
       'seconds_in_day', 'time_min_out', 'time_max_out', 'time_min_in',
       'time_max_in', 'outside_time_bene_range', 'outside_time_sender_range',
       'sender_burst_num', 

In [91]:
augmented_with_time_df_train['sender_time_btwn_secs'] = augmented_with_time_df_train['sender_time_btwn'].transform(lambda x: x.dt.total_seconds())
augmented_with_time_df_train['bene_time_btwn_secs'] = augmented_with_time_df_train['bene_time_btwn'].transform(lambda x: x.dt.total_seconds())
augmented_with_time_df_val['sender_time_btwn_secs'] = augmented_with_time_df_val['sender_time_btwn'].transform(lambda x: x.dt.total_seconds())
augmented_with_time_df_val['bene_time_btwn_secs'] = augmented_with_time_df_val['bene_time_btwn'].transform(lambda x: x.dt.total_seconds())

In [60]:
import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV, train_test_split, TimeSeriesSplit, KFold
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import log_loss, confusion_matrix, classification_report, average_precision_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, FunctionTransformer
from sklearn.impute import SimpleImputer

In [65]:
augmented_with_time_df_train[features].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048708 entries, 0 to 1048707
Data columns (total 24 columns):
 #   Column                            Non-Null Count    Dtype          
---  ------                            --------------    -----          
 0   USD_amount                        1048708 non-null  float64        
 1   Transaction_Type                  1048708 non-null  object         
 2   proximity_to_fraud                1048708 non-null  int64          
 3   community_fraud_rate_sender       1048708 non-null  float64        
 4   community_fraud_rate_beneficiary  1048708 non-null  float64        
 5   Sender_in_degree                  1048708 non-null  int64          
 6   Sender_out_degree                 1048708 non-null  int64          
 7   Sender_pagerank                   1048708 non-null  float64        
 8   same_community                    1048708 non-null  int64          
 9   Sender_is_known_fraud             1048708 non-null  float64        
 10  Bene_i

In [87]:
features = ['USD_amount','Transaction_Type','proximity_to_fraud',
       'community_fraud_rate_sender','community_fraud_rate_beneficiary',
       'Sender_in_degree', 'Sender_out_degree', 'Sender_pagerank','same_community',
       'Sender_is_known_fraud', 'Bene_in_degree', 'Bene_out_degree',
       'Bene_pagerank', 'Bene_is_known_fraud', 'First_Number',
       'fraud_rate_by_sector', 'sender_time_btwn_secs', 'bene_time_btwn_secs',
       'outside_sender_time_diff_range','outside_bene_time_diff_range',
       'outside_time_bene_range','outside_time_sender_range','sender_burst_num',
       'bene_burst_num']

cat_features = ['Transaction_Type','outside_sender_time_diff_range','outside_bene_time_diff_range',
                'outside_time_sender_range','outside_time_bene_range','Sender_is_known_fraud',
                'Bene_is_known_fraud','same_community']

numeric_features = ['USD_amount','Sender_in_degree','Sender_out_degree','Sender_pagerank',
                    'Bene_in_degree','Bene_out_degree','Bene_pagerank','community_fraud_rate_sender',
                    'community_fraud_rate_beneficiary','fraud_rate_by_sector','sender_burst_num', 'bene_burst_num',
                    'First_Number']

time_features = ['sender_time_btwn_secs','bene_time_btwn_secs']

In [81]:
cat_transformer = OneHotEncoder()
time_transformer = SimpleImputer(strategy='median')

preprocessor = ColumnTransformer(transformers = [('time_feats', time_transformer, time_features),
                                                 ('categorical', cat_transformer, cat_features)])

pipe = Pipeline(steps =[('preprocessor', preprocessor),
                        ('log_reg', LogisticRegression())])

In [82]:
pipe.fit(augmented_with_time_df_train[features],augmented_with_time_df_train['Label'])

STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [93]:
print('Coeffs are: ', pipe.named_steps['log_reg'].coef_)
print('Intercept: ', pipe.named_steps['log_reg'].intercept_)

Coeffs are:  [[-3.07851109e-06  7.58573139e-07 -1.74218020e-01 -4.35825479e-01
  -1.87710682e-01 -7.56094847e-01 -1.77423839e-01  8.60497759e-03
   3.44573069e-01 -1.69151698e-01 -1.13756564e+00 -4.09676636e-01
  -8.58892426e-01 -6.88352291e-01 -9.13345734e-01 -6.33901860e-01
  -7.89288521e-01 -7.57961212e-01 -3.10830687e+00  1.56106068e+00
  -3.70362844e+00  2.15638805e+00 -9.53350841e-01 -5.93893102e-01]]
Intercept:  [-1.54740291]


In [106]:
predictions = pipe.predict(augmented_with_time_df_train[features])
print(classification_report(augmented_with_time_df_train[features], predictions))

TypeError: '<' not supported between instances of 'str' and 'float'

In [103]:
augmented_with_time_df_val[features].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 224724 entries, 0 to 224723
Data columns (total 24 columns):
 #   Column                            Non-Null Count   Dtype  
---  ------                            --------------   -----  
 0   USD_amount                        224724 non-null  float64
 1   Transaction_Type                  224724 non-null  object 
 2   proximity_to_fraud                224724 non-null  int64  
 3   community_fraud_rate_sender       224724 non-null  float64
 4   community_fraud_rate_beneficiary  224724 non-null  float64
 5   Sender_in_degree                  191934 non-null  float64
 6   Sender_out_degree                 191934 non-null  float64
 7   Sender_pagerank                   191934 non-null  float64
 8   same_community                    224724 non-null  int64  
 9   Sender_is_known_fraud             191934 non-null  float64
 10  Bene_in_degree                    192670 non-null  float64
 11  Bene_out_degree                   192670 non-null  f

In [104]:
augmented_with_time_df_train[features].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048708 entries, 0 to 1048707
Data columns (total 24 columns):
 #   Column                            Non-Null Count    Dtype  
---  ------                            --------------    -----  
 0   USD_amount                        1048708 non-null  float64
 1   Transaction_Type                  1048708 non-null  object 
 2   proximity_to_fraud                1048708 non-null  int64  
 3   community_fraud_rate_sender       1048708 non-null  float64
 4   community_fraud_rate_beneficiary  1048708 non-null  float64
 5   Sender_in_degree                  1048708 non-null  int64  
 6   Sender_out_degree                 1048708 non-null  int64  
 7   Sender_pagerank                   1048708 non-null  float64
 8   same_community                    1048708 non-null  int64  
 9   Sender_is_known_fraud             1048708 non-null  float64
 10  Bene_in_degree                    1048708 non-null  int64  
 11  Bene_out_degree                   104

In [None]:
print(classification_report(y_test, Label_preds))