In [1]:
import os
import sys
import pandas as pd
sys.path.append("../")
from pyspark.sql import SparkSession
from pyspark.sql.functions import split
from pyspark.sql import functions as F
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.neural_network import MLPRegressor
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.svm import SVR

In [2]:
spark = (
    SparkSession.builder.appName("local")
    .config("spark.sql.repl.eagerEval.enabled", True) 
    .config("spark.sql.parquet.cacheMetadata", "true")
    .config("spark.sql.session.timeZone", "Etc/UTC")
    .getOrCreate()
)

24/09/17 10:19:07 WARN Utils: Your hostname, Khues-MacBook-Pro.local resolves to a loopback address: 127.0.0.1; using 10.13.56.89 instead (on interface en0)
24/09/17 10:19:07 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/09/17 10:19:07 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable


In [3]:
consumer_details = spark.read.parquet("../data/tables/synthetic/consumer_user_details.parquet")
merchants = spark.read.parquet("../data/tables/synthetic/tbl_merchants.parquet")
consumer_fraud = pd.read_csv("../data/tables/synthetic/consumer_fraud_probability.csv")
merchants_fraud = pd.read_csv("../data/tables/synthetic/merchant_fraud_probability.csv")
consumer = pd.read_csv("../data/tables/synthetic/tbl_consumer.csv")
transactions = spark.read.parquet("../data/tables/synthetic/transactions/*")

                                                                                

## Consumer

In [4]:
consumer_details.describe() #transaction made by 24081 user_id

                                                                                

summary,user_id,consumer_id
count,499999.0,499999.0
mean,250000.0,750895.2123184246
stddev,144337.422959767,433100.42601411033
min,1.0,4.0
max,499999.0,1499995.0


In [5]:
consumer_fraud.describe()

Unnamed: 0,user_id,fraud_probability
count,34864.0,34864.0
mean,12057.00393,15.120091
std,6963.195641,9.946085
min,1.0,8.287144
25%,6059.75,9.634437
50%,12067.5,11.735624
75%,18091.25,16.216158
max,24081.0,99.24738


In [6]:
consumer[['name', 'address', 'state', 'postcode', 'gender', 'consumer_id']] = consumer['name|address|state|postcode|gender|consumer_id'].str.split('|', expand=True)
consumer = consumer.drop(columns=['name|address|state|postcode|gender|consumer_id'])

In [7]:
consumer.dtypes

name           object
address        object
state          object
postcode       object
gender         object
consumer_id    object
dtype: object

In [8]:
consumer['consumer_id'] = consumer['consumer_id'].astype(int)
consumer['postcode'] = consumer['postcode'].astype(int)
consumer['state_encoded'] = LabelEncoder().fit_transform(consumer['state'])
consumer['gender_encoded'] = LabelEncoder().fit_transform(consumer['gender'])

In [52]:
consumer_fraud['user_id'].nunique()

20128

In [9]:
consumer.dtypes

name              object
address           object
state             object
postcode           int64
gender            object
consumer_id        int64
state_encoded      int64
gender_encoded     int64
dtype: object

The list of merchants from merchants_df is less than the number of merchants shown in the transaction

In [10]:
details = pd.merge(consumer_details.toPandas(), consumer, on='consumer_id', how='outer')
details



CodeCache: size=131072Kb used=24990Kb max_used=24990Kb free=106081Kb
 bounds [0x000000010213c000, 0x00000001039cc000, 0x000000010a13c000]
 total_blobs=10420 nmethods=9427 adapters=905
 compilation: disabled (not enough contiguous free space left)


Unnamed: 0,user_id,consumer_id,name,address,state,postcode,gender,state_encoded,gender_encoded
0,108417,4,Michele Kelley,28656 Sims Plaza Suite 036,NSW,2774,Female,1,0
1,371406,7,James Williams,3709 Mary River,TAS,7248,Male,5,1
2,167772,9,Timothy Ramos,290 Melissa Point Apt. 123,QLD,4694,Male,3,1
3,137864,10,Beth Mendoza,5002 Meredith Views,QLD,4426,Female,3,0
4,92127,19,Dennis Ramirez,20761 Matthews Via Apt. 694,QLD,4406,Male,3,1
...,...,...,...,...,...,...,...,...,...
499994,419774,1499983,Juan Sims,15168 Jones Row,WA,6054,Male,7,1
499995,25039,1499984,Jenna Serrano,53746 Kenneth Avenue,NSW,1740,Female,1,0
499996,396464,1499985,Alyssa Myers,047 William Plain Suite 431,NSW,2549,Undisclosed,1,2
499997,46172,1499986,Emily Turner,8118 Kimberly Estate Apt. 757,QLD,4475,Female,3,0


In [11]:
consumer_full = pd.merge(details, consumer_fraud, on='user_id', how='outer')
consumer_full['user_id'].nunique()

499999

In [12]:
consumer_full[consumer_full.isnull().any(axis=1)].count()

user_id              479871
consumer_id          479871
name                 479871
address              479871
state                479871
postcode             479871
gender               479871
state_encoded        479871
gender_encoded       479871
order_datetime            0
fraud_probability         0
dtype: int64

In [19]:
consumer_full.dtypes

user_id                int64
consumer_id            int64
name                  object
address               object
state                 object
postcode               int64
gender                object
state_encoded          int64
gender_encoded         int64
order_datetime        object
fraud_probability    float64
dtype: object

In [20]:
consumer_full = consumer_full.set_index('user_id')

In [37]:
trans_by_user = transactions.groupBy('user_id') \
                    .agg(
                        F.count('order_id').alias('order_count'),
                        F.sum('dollar_value').alias('total_dollar_value'),
                    )
trans_by_user_pd = trans_by_user.toPandas()
trans_by_user_pd = trans_by_user_pd.set_index('user_id')
trans_by_user_pd['avg_dollar_value'] = trans_by_user_pd['total_dollar_value']/trans_by_user_pd['order_count']

                                                                                

In [51]:
transactions.select(F.countDistinct("user_id")).collect()[0][0]

                                                                                

24081

In [40]:
consumer_trans = pd.merge(consumer_full, trans_by_user_pd, on='user_id', how='outer')
consumer_trans

Unnamed: 0_level_0,consumer_id,name,address,state,postcode,gender,state_encoded,gender_encoded,order_datetime,fraud_probability,order_count,total_dollar_value,avg_dollar_value
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,1195503,Yolanda Williams,413 Haney Gardens Apt. 742,WA,6935,Female,7,0,2022-02-20,9.805431,584.0,94927.163969,162.546514
2,179208,Mary Smith,3764 Amber Oval,NSW,2782,Female,1,0,2021-09-25,10.069851,585.0,123953.506822,211.886336
2,179208,Mary Smith,3764 Amber Oval,NSW,2782,Female,1,0,2021-08-30,9.599514,585.0,123953.506822,211.886336
3,1194530,Jill Jones MD,40693 Henry Greens,NT,862,Female,2,0,2021-11-03,8.300636,587.0,97035.322010,165.307193
4,154128,Lindsay Jimenez,00653 Davenport Crossroad,NSW,2780,Female,1,0,2021-10-09,9.633302,593.0,87592.252770,147.710376
...,...,...,...,...,...,...,...,...,...,...,...,...,...
499995,1385608,Jessica Avila,508 Miranda Overpass Apt. 218,QLD,4400,Female,3,0,,,,,
499996,1466964,Steven Thornton,7913 Schwartz Mission Suite 483,VIC,3097,Undisclosed,6,2,,,,,
499997,1253484,Christy Smith,5681 Zachary Mountain Apt. 060,NSW,2756,Undisclosed,1,2,,,,,
499998,175005,Donna Sutton,54140 Jacob Point,VIC,3989,Female,6,0,,,,,


## Fraud prob

In [41]:
consumer_filtered = consumer_trans.dropna(subset=['fraud_probability'])
consumer_filtered

Unnamed: 0_level_0,consumer_id,name,address,state,postcode,gender,state_encoded,gender_encoded,order_datetime,fraud_probability,order_count,total_dollar_value,avg_dollar_value
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,1195503,Yolanda Williams,413 Haney Gardens Apt. 742,WA,6935,Female,7,0,2022-02-20,9.805431,584.0,94927.163969,162.546514
2,179208,Mary Smith,3764 Amber Oval,NSW,2782,Female,1,0,2021-09-25,10.069851,585.0,123953.506822,211.886336
2,179208,Mary Smith,3764 Amber Oval,NSW,2782,Female,1,0,2021-08-30,9.599514,585.0,123953.506822,211.886336
3,1194530,Jill Jones MD,40693 Henry Greens,NT,862,Female,2,0,2021-11-03,8.300636,587.0,97035.322010,165.307193
4,154128,Lindsay Jimenez,00653 Davenport Crossroad,NSW,2780,Female,1,0,2021-10-09,9.633302,593.0,87592.252770,147.710376
...,...,...,...,...,...,...,...,...,...,...,...,...,...
24079,256441,Tony Schmidt,670 Burke Turnpike Apt. 079,VIC,3029,Male,6,1,2021-09-12,9.721157,594.0,122228.006616,205.771055
24079,256441,Tony Schmidt,670 Burke Turnpike Apt. 079,VIC,3029,Male,6,1,2021-11-08,8.940524,594.0,122228.006616,205.771055
24079,256441,Tony Schmidt,670 Burke Turnpike Apt. 079,VIC,3029,Male,6,1,2021-11-26,8.838622,594.0,122228.006616,205.771055
24081,1177416,Jacqueline Rice,98418 Gary Creek Suite 728,VIC,3980,Female,6,0,2021-10-08,14.343772,588.0,95216.166563,161.932256


In [42]:
consumer_filtered_1 = consumer_filtered.groupby(['user_id', 'order_count', 'total_dollar_value', 'avg_dollar_value']) \
                                .agg(avg_fraud_prob=('fraud_probability', 'mean')).reset_index()
consumer_filtered_1

Unnamed: 0,user_id,order_count,total_dollar_value,avg_dollar_value,avg_fraud_prob
0,1,584.0,94927.163969,162.546514,9.805431
1,2,585.0,123953.506822,211.886336,9.834682
2,3,587.0,97035.322010,165.307193,8.300636
3,4,593.0,87592.252770,147.710376,9.633302
4,5,609.0,94880.476559,155.797170,15.794925
...,...,...,...,...,...
20123,24075,580.0,97195.068007,167.577703,18.463737
20124,24076,596.0,106311.225711,178.374540,15.657614
20125,24078,580.0,93378.017587,160.996582,15.048298
20126,24079,594.0,122228.006616,205.771055,10.612117


In [45]:
X = consumer_filtered_1.drop(['user_id', 'avg_fraud_prob'], axis=1)  # features
y = consumer_filtered_1['avg_fraud_prob']  # target

# Split into train and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training size: {X_train.shape}, Test size: {X_test.shape}")

model = LinearRegression()

# Train the model
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model using Mean Squared Error (MSE) and R^2 score
mse = mean_squared_error(y_test, y_pred)
r2 = model.score(X_test, y_test)

print(f"Mean Squared Error: {mse}")
print(f"R^2 Score: {r2}")


Training size: (16102, 3), Test size: (4026, 3)
Mean Squared Error: 78.32019729220906
R^2 Score: 0.16217618591106464


In [46]:
consumer_wo_p = consumer_full.drop(['consumer_id', 'name', 'address', 'state', 'gender'], axis=1)

In [47]:
consumer_wo_p_1 = consumer_wo_p[consumer_wo_p['fraud_probability'].isna()].drop(['order_datetime', 'fraud_probability'], axis=1)
consumer_wo_p_1

Unnamed: 0_level_0,postcode,state_encoded,gender_encoded
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
7,4606,3,0
8,6056,7,1
10,3220,6,0
11,3063,6,2
12,6743,7,0
...,...,...,...
499995,4400,3,0
499996,3097,6,2
499997,2756,1,2
499998,3989,6,0


In [48]:
consumer_wo_p_1['avg_fraud_prob'] = model.predict(consumer_wo_p_1.drop('user_id', axis=1))
consumer_wo_p_1

KeyError: "['user_id'] not found in axis"

In [None]:
pd.concat([consumer_wo_p_1, consumer_filtered_1], ignore_index=True)

## Transactions

                                                                                

Unnamed: 0_level_0,consumer_id,name,address,state,postcode,gender,state_encoded,gender_encoded,order_datetime,fraud_probability,order_count,total_dollar_value,avg_dollar_value
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,1195503,Yolanda Williams,413 Haney Gardens Apt. 742,WA,6935,Female,7,0,2022-02-20,9.805431,584,94927.163969,162.546514
2,179208,Mary Smith,3764 Amber Oval,NSW,2782,Female,1,0,2021-09-25,10.069851,585,123953.506822,211.886336
2,179208,Mary Smith,3764 Amber Oval,NSW,2782,Female,1,0,2021-08-30,9.599514,585,123953.506822,211.886336
3,1194530,Jill Jones MD,40693 Henry Greens,NT,862,Female,2,0,2021-11-03,8.300636,587,97035.322010,165.307193
4,154128,Lindsay Jimenez,00653 Davenport Crossroad,NSW,2780,Female,1,0,2021-10-09,9.633302,593,87592.252770,147.710376
...,...,...,...,...,...,...,...,...,...,...,...,...,...
24079,256441,Tony Schmidt,670 Burke Turnpike Apt. 079,VIC,3029,Male,6,1,2021-11-08,8.940524,594,122228.006616,205.771055
24079,256441,Tony Schmidt,670 Burke Turnpike Apt. 079,VIC,3029,Male,6,1,2021-11-26,8.838622,594,122228.006616,205.771055
24080,940951,Amy Russo,4525 Frazier Meadows,NSW,2809,Female,1,0,,,591,88638.274060,149.980159
24081,1177416,Jacqueline Rice,98418 Gary Creek Suite 728,VIC,3980,Female,6,0,2021-10-08,14.343772,588,95216.166563,161.932256


## Merchants + take rate

In [20]:
merchants_1 = merchants.withColumn('tags', F.lower(merchants['tags'])) \
                        .withColumn('name', F.lower(merchants['name'])).toPandas()

In [21]:
merchants_1['tags'] = merchants_1['tags'].str.replace('[', '(').str.replace(']', ')')
merchants_1['tags'] = merchants_1['tags'].astype('string')

In [22]:
merchants_1[['type', 'rev_level', 'take_rate']] = merchants_1['tags'].str.split(r'\), \(', expand=True)
merchants_1 = merchants_1.drop(columns=['tags'])

In [23]:
merchants_1['type'] = merchants_1['type'].str.replace('((', '', regex=False)
merchants_1['take_rate'] = merchants_1['take_rate'].str.replace('))', '', regex=False).str.replace('take rate: ', '', regex=False).astype(float)

In [24]:
merchants_1.dtypes

name                    object
merchant_abn             int64
type            string[python]
rev_level       string[python]
take_rate              float64
dtype: object

In [28]:
transactions = transactions.withColumn('merchant_abn', F.col('merchant_abn').cast('int'))
transactions.dtypes

[('user_id', 'bigint'),
 ('merchant_abn', 'int'),
 ('dollar_value', 'double'),
 ('order_id', 'string')]

In [30]:
merchants_1

Unnamed: 0,name,merchant_abn,type,rev_level,take_rate
0,felis limited,10023283211,"furniture, home furnishings and equipment shop...",e,0.18
1,arcu ac orci corporation,10142254217,"cable, satellite, and other pay television and...",b,4.22
2,nunc sed company,10165489824,"jewelry, watch, clock, and silverware shops",b,4.40
3,ultricies dignissim lacus foundation,10187291046,"watch, clock, and jewelry repair shops",b,3.29
4,enim condimentum pc,10192359162,"music shops - musical instruments, pianos, and...",a,6.33
...,...,...,...,...,...
4021,elit dictum eu ltd,99938978285,"opticians, optical goods, and eyeglasses",b,4.50
4022,mollis llp,99974311662,"books, periodicals, and newspapers",b,3.17
4023,sociosqu corp.,99976658299,shoe shops,a,6.57
4024,commodo hendrerit llc,99987905597,motor vehicle supplies and new parts,a,6.82


In [31]:
transactions

user_id,merchant_abn,dollar_value,order_id
14935,2108588004,136.06570809815838,23acbb7b-cf98-458...
1,-793091288,72.61581642788431,76bab304-fa2d-400...
14936,-675683599,3.0783487174439297,a2ae446a-2959-41c...
1,1922891469,51.58228625503599,7080c274-17f7-4cc...
14936,1036349571,25.2281149424178,8e301c0f-06ab-45c...
2,-741004848,691.5028234458998,0380e9ad-b0e8-420...
14936,826914280,102.13952056640888,5ac3da9c-5147-452...
2,399556458,644.5220654863093,4e368e44-86f8-4de...
14938,994852201,209.12780951421405,4d78cd01-4bab-494...
3,-1792138759,141.0387993699113,c50c957d-ecfc-430...


In [29]:
merchants_trans = merchants_1.join(transactions, on='merchant_abn')
merchants_trans

ValueError: Joining multiple DataFrames only supported for joining on index

In [42]:
merchants_trans_1 = merchants_trans \
                    .groupBy('name', 'tags', 'merchant_abn') \
                    .agg(
                        F.count('order_id').alias('order_count'),
                        F.sum('dollar_value').alias('total_dollar_value'),
                    )
merchants_trans_1

                                                                                

name,tags,merchant_abn,order_count,total_dollar_value
eget lacus llp,"[[cable, satellit...",47663262928,10422,1013371.6659596296
arcu vestibulum c...,"((hobby, toy and ...",17158952809,20258,1269279.4663547624
arcu iaculis corp...,"([shoe shops], [a...",10955677986,754,176773.53578258128
mauris aliquam eu...,"([cable, satellit...",75720304166,578,44868.54430167646
vel pede blandit ...,"((opticians, opti...",52535771754,4177,1184849.041883478
pretium neque inc.,([tent and awning...,77013874702,3448,46572.26047940074
nullam scelerisqu...,"[(opticians, opti...",78663389603,978,87128.57419733977
eget llc,"[(stationery, off...",49514072231,1647,1204279.1570783928
diam sed inc.,[[computer progra...,25607153542,461,45733.82825409439
donec dignissim m...,"((telecom), (c), ...",95938358124,126,235756.5269952945




In [45]:
merchants_trans_1 = merchants_trans_1.withColumn('spent_per_order', F.col('total_dollar_value') / F.col('order_count'))


In [46]:
merchants_trans_1.dropDuplicates(subset=['name', 'tags', 'merchant_abn'])
merchants_trans_1.count() #no duplicate merchants

                                                                                

4026

In [50]:
fraud = pd.merge(merchants_trans_1.toPandas(), merchants_fraud, on='merchant_abn')
fraud

                                                                                

Unnamed: 0,name,tags,merchant_abn,order_count,total_dollar_value,spent_per_order,order_datetime,fraud_probability
0,dictum phasellus in institute,"[(gift, card, novelty, and souvenir shops), (a...",94493496784,99176,9.115636e+06,91.913728,2021-11-26,30.579032
1,lacus aliquam corporation,"[(antique shops - sales, repairs, and restora...",31334588839,1527,9.630683e+06,6306.930472,2021-10-02,42.755301
2,lacus aliquam corporation,"[(antique shops - sales, repairs, and restora...",31334588839,1527,9.630683e+06,6306.930472,2021-12-26,38.361660
3,lacus aliquam corporation,"[(antique shops - sales, repairs, and restora...",31334588839,1527,9.630683e+06,6306.930472,2021-11-26,36.209713
4,lacus aliquam corporation,"[(antique shops - sales, repairs, and restora...",31334588839,1527,9.630683e+06,6306.930472,2021-11-29,35.386213
...,...,...,...,...,...,...,...,...
90,pharetra quisque company,"([jewelry, watch, clock, and silverware shops]...",90918180829,557,5.514145e+06,9899.721335,2022-01-27,33.080273
91,pharetra quisque company,"([jewelry, watch, clock, and silverware shops]...",90918180829,557,5.514145e+06,9899.721335,2022-02-19,30.975819
92,nullam enim sed incorporated,"((tent and awning shops), (e), (take rate: 0.27))",78080443264,1099,2.241564e+04,20.396394,2021-11-26,69.095317
93,ornare limited,"([motor vehicle supplies and new parts], [a], ...",96680767841,31134,9.806731e+06,314.984620,2021-11-26,29.555245




In [51]:
fraud_1 = fraud.groupby(['name', 'tags', 'merchant_abn', 'order_count',	'total_dollar_value', 'spent_per_order']) \
                .agg(fraud_count=('order_datetime', 'count'), avg_fraud_prob=('fraud_probability', 'mean'))

fraud_1

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,fraud_count,avg_fraud_prob
name,tags,merchant_abn,order_count,total_dollar_value,spent_per_order,Unnamed: 6_level_1,Unnamed: 7_level_1
accumsan corporation,"[(jewelry, watch, clock, and silverware shops), (b), (take rate: 4.65)]",86889657711,78,1160758.0,14881.517466,1,63.212226
amet risus inc.,"([furniture, home furnishings and equipment shops, and manufacturers, except appliances], [a], [take rate: 6.82])",79827781481,4798,9734168.0,2028.797098,2,29.735159
arcu morbi institute,"([artist supply and craft shops], [c], [take rate: 1.47])",39649557865,21919,9857402.0,449.719528,1,28.690204
arcu sed eu incorporated,"[(computer programming , data processing, and integrated systems design services), (b), (take rate: 4.80)]",35909341340,37985,9528214.0,250.841492,1,29.133053
arcu sed pc,"((antique shops - sales, repairs, and restoration services), (b), (take rate: 4.19))",11590404675,51,810889.3,15899.790936,1,29.607818
arcu vivamus sit limited,"([shoe shops], [a], [take rate: 5.91])",30275989942,235,38633.79,164.399103,1,63.108593
at foundation,"((antique shops - sales, repairs, and restoration services), (b), (take rate: 3.40))",91880575299,33,668259.8,20250.295557,1,32.994978
commodo ipsum industries,"([jewelry, watch, clock, and silverware shops], [b], [take rate: 4.93])",19492220327,824,8165223.0,9909.250618,8,31.958307
diam eu dolor llc,"[(tent and awning shops), (b), (take rate: 4.10)]",90568944804,10732,9618325.0,896.228525,3,30.722985
dictum phasellus in institute,"[(gift, card, novelty, and souvenir shops), (a), (take rate: 5.65)]",94493496784,99176,9115636.0,91.913728,1,30.579032




In [131]:
merchants_fraud

Unnamed: 0,merchant_abn,order_datetime,fraud_probability
0,19492220327,2021-11-28,44.403659
1,31334588839,2021-10-02,42.755301
2,19492220327,2021-12-22,38.867790
3,82999039227,2021-12-19,94.134700
4,90918180829,2021-09-02,43.325517
...,...,...,...
109,80089686333,2021-09-01,67.505811
110,83220249221,2021-11-27,63.512459
111,85482742429,2021-11-27,70.881311
112,11470993597,2021-09-28,63.377344


24/09/10 11:30:05 WARN TransportChannelHandler: Exception in connection from /172.20.10.7:64872
java.io.IOException: Operation timed out
	at sun.nio.ch.FileDispatcherImpl.read0(Native Method)
	at sun.nio.ch.SocketDispatcher.read(SocketDispatcher.java:39)
	at sun.nio.ch.IOUtil.readIntoNativeBuffer(IOUtil.java:223)
	at sun.nio.ch.IOUtil.read(IOUtil.java:192)
	at sun.nio.ch.SocketChannelImpl.read(SocketChannelImpl.java:378)
	at io.netty.buffer.PooledByteBuf.setBytes(PooledByteBuf.java:254)
	at io.netty.buffer.AbstractByteBuf.writeBytes(AbstractByteBuf.java:1132)
	at io.netty.channel.socket.nio.NioSocketChannel.doReadBytes(NioSocketChannel.java:357)
	at io.netty.channel.nio.AbstractNioByteChannel$NioByteUnsafe.read(AbstractNioByteChannel.java:151)
	at io.netty.channel.nio.NioEventLoop.processSelectedKey(NioEventLoop.java:788)
	at io.netty.channel.nio.NioEventLoop.processSelectedKeysOptimized(NioEventLoop.java:724)
	at io.netty.channel.nio.NioEventLoop.processSelectedKeys(NioEventLoop.java: