In [None]:
import pandas as pd
from matplotlib import pyplot
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder

# Load the three CSV files straight from the project's GitHub repository.
# Each URL is declared immediately before the read that consumes it.
#
# NOTE(review): every URL below carries a `token=` query parameter. That looks
# like an access credential committed to the notebook -- confirm whether it is
# still valid and consider supplying it from the environment instead.

# Trnx_Dataset.csv
dataset_URL = "https://raw.githubusercontent.com/CheMBurN695/FinCrime_ML_Model/refs/heads/master/Trnx_Dataset.csv?token=GHSAT0AAAAAACYPUMUIAXADJP4OF7RWQUN6Z3IP6OQ"
dataFrame = pd.read_csv(dataset_URL)

# Training_Dataset.csv
training_dataset_URL = "https://raw.githubusercontent.com/CheMBurN695/FinCrime_ML_Model/refs/heads/master/Training_Dataset.csv?token=GHSAT0AAAAAACYPUMUIORTV4YK2A6YTY7O4Z3IP6NQ"
training_dataFrame = pd.read_csv(training_dataset_URL)

# Testing_Dataset.csv
testing_dataset_URL = "https://raw.githubusercontent.com/CheMBurN695/FinCrime_ML_Model/refs/heads/master/Testing_Dataset.csv?token=GHSAT0AAAAAACYPUMUIE5I4XHHLIEVZVFG4Z3IP6NA"
testing_dataFrame = pd.read_csv(testing_dataset_URL)

In [None]:
# Row count for every (Country, Source of Money) combination in the dataset.
sowByCountry = (
    dataFrame
    .groupby(by=["Country", "Source of Money"])
    .size()
)


In [None]:
# Per-country transaction totals, and a mask selecting the rows whose
# 'Source of Money' is "Illegal".
country_count = dataFrame.groupby(["Country"]).size()
illegal_trnxs = dataFrame['Source of Money'] == "Illegal"

# Re-align the illegal counts onto the full country index so that a country
# with no illegal transactions contributes 0 rather than becoming NaN in the
# subtraction and percentage calculations below.
illegal_trnx_byCountry = (
    dataFrame[illegal_trnxs]
    .groupby(["Country"])
    .size()
    .reindex(country_count.index, fill_value=0)
)
not_illegal_trnx_byCountry = country_count - illegal_trnx_byCountry
percent_illegal = (illegal_trnx_byCountry / country_count) * 100
percent_legal = (not_illegal_trnx_byCountry / country_count) * 100

_fig, _axes = pyplot.subplots(figsize=(8,6))
_axes.bar(illegal_trnx_byCountry.index, illegal_trnx_byCountry, color='red', label="Illegal")
# Stack the legal counts on top of the illegal ones. Drawing both series from
# a zero baseline at the same x positions lets the second bar hide the first.
_axes.bar(
    not_illegal_trnx_byCountry.index,
    not_illegal_trnx_byCountry,
    bottom=illegal_trnx_byCountry,
    color='green',
    label="Legal",
)

_axes.set_title("Distribution of Illegal Transactions per Country In Dataset")
_axes.set_xlabel("Country")
_axes.set_ylabel("Transaction Count")
# The bars are already placed on a categorical axis labelled by country, so
# only the label rotation needs adjusting.
_axes.tick_params(axis="x", labelrotation=45)
_axes.legend()
pyplot.show()


In [None]:

# One LabelEncoder per column, each fitted on the corresponding column of the
# full dataset (`dataFrame`). `fit` returns the encoder itself, so creation
# and fitting are done in a single expression.
label_encoder_country = LabelEncoder().fit(dataFrame['Country'])
label_encoder_destination_country = LabelEncoder().fit(dataFrame['Destination Country'])
label_encoder_transaction_type = LabelEncoder().fit(dataFrame['Transaction Type'])
label_encoder_person_involved = LabelEncoder().fit(dataFrame['Person Involved'])
label_encoder_industry = LabelEncoder().fit(dataFrame['Industry'])
label_encoder_source_of_money = LabelEncoder().fit(dataFrame['Source of Money'])
label_encoder_ML_score = LabelEncoder().fit(dataFrame['Money Laundering Risk Score'])

# Column -> fitted encoder, in the order the columns are encoded.
_training_encoders = {
    'Country': label_encoder_country,
    'Destination Country': label_encoder_destination_country,
    'Transaction Type': label_encoder_transaction_type,
    'Person Involved': label_encoder_person_involved,
    'Industry': label_encoder_industry,
    'Source of Money': label_encoder_source_of_money,
    'Money Laundering Risk Score': label_encoder_ML_score,
}

# Replace each training column with its integer codes.
# NOTE: this overwrites `training_dataFrame` in place, so re-running the cell
# without reloading the data feeds already-encoded values to the encoders.
for _column, _encoder in _training_encoders.items():
    training_dataFrame[_column] = _encoder.transform(training_dataFrame[_column])

# Model inputs (three encoded columns) and the encoded target.
X = training_dataFrame[['Industry', 'Country', 'Destination Country']]
Y = training_dataFrame['Money Laundering Risk Score']

In [None]:
# testing data
#
# Encode the testing frame with the encoders fitted on the full dataset, so
# that X_test / Y_test use the same integer codes as the training features.
# (The previous version of this cell transformed `training_dataFrame` again
# and left `testing_dataFrame` un-encoded.)
# NOTE(review): assumes Testing_Dataset.csv has the same columns as the
# training file -- confirm.
# NOTE: like the training cell, this overwrites the columns in place; reload
# the data before re-running.

testing_dataFrame['Country'] = label_encoder_country.transform(testing_dataFrame['Country'])
testing_dataFrame['Destination Country'] = label_encoder_destination_country.transform(testing_dataFrame['Destination Country'])
testing_dataFrame['Transaction Type'] = label_encoder_transaction_type.transform(testing_dataFrame['Transaction Type'])
testing_dataFrame['Person Involved'] = label_encoder_person_involved.transform(testing_dataFrame['Person Involved'])
testing_dataFrame['Industry'] = label_encoder_industry.transform(testing_dataFrame['Industry'])
testing_dataFrame['Source of Money'] = label_encoder_source_of_money.transform(testing_dataFrame['Source of Money'])
testing_dataFrame['Money Laundering Risk Score'] = label_encoder_ML_score.transform(testing_dataFrame['Money Laundering Risk Score'])

# Same feature columns and target as used for training.
X_test = testing_dataFrame[['Industry', 'Country', 'Destination Country']]
Y_test = testing_dataFrame['Money Laundering Risk Score']


In [None]:
# Fit a logistic-regression classifier on the encoded training features X
# against target Y, allowing the solver up to 1000 iterations.
# `fit` returns the estimator, so `model` is the fitted classifier; the bare
# expression on the last line keeps it as the cell's displayed result.
model = LogisticRegression(max_iter=1000).fit(X, Y)
model

In [None]:
# Predict the encoded target for every row of the test features and print
# the resulting array.
predictions = model.predict(X=X_test)
print(predictions)

