In [126]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report, confusion_matrix
scaler = MinMaxScaler()

In [127]:
df = pd.read_csv("credit_card_transactions.csv")
df.head()

Unnamed: 0,TransactionID,Time,Amount,CardholderName,MerchantName,MCC,Location,Device,IPAddress,TransactionSpeed,Fraud
0,1,2025-03-09 06:23:57,83.54,Sarah Murray,Oconnor-Hughes,5411,Longtown,Android,102.247.49.50,38.17,0
1,2,2025-02-14 05:17:36,805.88,Alexander Brooks,Reed PLC,5261,Woodsland,POS,153.157.50.7,49.84,0
2,3,2024-05-03 15:28:52,450.49,Harold Perez,Berry LLC,5261,Jessicatown,iOS,151.174.154.123,48.79,0
3,4,2024-05-03 09:01:14,782.39,Rebecca Davis,Berg Inc,5968,North Krystalburgh,Android,130.70.79.149,8.71,0
4,5,2025-04-24 17:24:39,713.02,Lauren Benson DDS,King-Gaines,5968,East Danielletown,Android,195.136.16.190,17.16,0


In [128]:
df.dtypes

TransactionID         int64
Time                 object
Amount              float64
CardholderName       object
MerchantName         object
MCC                   int64
Location             object
Device               object
IPAddress            object
TransactionSpeed    float64
Fraud                 int64
dtype: object

In [129]:
#Transformacion de datos
df['Time'] = pd.to_datetime(df['Time'], errors = 'coerce')
df['MCC'] = df['MCC'].astype(object)
df['isWeekday'] = df['Time'].dt.weekday


In [130]:
#Nueva feature
df['Hour'] = df['Time'].dt.hour

In [131]:
df.dtypes

TransactionID                int64
Time                datetime64[ns]
Amount                     float64
CardholderName              object
MerchantName                object
MCC                         object
Location                    object
Device                      object
IPAddress                   object
TransactionSpeed           float64
Fraud                        int64
isWeekday                    int32
Hour                         int32
dtype: object

In [132]:
df.describe()

Unnamed: 0,TransactionID,Time,Amount,TransactionSpeed,Fraud,isWeekday,Hour
count,10000.0,10000,10000.0,10000.0,10000.0,10000.0,10000.0
mean,5000.5,2024-10-30 03:27:20.494300160,547.347491,30.738446,0.0192,2.9954,11.5255
min,1.0,2024-05-01 07:29:17,10.09,0.11,0.0,0.0,0.0
25%,2500.75,2024-07-31 11:55:39.500000,263.2225,14.91,0.0,1.0,6.0
50%,5000.5,2024-10-30 22:49:50,512.24,29.655,0.0,3.0,12.0
75%,7500.25,2025-01-28 03:57:06.750000128,759.935,45.27,0.0,5.0,17.0
max,10000.0,2025-05-01 09:39:46,4988.07,119.86,1.0,6.0,23.0
std,2886.89568,,460.047329,19.092345,0.137234,1.991626,6.932341


In [133]:
df = df.drop(['CardholderName','TransactionID' ], axis=1)

In [134]:
df.head()

Unnamed: 0,Time,Amount,MerchantName,MCC,Location,Device,IPAddress,TransactionSpeed,Fraud,isWeekday,Hour
0,2025-03-09 06:23:57,83.54,Oconnor-Hughes,5411,Longtown,Android,102.247.49.50,38.17,0,6,6
1,2025-02-14 05:17:36,805.88,Reed PLC,5261,Woodsland,POS,153.157.50.7,49.84,0,4,5
2,2024-05-03 15:28:52,450.49,Berry LLC,5261,Jessicatown,iOS,151.174.154.123,48.79,0,4,15
3,2024-05-03 09:01:14,782.39,Berg Inc,5968,North Krystalburgh,Android,130.70.79.149,8.71,0,4,9
4,2025-04-24 17:24:39,713.02,King-Gaines,5968,East Danielletown,Android,195.136.16.190,17.16,0,3,17


In [135]:
df.isna().sum()

Time                0
Amount              0
MerchantName        0
MCC                 0
Location            0
Device              0
IPAddress           0
TransactionSpeed    0
Fraud               0
isWeekday           0
Hour                0
dtype: int64

In [136]:

categoricFeatures = ['MerchantName','Location', 'IPAddress', 'MCC', 'Device']
numericFeatures = ['Amount', 'Hour', 'TransactionSpeed']

#Normalizacion para categoricas
for col in categoricFeatures:
      df[col] = LabelEncoder().fit_transform(df[col])

#Normalizacion para numericas
df[numericFeatures] = scaler.fit_transform(df[numericFeatures])

In [137]:
#Aislamos labels
x = df.drop(['Fraud','Time'], axis= 1 )
y = df['Fraud']

In [138]:
#Split test and test
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

model = GaussianNB()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [139]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1953
           1       1.00      0.85      0.92        47

    accuracy                           1.00      2000
   macro avg       1.00      0.93      0.96      2000
weighted avg       1.00      1.00      1.00      2000



In [140]:
print(confusion_matrix(y_test, y_pred))


[[1953    0]
 [   7   40]]


In [141]:
#Data horriblemente desbalanceada xd

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1953
           1       1.00      0.85      0.92        47

    accuracy                           1.00      2000
   macro avg       1.00      0.93      0.96      2000
weighted avg       1.00      1.00      1.00      2000

