In [10]:
# Upgrade scikit-learn to the newest available release (-U).
# As this cell's own output notes, the kernel may need a restart afterwards
# before the updated package is picked up by later cells.
# NOTE(review): the version is not pinned, so re-running this cell later may
# install a different release — consider pinning (e.g. scikit-learn==X.Y.Z).
pip install -U scikit-learn

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from numpy import isnan

# --- Configuration -----------------------------------------------------------
TRAIN_CSV = 'fraud_data.csv'
NEW_SAMPLE_CSV = 'test.csv'
TARGET_COL = 'IsFraud'
CATEGORICAL_COLS = ['Use Chip', 'Merchant State', 'Merchant City', 'Errors']
TEST_SIZE = 0.25
SEED = 80  # shared by the train/test split and the forest for reproducible runs


def _time_to_number(time_col):
    """Turn a column of '%M:%S'-formatted strings into ``minute * 60 + second``.

    The column is parsed once; missing entries become NaT and therefore NaN in
    the result, so the caller decides how to fill them.

    NOTE(review): the '%M:%S' format is kept from the original code. If the
    column actually holds hours:minutes, the format should be '%H:%M' — confirm
    against the data.
    """
    parsed = pd.to_datetime(time_col, format='%M:%S')
    return parsed.dt.minute * 60 + parsed.dt.second


def preprocess(frame, categories=None):
    """Return a numerically encoded copy of ``frame`` plus the categories used.

    Parameters
    ----------
    frame : pandas.DataFrame
        Raw data containing 'Time', 'Amount' and every column listed in
        ``CATEGORICAL_COLS``. It is not modified.
    categories : dict or None
        ``None`` learns the category list of each categorical column from
        ``frame`` (training data). Otherwise a mapping
        ``{column: categories}`` previously returned by this function; the
        same value -> code mapping is then reused, and values that were not
        present when the mapping was learned are encoded as -1.

    Returns
    -------
    (pandas.DataFrame, dict)
        The encoded frame and the ``{column: categories}`` mapping to pass
        back in when encoding further data for the same model.
    """
    out = frame.copy()

    # Parse Time and Amount *before* filling missing values: a 0 placeholder
    # is not a valid '%M:%S' string and would break the time parsing.
    out['Time'] = _time_to_number(out['Time'])

    # Strip the currency sign (raw string: '\$' is an invalid escape otherwise).
    # On an already-numeric column the replace is a no-op.
    out['Amount'] = out['Amount'].replace({r'\$': ''}, regex=True).astype(float)

    # Missing values become 0 everywhere, as in the original training code.
    out = out.fillna(0)

    fitted = {}
    for col in CATEGORICAL_COLS:
        if categories is None:
            dtype = 'category'
        else:
            dtype = pd.CategoricalDtype(categories=categories[col])
        as_category = out[col].astype(dtype)
        fitted[col] = as_category.cat.categories
        out[col] = as_category.cat.codes
    return out, fitted


# --- Load and encode the training data ---------------------------------------
data, fitted_categories = preprocess(pd.read_csv(TRAIN_CSV))

# --- Split data into train and test sets -------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    data.drop(TARGET_COL, axis=1),
    data[TARGET_COL],
    test_size=TEST_SIZE,
    random_state=SEED,
)

# --- Train RandomForestClassifier --------------------------------------------
# A fixed random_state makes the fitted forest (and the score) repeatable.
clf = RandomForestClassifier(random_state=SEED)
clf.fit(X_train, y_train)

# --- Evaluate on the held-out split ------------------------------------------
y_test_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_test_pred)

# NOTE(review): if fraud cases are rare in this data (verify the class balance),
# accuracy alone can look high for a model that never predicts fraud; consider
# also reporting precision/recall.
print("Accuracy:", accuracy)

# --- Predict on new sample data ----------------------------------------------
# Reuse the categories learned from the training data so that each category
# value maps to the same integer code the model was trained on.
new_sample, _ = preprocess(pd.read_csv(NEW_SAMPLE_CSV), categories=fitted_categories)

# Present exactly the training features, in the training order; extra columns
# (for example a label column) are ignored, missing ones are reported clearly.
feature_cols = list(X_train.columns)
missing_cols = [col for col in feature_cols if col not in new_sample.columns]
if missing_cols:
    raise ValueError(f"{NEW_SAMPLE_CSV} is missing feature columns: {missing_cols}")

# Predict the target variable for the new sample
y_pred = clf.predict(new_sample[feature_cols])

# Print the predicted target variable values
print(y_pred)

