In [9]:
!pip install imblearn

Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [15]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import f1_score
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [24]:
import sklearn
sklearn.__version__

'1.5.1'

In [13]:
# Load dataset (Replace 'data.csv' with the actual file path)
data = pd.read_csv(r"C:\Users\raajj\Downloads\Fraud\Fraud.csv")

In [16]:
# Step 2: Data Preprocessing
# Handle missing values (if any)
data = data.dropna()

# Encode categorical variables
label_encoder = LabelEncoder()
data['nameOrig'] = label_encoder.fit_transform(data['nameOrig'])
data['nameDest'] = label_encoder.fit_transform(data['nameDest'])

# Normalize numerical features
scaler = StandardScaler()
numerical_features = ['amount', 'oldbalanceOrg', 'newbalanceOrig', 'oldbalanceDest', 'newbalanceDest']
data[numerical_features] = scaler.fit_transform(data[numerical_features])

In [17]:
# Step 3: Feature Selection
features = ['amount', 'nameOrig', 'oldbalanceOrg', 'newbalanceOrig', 'nameDest', 'oldbalanceDest', 'newbalanceDest']
X = data[features]
y = data['isFraud']

In [18]:
# Step 4: Model Training
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

In [19]:
# Step 5: Model Evaluation
y_pred = model.predict(X_test)
f1 = f1_score(y_test, y_pred)
print(f'F1 Score: {f1}')

F1 Score: 0.8271304347826087


In [20]:
# Step 6: Save the Model
# Save the model to a file
model_filename = 'fraud_detection_model.joblib'
joblib.dump(model, model_filename)
print(f'Model saved to {model_filename}')

# Optionally, save the scaler and label encoder if needed for future predictions
scaler_filename = 'scaler.joblib'
joblib.dump(scaler, scaler_filename)
print(f'Scaler saved to {scaler_filename}')

label_encoder_filename = 'label_encoder.joblib'
joblib.dump(label_encoder, label_encoder_filename)
print(f'Label Encoder saved to {label_encoder_filename}')

Model saved to fraud_detection_model.joblib
Scaler saved to scaler.joblib
Label Encoder saved to label_encoder.joblib


In [23]:
import os
import pickle
# Step 6: Save the Model with Versioning
version = 1
model_filename = f'version{version}.pkl'

# Check if the file already exists to avoid overwriting
while os.path.exists(model_filename):
    version += 1
    model_filename = f'version{version}.pkl'

# Save the model to a file
with open(model_filename, 'wb') as file:
    pickle.dump(model, file)
print(f'Model saved to {model_filename}')

# Optionally, save the scaler and label encoder if needed for future predictions
scaler_filename = f'scaler_version{version}.pkl'
with open(scaler_filename, 'wb') as file:
    pickle.dump(scaler, file)
print(f'Scaler saved to {scaler_filename}')

label_encoder_filename = f'label_encoder_version{version}.pkl'
with open(label_encoder_filename, 'wb') as file:
    pickle.dump(label_encoder, file)
print(f'Label Encoder saved to {label_encoder_filename}')

Model saved to version2.pkl
Scaler saved to scaler_version2.pkl
Label Encoder saved to label_encoder_version2.pkl
