In [3]:
# Financial and Risk Prediction Pipeline
# This notebook builds a versatile pipeline that can handle predictions related to fraud detection, loan defaults, customer predictions, and other financial risk modeling tasks. The pipeline includes feature alignment, scaling, and model prediction.

# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestClassifier
from sklearn.base import BaseEstimator, TransformerMixin
import joblib

# Additional imports for evaluation and visualization
from sklearn.metrics import classification_report, roc_auc_score, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt


In [4]:
# Canonical, ordered feature schema for the pipeline.
# The order here is the column order the feature aligner produces.
standard_features = [
    # --- Behavioral patterns ---
    'Transaction_Frequency',
    'Spending_Patterns',
    'Time_Since_Last_Transaction',
    # --- Risk indicators ---
    'Previous_Fraud_Flag',
    'Account_Age_Days',
    'Location_Deviation',
    # --- User / account-based features ---
    'Customer_Segment',
    'Account_Balance_Risk_Score',
    # --- Anomaly detection ---
    'Is_Transaction_Anomalous',
    'Failed_Login_Attempts',
    # --- Device and network information ---
    'Device_Consistency_Flag',
    'IP_Address_Change_Flag',
    # --- Aggregated features ---
    'Global_Avg_Transaction_Amount',
    'Fraud_Rate_In_Similar_Transactions',
    # --- Temporal and numerical features ---
    'Hour_of_Day',
    'Day_of_Week',
    'Transaction_Amount',
]


In [5]:
class FeatureAligner(BaseEstimator, TransformerMixin):
    """Reindex an incoming table onto a fixed, ordered set of feature columns.

    Columns named in ``standard_features`` but absent from the input are
    created and filled with 0, input columns that are not named are dropped,
    and the output columns follow the order of ``standard_features``.

    Parameters
    ----------
    standard_features : list of str
        Column names, in order, that the transformed output must contain.
    """

    def __init__(self, standard_features):
        # Kept exactly as passed in (no copy, no validation) so the
        # parameter round-trips through BaseEstimator.get_params().
        self.standard_features = standard_features

    def fit(self, X, y=None):
        """Do nothing and return ``self``; alignment learns nothing from data."""
        return self

    def transform(self, X):
        """Return ``X`` reindexed to the ``standard_features`` columns.

        Parameters
        ----------
        X : pandas.DataFrame (or any object exposing a compatible ``reindex``)
            Input rows; columns are matched to ``standard_features`` by name.

        Returns
        -------
        The result of ``X.reindex(columns=..., fill_value=0)``. Note that
        ``fill_value`` only applies to columns created by the reindex; missing
        values already present in existing columns are left untouched.

        Raises
        ------
        TypeError
            If ``X`` has no ``reindex`` method (for example a NumPy array),
            because columns cannot be aligned by name without labels.
        """
        # Duck-typed check: anything that worked before still works, while
        # label-less inputs get an actionable message instead of a bare
        # AttributeError.
        if not hasattr(X, "reindex"):
            raise TypeError(
                "FeatureAligner.transform expects a pandas DataFrame with named "
                f"columns, got {type(X).__name__}."
            )
        return X.reindex(columns=list(self.standard_features), fill_value=0)

    def get_feature_names_out(self, input_features=None):
        """Return the output column names, i.e. ``standard_features``.

        ``input_features`` is accepted for scikit-learn API compatibility and
        ignored, since the output schema does not depend on the input columns.
        """
        return np.asarray(self.standard_features, dtype=object)


In [6]:
# Assemble the prediction pipeline; steps run in the order listed.
# NOTE(review): every aligned column is passed to StandardScaler, including
# 'Customer_Segment' — if that column holds string labels the scaler will fail
# at fit time; confirm it is numerically encoded upstream.
pipeline = Pipeline([
    # 1. Put the input onto the fixed `standard_features` column layout.
    ('feature_aligner', FeatureAligner(standard_features=standard_features)),
    # 2. Scale the aligned features.
    ('scaler', StandardScaler()),
    # 3. Placeholder model (can be replaced). A fixed random_state makes
    #    repeated fits on the same data produce the same forest.
    ('model', RandomForestClassifier(random_state=42)),
])


In [None]:
import os
import joblib

# Locations of the pre-trained model and scaler (adjust the paths based on
# your file locations).
model_path = '/kaggle/input/model-and-scaler/DatosX-Meta.pkl'  # Adjust this path
scaler_path = '/kaggle/input/model-and-scaler/new_fraud_detection_scaler (3).pkl'  # Adjust this path


def _load_artifact(path, label):
    """Load a joblib artifact from ``path`` and report the outcome.

    Parameters
    ----------
    path : str
        File-system path of the serialized object.
    label : str
        Human-readable name used in the printed status message.

    Returns
    -------
    The deserialized object, or ``None`` when ``path`` does not exist.
    """
    if not os.path.exists(path):
        print(f"{label} file not found.")
        return None
    # SECURITY: joblib.load unpickles the file, which can execute arbitrary
    # code. Only load artifacts from a trusted source.
    artifact = joblib.load(path)
    print(f"{label} loaded successfully.")
    return artifact


# Both names are always bound after this cell runs: a missing file yields None
# instead of leaving the variable undefined (or stale from an earlier run), so
# later cells can test `is None` rather than hitting a NameError.
pre_trained_model = _load_artifact(model_path, "Model")
pre_trained_scaler = _load_artifact(scaler_path, "Scaler")



In [9]:
# Persist the pipeline object to disk for future use.
# NOTE(review): no cell visible here calls pipeline.fit(), so this appears to
# serialize an unfitted pipeline — confirm that is intended.
pipeline_file = 'financial_risk_pipeline.pkl'
joblib.dump(pipeline, pipeline_file)


['financial_risk_pipeline.pkl']