In [1]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, roc_auc_score
import joblib

def train_model(df, target_column='isFraud', test_size=0.2, random_state=42):
    """
    Train a machine learning model (XGBoost) on the provided dataframe.
    
    Parameters:
    - df: The dataframe containing features and target.
    - target_column: The name of the target column.
    - test_size: Proportion of the dataset to be used for testing.
    - random_state: Random state for reproducibility.
    
    Returns:
    - model: Trained XGBoost model.
    - X_test: Test feature data.
    - y_test: Test target data.
    - accuracy: Model accuracy on the test data.
    - auc: Model AUC score on the test data.
    """
    # Prepare the data
    X = df.drop(columns=[target_column])
    y = df[target_column]
    
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=random_state)
    
    # Initialize the XGBoost model
    model = XGBClassifier(n_estimators=100, max_depth=3, learning_rate=0.1, random_state=random_state)
    
    # Fit the model
    model.fit(X_train, y_train)
    
    # Make predictions
    y_pred = model.predict(X_test)
    
    # Evaluate the model
    accuracy = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    
    print(f"Model Accuracy: {accuracy}")
    print(f"Model AUC: {auc}")
    
    return model, X_test, y_test, accuracy, auc

def save_model(model, filename='model.pkl'):
    """
    Save the trained model to a file.
    
    Parameters:
    - model: The trained model to be saved.
    - filename: The name of the file to save the model.
    """
    with open(filename, 'wb') as f:
        joblib.dump(model, f)
    print(f"Model saved to {filename}")

def load_model(filename='model.pkl'):
    """
    Load a trained model from a file.
    
    Parameters:
    - filename: The name of the file to load the model from.
    
    Returns:
    - model: The loaded model.
    """
    with open(filename, 'rb') as f:
        model = joblib.load(f)
    print(f"Model loaded from {filename}")
    return model

def evaluate_model(model, X_test, y_test):
    """
    Evaluate the trained model on test data.
    
    Parameters:
    - model: The trained model.
    - X_test: The test feature data.
    - y_test: The test target data.
    
    Returns:
    - accuracy: Model accuracy on the test data.
    - auc: Model AUC score on the test data.
    """
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    
    print(f"Model Accuracy: {accuracy}")
    print(f"Model AUC: {auc}")
    
    return accuracy, auc


In [4]:
df = pd.read_csv(r"C:\Users\tekla\Documents\Machine Learning\train_transaction.csv")

# Check the shape of the dataframe
print(f"Training data shape: {df.shape}")

FileNotFoundError: [Errno 2] No such file or directory: 'C:\\Users\\tekla\\Documents\\Machine Learning\\train_transaction.csv'

In [5]:
import os
print(os.getcwd())  # This shows the current working directory

# If needed, change to the correct working directory
os.chdir(r"C:\Users\tekla\Documents\Machine Learning")


C:\Users\tekla\OneDrive\Documents\Machine Learning\ML-ieee-cis-fraud-detection\notebooks


FileNotFoundError: [WinError 2] The system cannot find the file specified: 'C:\\Users\\tekla\\Documents\\Machine Learning'