In [1]:
# CELL 1: Setup and Imports
# ============================================================================
import sagemaker
from sklearn.model_selection import train_test_split
import boto3
import pandas as pd
sm_boto3 = boto3.client("sagemaker")
sess = sagemaker.Session()

region = sess.boto_session.region_name
bucket = 'deployment-s3-bucket-sagemaker'
print("Using bucket " + bucket)

sagemaker.config INFO - Not applying SDK defaults from location: C:\ProgramData\sagemaker\sagemaker\config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: C:\Users\DELL 5410\AppData\Local\sagemaker\sagemaker\config.yaml
Using bucket deployment-s3-bucket-sagemaker


In [2]:
# CELL 2: Additional Imports
# ============================================================================
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score


In [3]:
# CELL 3: Load Data
# ============================================================================
credit_card_data = pd.read_csv("creditcard.csv")
print("Data loaded successfully!")
print(f"Shape: {credit_card_data.shape}")


Data loaded successfully!
Shape: (284807, 31)


In [4]:
# CELL 4: Explore Data - First 5 Rows
# ============================================================================
credit_card_data.head()


Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,0
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,0
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,0


In [5]:
# CELL 5: Data Information
# ============================================================================
credit_card_data.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 284807 entries, 0 to 284806
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    284807 non-null  float64
 1   V1      284807 non-null  float64
 2   V2      284807 non-null  float64
 3   V3      284807 non-null  float64
 4   V4      284807 non-null  float64
 5   V5      284807 non-null  float64
 6   V6      284807 non-null  float64
 7   V7      284807 non-null  float64
 8   V8      284807 non-null  float64
 9   V9      284807 non-null  float64
 10  V10     284807 non-null  float64
 11  V11     284807 non-null  float64
 12  V12     284807 non-null  float64
 13  V13     284807 non-null  float64
 14  V14     284807 non-null  float64
 15  V15     284807 non-null  float64
 16  V16     284807 non-null  float64
 17  V17     284807 non-null  float64
 18  V18     284807 non-null  float64
 19  V19     284807 non-null  float64
 20  V20     284807 non-null  float64
 21  V21     28

In [6]:
# CELL 6: Check Missing Values
# ============================================================================
credit_card_data.isnull().sum()


# ============================================================================
credit_card_data['Class'].value_counts()


Class
0    284315
1       492
Name: count, dtype: int64

In [7]:
# CELL 8: Separate Classes
# ============================================================================
legit = credit_card_data[credit_card_data.Class == 0]
fraud = credit_card_data[credit_card_data.Class == 1]
print(f"Legitimate transactions: {legit.shape}")
print(f"Fraudulent transactions: {fraud.shape}")


Legitimate transactions: (284315, 31)
Fraudulent transactions: (492, 31)


In [8]:
# CELL 9: Statistical Analysis - Legitimate Transactions
# ============================================================================
legit.Amount.describe()

count    284315.000000
mean         88.291022
std         250.105092
min           0.000000
25%           5.650000
50%          22.000000
75%          77.050000
max       25691.160000
Name: Amount, dtype: float64

In [9]:
# CELL 10: Statistical Analysis - Fraudulent Transactions
# ============================================================================
fraud.Amount.describe()


count     492.000000
mean      122.211321
std       256.683288
min         0.000000
25%         1.000000
50%         9.250000
75%       105.890000
max      2125.870000
Name: Amount, dtype: float64

In [10]:
# CELL 11: Compare Mean Values by Class
# ============================================================================
credit_card_data.groupby('Class').mean()

Unnamed: 0_level_0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,94838.202258,0.008258,-0.006271,0.012171,-0.00786,0.005453,0.002419,0.009637,-0.000987,0.004467,...,-0.000644,-0.001235,-2.4e-05,7e-05,0.000182,-7.2e-05,-8.9e-05,-0.000295,-0.000131,88.291022
1,80746.806911,-4.771948,3.623778,-7.033281,4.542029,-3.151225,-1.397737,-5.568731,0.570636,-2.581123,...,0.372319,0.713588,0.014049,-0.040308,-0.10513,0.041449,0.051648,0.170575,0.075667,122.211321


In [11]:
# CELL 12: Create Balanced Dataset - Sample Legitimate Transactions
# ============================================================================
legit_sample = legit.sample(n=492, random_state=42)
print(f"Sampled legitimate transactions: {legit_sample.shape}")

Sampled legitimate transactions: (492, 31)


In [12]:
# CELL 13: Concatenate to Create Balanced Dataset
# ============================================================================
new_dataset = pd.concat([legit_sample, fraud], axis=0)
print(f"Balanced dataset shape: {new_dataset.shape}")


Balanced dataset shape: (984, 31)


In [13]:
# CELL 14: Verify Balanced Dataset
# ============================================================================
new_dataset.tail()


Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
279863,169142.0,-1.927883,1.125653,-4.518331,1.749293,-1.566487,-2.010494,-0.88285,0.697211,-2.064945,...,0.778584,-0.319189,0.639419,-0.294885,0.537503,0.788395,0.29268,0.147968,390.0,1
280143,169347.0,1.378559,1.289381,-5.004247,1.41185,0.442581,-1.326536,-1.41317,0.248525,-1.127396,...,0.370612,0.028234,-0.14564,-0.081049,0.521875,0.739467,0.389152,0.186637,0.76,1
280149,169351.0,-0.676143,1.126366,-2.2137,0.468308,-1.120541,-0.003346,-2.234739,1.210158,-0.65225,...,0.751826,0.834108,0.190944,0.03207,-0.739695,0.471111,0.385107,0.194361,77.89,1
281144,169966.0,-3.113832,0.585864,-5.39973,1.817092,-0.840618,-2.943548,-2.208002,1.058733,-1.632333,...,0.583276,-0.269209,-0.456108,-0.183659,-0.328168,0.606116,0.884876,-0.2537,245.0,1
281674,170348.0,1.991976,0.158476,-2.583441,0.40867,1.151147,-0.096695,0.22305,-0.068384,0.577829,...,-0.16435,-0.295135,-0.072173,-0.450261,0.313267,-0.289617,0.002988,-0.015309,42.53,1


In [14]:
# CELL 15: Check Class Distribution in Balanced Dataset
# ============================================================================
new_dataset['Class'].value_counts()

Class
0    492
1    492
Name: count, dtype: int64

In [15]:
# ============================================================================
# CELL 16: Mean Values in Balanced Dataset
# ============================================================================
new_dataset.groupby('Class').mean()

Unnamed: 0_level_0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V20,V21,V22,V23,V24,V25,V26,V27,V28,Amount
Class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,95052.75813,0.153312,0.009649,-0.038029,-0.027323,0.061966,-0.053962,0.013795,0.014911,0.037348,...,0.01503,0.014059,-0.020781,0.013223,-0.007257,0.024646,-0.027696,0.01107,-0.002305,80.348354
1,80746.806911,-4.771948,3.623778,-7.033281,4.542029,-3.151225,-1.397737,-5.568731,0.570636,-2.581123,...,0.372319,0.713588,0.014049,-0.040308,-0.10513,0.041449,0.051648,0.170575,0.075667,122.211321


In [16]:
# CELL 17: Prepare Features and Labels
# ============================================================================
X = new_dataset.drop(columns='Class', axis=1)
Y = new_dataset['Class']
print(f"Features shape: {X.shape}")
print(f"Labels shape: {Y.shape}")

Features shape: (984, 30)
Labels shape: (984,)


In [17]:
# CELL 18: Split Data with Labels Included - CORRECTED VERSION
# ============================================================================
# Split the data into train and test sets
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=2
)

print("="*60)
print("DATA SPLIT SUMMARY")
print("="*60)
print(f"X shape: {X.shape}")
print(f"X_train shape: {X_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"Y_train shape: {Y_train.shape}")
print(f"Y_test shape: {Y_test.shape}")

# CRITICAL: Combine features and labels for SageMaker
# The label MUST be the LAST column in the CSV
train_data = pd.concat([
    pd.DataFrame(X_train.reset_index(drop=True)), 
    pd.DataFrame(Y_train.reset_index(drop=True))
], axis=1)

test_data = pd.concat([
    pd.DataFrame(X_test.reset_index(drop=True)), 
    pd.DataFrame(Y_test.reset_index(drop=True))
], axis=1)

print(f"\nTrain data shape (with labels): {train_data.shape}")
print(f"Test data shape (with labels): {test_data.shape}")
print(f"\nTrain label distribution:")
print(train_data.iloc[:, -1].value_counts())
print(f"\nTest label distribution:")
print(test_data.iloc[:, -1].value_counts())

# Save to CSV without headers or index
train_data.to_csv("train.csv", index=False, header=False)
test_data.to_csv("test.csv", index=False, header=False)

print("\n" + "="*60)
print("CSV FILES CREATED SUCCESSFULLY")
print("="*60)

DATA SPLIT SUMMARY
X shape: (984, 30)
X_train shape: (787, 30)
X_test shape: (197, 30)
Y_train shape: (787,)
Y_test shape: (197,)

Train data shape (with labels): (787, 31)
Test data shape (with labels): (197, 31)

Train label distribution:
Class
1    394
0    393
Name: count, dtype: int64

Test label distribution:
Class
0    99
1    98
Name: count, dtype: int64

CSV FILES CREATED SUCCESSFULLY


In [18]:
#  CELL 19: Verify CSV Files
# ============================================================================
print("="*60)
print("VERIFICATION - Checking the saved CSV files")
print("="*60)

# Verify train.csv
print("\nüìÑ Train.csv:")
with open("train.csv", 'r') as f:
    lines = f.readlines()
    print(f"  ‚úì Number of lines: {len(lines)}")
    first_line = lines[0].strip()
    values = first_line.split(',')
    print(f"  ‚úì Number of columns: {len(values)}")
    print(f"  ‚úì First 3 values: {', '.join(values[:3])}")
    print(f"  ‚úì Last 3 values: {', '.join(values[-3:])}")
    print(f"  ‚úì LAST VALUE (label): {values[-1]}")
    
    # Verify it's a valid label
    if values[-1] in ['0', '0.0', '1', '1.0']:
        print("  ‚úÖ Label column is present and correct!")
    else:
        print("  ‚ùå ERROR: Last column is not a valid label!")

# Verify test.csv  
print("\nüìÑ Test.csv:")
with open("test.csv", 'r') as f:
    lines = f.readlines()
    print(f"  ‚úì Number of lines: {len(lines)}")
    first_line = lines[0].strip()
    values = first_line.split(',')
    print(f"  ‚úì Number of columns: {len(values)}")
    print(f"  ‚úì First 3 values: {', '.join(values[:3])}")
    print(f"  ‚úì Last 3 values: {', '.join(values[-3:])}")
    print(f"  ‚úì LAST VALUE (label): {values[-1]}")
    
    # Verify it's a valid label
    if values[-1] in ['0', '0.0', '1', '1.0']:
        print("  ‚úÖ Label column is present and correct!")
    else:
        print("  ‚ùå ERROR: Last column is not a valid label!")

print("\n" + "="*60)


VERIFICATION - Checking the saved CSV files

üìÑ Train.csv:
  ‚úì Number of lines: 787
  ‚úì Number of columns: 31
  ‚úì First 3 values: 67571.0, -0.758468652663917, -0.0454102784513458
  ‚úì Last 3 values: -0.117140114799884, 549.06, 1
  ‚úì LAST VALUE (label): 1
  ‚úÖ Label column is present and correct!

üìÑ Test.csv:
  ‚úì Number of lines: 197
  ‚úì Number of columns: 31
  ‚úì First 3 values: 153761.0, 1.14625926661879, 1.40345824321907
  ‚úì Last 3 values: 0.243744113775662, 51.37, 1
  ‚úì LAST VALUE (label): 1
  ‚úÖ Label column is present and correct!



In [19]:
# CELL 20: Upload Data to S3
# ============================================================================
sk_prefix = "sagemaker/fraude_detection/sklearncontainer"

print("Uploading data to S3...")
trainpath = sess.upload_data(
    path="train.csv", bucket=bucket, key_prefix=sk_prefix
)
print(f"‚úì Training data uploaded to: {trainpath}")

testpath = sess.upload_data(
    path="test.csv", bucket=bucket, key_prefix=sk_prefix
)
print(f"‚úì Test data uploaded to: {testpath}")


Uploading data to S3...
‚úì Training data uploaded to: s3://deployment-s3-bucket-sagemaker/sagemaker/fraude_detection/sklearncontainer/train.csv
‚úì Test data uploaded to: s3://deployment-s3-bucket-sagemaker/sagemaker/fraude_detection/sklearncontainer/test.csv


In [20]:
# # CELL 21: Create Training Script (script.py)
# # ============================================================================

# import argparse
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.metrics import accuracy_score, classification_report
# import sklearn
# import joblib
# import os
# import numpy as np
# import pandas as pd
# import sys
# import traceback

# def model_fn(model_dir):
#     """Load the model for inference"""
#     clf = joblib.load(os.path.join(model_dir, "model.joblib"))
#     return clf

# if __name__ == "__main__":
#     try:
#         print("[INFO] Starting script execution")
#         print("[INFO] Python version:", sys.version)
#         print("[INFO] Scikit-learn version:", sklearn.__version__)
        
#         parser = argparse.ArgumentParser()
        
#         # Hyperparameters
#         parser.add_argument("--n_estimators", type=int, default=100)
#         parser.add_argument("--random_state", type=int, default=0)
#         parser.add_argument("--verbose", type=int, default=1)

#         # Directories
#         parser.add_argument("--model-dir", type=str, default=os.environ.get("SM_MODEL_DIR"))
#         parser.add_argument("--train", type=str, default=os.environ.get("SM_CHANNEL_TRAIN"))
#         parser.add_argument("--test", type=str, default=os.environ.get("SM_CHANNEL_TEST"))
        
#         args, _ = parser.parse_known_args()
#         print("[INFO] Arguments:", vars(args))
        
#         # Load data
#         print("\n[INFO] Loading training data...")
#         train_path = os.path.join(args.train, "train.csv")
#         test_path = os.path.join(args.test, "test.csv")
        
#         print(f"[INFO] Train path: {train_path}")
#         print(f"[INFO] Test path: {test_path}")
        
#         # Read CSV without header
#         train_df = pd.read_csv(train_path, header=None)
#         test_df = pd.read_csv(test_path, header=None)
        
#         print(f"[INFO] Train shape: {train_df.shape}")
#         print(f"[INFO] Test shape: {test_df.shape}")
#         print(f"[INFO] Train columns: {train_df.shape[1]}")
        
#         # Split features and labels - assuming last column is the target
#         X_train = train_df.iloc[:, :-1].values
#         y_train = train_df.iloc[:, -1].values
#         X_test = test_df.iloc[:, :-1].values
#         y_test = test_df.iloc[:, -1].values
        
#         print(f"[INFO] X_train shape: {X_train.shape}")
#         print(f"[INFO] y_train shape: {y_train.shape}")
#         print(f"[INFO] Class distribution in training: {np.unique(y_train, return_counts=True)}")
        
#         # Train model
#         print("\n[INFO] Training RandomForest Model...")
#         model = RandomForestClassifier(
#             n_estimators=args.n_estimators,
#             random_state=args.random_state,
#             verbose=args.verbose,
#             n_jobs=-1
#         )
        
#         model.fit(X_train, y_train)
#         print("[INFO] Training completed!")
        
#         # Evaluate on training data
#         train_pred = model.predict(X_train)
#         train_acc = accuracy_score(y_train, train_pred)
#         print(f"\n[INFO] Training Accuracy: {train_acc:.4f}")
        
#         # Evaluate on test data
#         print("\n[INFO] Evaluating model on test set...")
#         y_pred = model.predict(X_test)
#         test_acc = accuracy_score(y_test, y_pred)
#         print(f"[INFO] Test Accuracy: {test_acc:.4f}")
        
#         print("\n[INFO] Classification Report:")
#         print(classification_report(y_test, y_pred))
        
#         # Save model
#         print("\n[INFO] Saving model...")
#         os.makedirs(args.model_dir, exist_ok=True)
#         model_path = os.path.join(args.model_dir, "model.joblib")
#         joblib.dump(model, model_path)
#         print(f"[INFO] Model saved to: {model_path}")
        
#         # Verify model was saved
#         if os.path.exists(model_path):
#             print(f"[INFO] Model file size: {os.path.getsize(model_path)} bytes")
#         else:
#             print("[ERROR] Model file was not created!")
        
#         print("\n[INFO] Script completed successfully!")
        
#     except Exception as e:
#         print("\n[ERROR] An error occurred:")
#         print(traceback.format_exc())
#         sys.exit(1)


In [21]:
# CELL 22: Configure SageMaker Estimator
# ============================================================================
from sagemaker.sklearn.estimator import SKLearn

FRAMEWORK_VERSION = "0.23-1"
SAGEMAKER_ROLE = "arn:aws:iam::663247060623:role/service-role/SageMaker-SageMaker-roleaccess"

# Define hyperparameters
hyperparameters = {
    "n_estimators": 100,
    "random_state": 0,
    "verbose": 1
}

# Create estimator
sklearn_estimator = SKLearn(
    entry_point="script.py",
    role=SAGEMAKER_ROLE,
    instance_count=1,
    instance_type="ml.c5.xlarge",
    framework_version=FRAMEWORK_VERSION,
    base_job_name="RF-fraud-detection",
    hyperparameters=hyperparameters,
    max_run=3600
)

print("‚úì SageMaker estimator configured successfully!")

‚úì SageMaker estimator configured successfully!


In [22]:
 #CELL 23: Start Training Job
# ============================================================================
print("Starting training job...")
print("="*60)

sklearn_estimator.fit(
    {"train": trainpath, "test": testpath},
    wait=True
)

print("="*60)
print("‚úì Training completed successfully!")

Starting training job...


INFO:sagemaker:Creating training-job with name: RF-fraud-detection-2025-11-11-13-27-32-886


2025-11-11 13:22:47 Starting - Starting the training job...
2025-11-11 13:23:21 Starting - Preparing the instances for training...
2025-11-11 13:23:42 Downloading - Downloading input data...
2025-11-11 13:24:56 Training - Training image download completed. Training in progress.
2025-11-11 13:24:56 Uploading - Uploading generated training model
2025-11-11 13:24:56 Completed - Training job completed
..Training seconds: 73
Billable seconds: 73
‚úì Training completed successfully!


In [23]:
# CELL 24: Get Model Artifact Location
# ============================================================================
# Wait for job completion (if not already waited)
sklearn_estimator.latest_training_job.wait(logs="None")

# Get model artifact
artifact = sm_boto3.describe_training_job(
    TrainingJobName=sklearn_estimator.latest_training_job.name
)["ModelArtifacts"]["S3ModelArtifacts"]

print("="*60)
print("MODEL TRAINING COMPLETE")
print("="*60)
print(f"Model artifact persisted at: {artifact}")
print(f"Training job name: {sklearn_estimator.latest_training_job.name}")
print("="*60)


2025-11-11 13:24:56 Starting - Preparing the instances for training
2025-11-11 13:24:56 Downloading - Downloading the training image
2025-11-11 13:24:56 Training - Training image download completed. Training in progress.
2025-11-11 13:24:56 Uploading - Uploading generated training model
2025-11-11 13:24:56 Completed - Training job completed
MODEL TRAINING COMPLETE
Model artifact persisted at: s3://sagemaker-us-east-1-663247060623/RF-fraud-detection-2025-11-11-13-27-32-886/output/model.tar.gz
Training job name: RF-fraud-detection-2025-11-11-13-27-32-886


In [24]:
# CELL 25: Create SageMaker Model Object
# ============================================================================
from sagemaker.sklearn.model import SKLearnModel
from time import gmtime, strftime

print("Creating SageMaker Model object...")
print("="*60)

# Create unique model name with timestamp
model_name = "fraud-detection-model-" + strftime("%Y-%m-%d-%H-%M-%S", gmtime())
print(f"Model Name: {model_name}")

# Create the Model object
model = SKLearnModel(
    name=model_name,
    model_data=artifact,  # From Cell 24
    role=SAGEMAKER_ROLE,  # From Cell 22
    entry_point="script.py",
    framework_version=FRAMEWORK_VERSION,  # From Cell 22
    sagemaker_session=sess
)

print(f"‚úì Model object created successfully!")
print(f"‚úì Model artifact: {artifact}")
print(f"‚úì Entry point: script.py")
print(f"‚úì Framework version: {FRAMEWORK_VERSION}")
print("="*60)
print("\n‚úÖ Model is ready for deployment!")

Creating SageMaker Model object...
Model Name: fraud-detection-model-2025-11-11-13-30-24
‚úì Model object created successfully!
‚úì Model artifact: s3://sagemaker-us-east-1-663247060623/RF-fraud-detection-2025-11-11-13-27-32-886/output/model.tar.gz
‚úì Entry point: script.py
‚úì Framework version: 0.23-1

‚úÖ Model is ready for deployment!


In [25]:
# CELL 26: Deploy Model to Endpoint
# ============================================================================
print("Deploying model to SageMaker endpoint...")
print("="*60)
print("‚ö†Ô∏è  This will take 5-10 minutes to complete")
print("="*60)

# Deploy the model
predictor = model.deploy(
    initial_instance_count=1,
    instance_type="ml.t2.medium",  # Cost-effective instance for testing
    endpoint_name=model_name  # Use the same name as the model
)

print("="*60)
print("‚úÖ DEPLOYMENT COMPLETE!")
print("="*60)
print(f"Endpoint Name: {predictor.endpoint_name}")
print(f"Instance Type: ml.t2.medium")
print(f"Instance Count: 1")
print("="*60)
print("\nüöÄ Your fraud detection model is now live and ready to make predictions!")

Deploying model to SageMaker endpoint...
‚ö†Ô∏è  This will take 5-10 minutes to complete


INFO:sagemaker:Creating model with name: fraud-detection-model-2025-11-11-13-30-24
INFO:sagemaker:Creating endpoint-config with name fraud-detection-model-2025-11-11-13-30-24
INFO:sagemaker:Creating endpoint with name fraud-detection-model-2025-11-11-13-30-24


‚úÖ DEPLOYMENT COMPLETE!
Endpoint Name: fraud-detection-model-2025-11-11-13-30-24
Instance Type: ml.t2.medium
Instance Count: 1

üöÄ Your fraud detection model is now live and ready to make predictions!


In [26]:
# CELL 27: Test the Endpoint with Predictions
# ============================================================================
import numpy as np

print("Testing the deployed model...")
print("="*60)

# Get some test samples from your test set
test_samples = X_test[:5]  # First 5 transactions from test set
actual_labels = Y_test[:5]

print("Making predictions on 5 test transactions...\n")

# Make predictions
predictions = predictor.predict(test_samples)

print("="*60)
print("PREDICTION RESULTS")
print("="*60)

for i, (pred, actual) in enumerate(zip(predictions, actual_labels)):
    result = "‚úì CORRECT" if pred == actual else "‚úó INCORRECT"
    fraud_status_pred = "FRAUD" if pred == 1 else "LEGITIMATE"
    fraud_status_actual = "FRAUD" if actual == 1 else "LEGITIMATE"
    
    print(f"\nTransaction {i+1}:")
    print(f"  Predicted: {fraud_status_pred} ({pred})")
    print(f"  Actual:    {fraud_status_actual} ({actual})")
    print(f"  {result}")

print("\n" + "="*60)

# Test on entire test set
print("\nTesting on entire test set...")
all_predictions = predictor.predict(X_test)
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

accuracy = accuracy_score(Y_test, all_predictions)
print(f"\n‚úÖ Overall Test Accuracy: {accuracy:.4f} ({accuracy*100:.2f}%)")

print("\nüìä Confusion Matrix:")
print(confusion_matrix(Y_test, all_predictions))

print("\nüìã Classification Report:")
print(classification_report(Y_test, all_predictions, target_names=['Legitimate', 'Fraud']))

print("="*60)


Testing the deployed model...
Making predictions on 5 test transactions...

PREDICTION RESULTS

Transaction 1:
  Predicted: FRAUD (1)
  Actual:    FRAUD (1)
  ‚úì CORRECT

Transaction 2:
  Predicted: LEGITIMATE (0)
  Actual:    LEGITIMATE (0)
  ‚úì CORRECT

Transaction 3:
  Predicted: LEGITIMATE (0)
  Actual:    LEGITIMATE (0)
  ‚úì CORRECT

Transaction 4:
  Predicted: LEGITIMATE (0)
  Actual:    LEGITIMATE (0)
  ‚úì CORRECT

Transaction 5:
  Predicted: FRAUD (1)
  Actual:    FRAUD (1)
  ‚úì CORRECT


Testing on entire test set...

‚úÖ Overall Test Accuracy: 0.9239 (92.39%)

üìä Confusion Matrix:
[[98  1]
 [14 84]]

üìã Classification Report:
              precision    recall  f1-score   support

  Legitimate       0.88      0.99      0.93        99
       Fraud       0.99      0.86      0.92        98

    accuracy                           0.92       197
   macro avg       0.93      0.92      0.92       197
weighted avg       0.93      0.92      0.92       197



In [27]:
# CELL 28: Delete Endpoint (IMPORTANT - Run this when done!)
# ============================================================================
print("\n\n" + "="*60)
print("‚ö†Ô∏è  IMPORTANT: DELETE ENDPOINT TO STOP CHARGES")
print("="*60)
print("\nUncomment and run the code below when you're done testing:")
print("(This will stop the AWS charges)\n")

# Uncomment the line below to delete the endpoint
#predictor.delete_endpoint()
#predictor.delete_endpoint()
print("""
 To delete the endpoint, uncomment and run:
 predictor.delete_endpoint()
 print("‚úÖ Endpoint deleted successfully!")
""")

print("="*60)
print("Current Status: Endpoint is RUNNING and incurring charges")
print("="*60)



‚ö†Ô∏è  IMPORTANT: DELETE ENDPOINT TO STOP CHARGES

Uncomment and run the code below when you're done testing:
(This will stop the AWS charges)


 To delete the endpoint, uncomment and run:
 predictor.delete_endpoint()
 print("‚úÖ Endpoint deleted successfully!")

Current Status: Endpoint is RUNNING and incurring charges
