In [1]:
from sagemaker.tensorflow import TensorFlow
from sagemaker import get_execution_role

# IAM role that the training job will run under, as reported by the SageMaker SDK.
role = get_execution_role()

# TensorFlow training job definition: runs train.py on a single ml.m5.large
# instance using the TensorFlow 2.11.0 / Python 3.9 framework container.
estimator = TensorFlow(
    role=role,
    entry_point="train.py",
    framework_version="2.11.0",
    py_version="py39",
    instance_type="ml.m5.large",
    instance_count=1,
    input_mode="File",
    script_mode=True,
)

# The cleaned CSV is passed to the job as the "training" input channel.
s3_input_path = "s3://chicagocrime1/df_clean.csv"
estimator.fit(inputs={"training": s3_input_path})




sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


2025-04-25 04:23:03 Starting - Starting the training job...
2025-04-25 04:23:16 Starting - Preparing the instances for training...
2025-04-25 04:23:38 Downloading - Downloading input data...
2025-04-25 04:24:34 Downloading - Downloading the training image........
2025-04-25 04:25:44.430633: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX512F
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-04-25 04:25:44.541900: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:460] Initializing the SageMaker Profiler.
2025-04-25 04:25:44.542546: W tensorflow/core/profiler/internal/smprofiler_timeline.cc:105] SageMaker Profiler is not enabled. The timeline writer thread will not be started, future recorded events will be dropped.
2025-04-25

In [9]:
import boto3
import tarfile
import os
from tensorflow.keras.models import load_model
from sklearn.metrics import classification_report, roc_auc_score


# Evaluate a trained model artifact: download model.tar.gz from S3, unpack it,
# load the Keras .h5 file, and score it on X_test / y_test.
#
# This cell reads X_test and y_test from the kernel namespace; they are created
# by the data-preparation cell that calls train_test_split. Check for them up
# front so a missing prerequisite fails immediately instead of after the S3
# download and model load.
_missing = [name for name in ("X_test", "y_test") if name not in globals()]
if _missing:
    raise NameError(
        f"{', '.join(_missing)} not defined - run the data preparation cell "
        "(the one that calls train_test_split) before this evaluation cell"
    )


# Step 1: download model.tar.gz file

# NOTE(review): the job name embedded in `key` is hardcoded and carries a
# 03-03-22 timestamp, while the training log captured in this notebook starts
# at 04:23 - confirm this is the artifact you intend to evaluate
# (`estimator.model_data` holds the artifact URI of a job launched via fit()).
bucket = 'sagemaker-us-east-1-173750038428'
key = 'tensorflow-training-2025-04-25-03-03-22-442/output/model.tar.gz'

s3 = boto3.client('s3')
local_tar = 'model.tar.gz'

s3.download_file(bucket, key, local_tar)
print(f"✅ Model Download: {local_tar}")


# Step 2: extract model

extract_dir = 'model_extracted'
os.makedirs(extract_dir, exist_ok=True)

with tarfile.open(local_tar) as tar:
    # Use the "data" extraction filter when this Python provides it: it
    # rejects members that would land outside extract_dir (absolute paths,
    # ".." components, unsafe links). Older interpreters without extraction
    # filters fall back to the plain call.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path=extract_dir, filter="data")
    else:
        tar.extractall(path=extract_dir)

print(f"✅ model Extract to: {extract_dir}")
model_path = os.path.join(extract_dir, 'crime_theft_rnn.h5')


# Step 3: load model

model = load_model(model_path)
print("✅ Model loading Successfully")


# Step 4: evaluation

# Predicted probabilities are thresholded at 0.5 for the class-level report;
# the AUC is computed from the raw probabilities.
y_pred_prob = model.predict(X_test)
y_pred = (y_pred_prob > 0.5).astype(int)

print("\n🔍 Classification Report:")
print(classification_report(y_test, y_pred))

print("🎯 AUC Score:", roc_auc_score(y_test, y_pred_prob))


✅ 模型已下载: model.tar.gz
✅ 模型已解压到: model_extracted
✅ 模型加载成功

🔍 Classification Report:
              precision    recall  f1-score   support

           0       0.34      0.57      0.42      4313
           1       0.82      0.64      0.72     13495

    accuracy                           0.62     17808
   macro avg       0.58      0.61      0.57     17808
weighted avg       0.71      0.62      0.65     17808

🎯 AUC Score: 0.6417265860117534


In [8]:

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA

# Build the model inputs: hourly crime counts per 'Primary Type', cut into
# fixed-length windows, reduced with PCA, then split into train and test sets.
# The label of each window is whether the target crime type occurs at least
# once in the hour immediately after the window.

df = pd.read_csv("df_clean.csv")
df['Date'] = pd.to_datetime(df['Date'])
df['Hour'] = df['Date'].dt.floor('h')

# One row per hour, one column per 'Primary Type' value, plus a row total.
hourly = df.groupby(['Hour', 'Primary Type']).size().unstack(fill_value=0)
hourly['total'] = hourly.sum(axis=1)
hourly = hourly.sort_index()

# NOTE(review): 25 is presumably the encoded 'Primary Type' value of the crime
# being predicted - confirm against the encoding used to build df_clean.csv.
target_crime = 25
if target_crime not in hourly.columns:
    hourly[target_crime] = 0

window_size = 12    # hours of history per sample
n_components = 20   # PCA output dimensionality per time step
random_seed = 1     # shared by PCA and the train/test split
features = list(hourly.columns)

# NOTE(review): the trailing "- 1" leaves the last usable window unused
# (len(hourly) - window_size windows have a following hour). It is kept on
# purpose: changing the sample count would change the seeded shuffle below and
# therefore which rows end up in X_test.
n_samples = len(hourly) - window_size - 1
if n_samples <= 0:
    raise ValueError(
        f"need more than {window_size + 1} hourly rows to build windows, "
        f"got {len(hourly)}"
    )

# Vectorised windowing: sliding_window_view yields (n_windows, n_features,
# window_size); transposing gives (n_samples, window_size, n_features), the
# same layout as stacking hourly.iloc[i:i+window_size] for each i.
feature_matrix = hourly[features].to_numpy()
target_counts = hourly[target_crime].to_numpy()

windows = np.lib.stride_tricks.sliding_window_view(
    feature_matrix, window_size, axis=0
)
X = windows[:n_samples].transpose(0, 2, 1)
# Label for window i is taken from row i + window_size (the hour after it).
y = (target_counts[window_size:window_size + n_samples] > 0).astype(int)

# PCA is applied per time step: flatten windows to rows, project, restore shape.
# random_state makes the projection repeatable if the randomized SVD solver is
# selected.
# NOTE(review): PCA is fit on every window before the split, and consecutive
# windows overlap by window_size - 1 hours, so the shuffled split below places
# near-identical windows in both train and test. Consider a chronological
# split with PCA fit on the training part only; not changed here because it
# would alter X_test.
n_features = X.shape[2]
X_flat = X.reshape(-1, n_features)
pca = PCA(n_components=n_components, random_state=random_seed)
X_reduced = pca.fit_transform(X_flat)
X = X_reduced.reshape(-1, window_size, n_components)


X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=random_seed
)



In [7]:
# Print the layer-by-layer architecture and parameter counts of `model`.
# NOTE(review): `model` must already exist in the kernel namespace. This cell's
# execution counter ([7]) is lower than that of the cell that calls load_model
# ([9]), so on a fresh kernel run the model-loading cell first.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 64)                26112     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 26177 (102.25 KB)
Trainable params: 26177 (102.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
