In [1]:
# Extract (-x) the gzip-compressed (-z) archive file (-f) model.tar.gz
# relative to the notebook's working directory.
!tar -xzf model.tar.gz

tar: Ignoring unknown extended header keyword `LIBARCHIVE.creationtime'


In [None]:
# Use the %pip magic rather than !pip so the packages are installed into the
# environment of the running kernel, not whichever `pip` is first on PATH.
%pip install tensorflow keras

In [6]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
import pandas as pd

# Load the raw loan dataset.
df = pd.read_csv("data/input/loan_data.csv")

# Prepare the data: encode the two binary categorical columns as 0/1.
# Values that are not in a mapping become NaN.
df_binary = df.copy()
df_binary['person_gender'] = (
    df['person_gender'].astype(str).str.strip().str.lower().map({'female': 0, 'male': 1})
)
df_binary['previous_loan_defaults_on_file'] = df['previous_loan_defaults_on_file'].map({'Yes': 1, 'No': 0})

# One hot encoding
df_encoded = pd.get_dummies(df_binary, columns=['person_education'], prefix='edu', dtype=int)
# Replace the dummy column whose name contains a space with an underscore variant.
# The new column is appended at the end, which fixes the feature order seen by the model.
df_encoded['edu_High_School'] = df_encoded['edu_High School']
df_encoded = df_encoded.drop(columns=['edu_High School'])
df_encoded = pd.get_dummies(df_encoded, columns=['person_home_ownership'], prefix="home_own", dtype=int)
df_encoded = pd.get_dummies(df_encoded, columns=['loan_intent'], dtype=int)

# Train-test split.
# The split happens BEFORE scaling so that statistics of the held-out rows
# never influence the fitted scaler (avoids train/test leakage).
from sklearn.model_selection import train_test_split
y = df_encoded['loan_status']
x = df_encoded.drop(['loan_status'], axis=1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# Scaling: fit on the training rows only, then apply the same transform to the test rows.
columns_to_scale = [
    'person_age', 'person_income', 'loan_amnt', 'credit_score', 'loan_int_rate'
]
scaler = StandardScaler()

# Work on copies so the assignments below do not write into slices of `x`.
x_train = x_train.copy()
x_test = x_test.copy()
x_train[columns_to_scale] = scaler.fit_transform(x_train[columns_to_scale])
x_test[columns_to_scale] = scaler.transform(x_test[columns_to_scale])

# Create a Random Forest Classifier (fixed seed for reproducibility)
model = RandomForestClassifier(random_state=42)

# Train the model
model.fit(x_train, y_train)

# Predictions on the test set
y_pred = model.predict(x_test)

# Evaluate the model on the held-out test set
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

In [10]:
import pickle

# Persist the fitted classifier to 'loan_data_model.pkl' in binary pickle format
with open('loan_data_model.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

In [8]:
# Report each evaluation metric on its own line as "<label>: <value>"
for label, value in (
    ("accuracy", accuracy),
    ("precision", precision),
    ("recall", recall),
    ("f1 score", f1),
):
    print(f"{label}: {value}")

accuracy: 0.9283333333333333
precision: 0.8947368421052632
recall: 0.7696517412935323
f1 score: 0.8274939823482215


In [19]:
# Load the model back from the file.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that were produced by a trusted source (here: this notebook).
with open('loan_data_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

# Use the loaded model for predictions
predictions = loaded_model.predict(x_test)
print(predictions)

# .tolist() converts the NumPy scalars to plain Python ints, so the file holds
# "[0, 0, 1, ...]" on every NumPy version (with NumPy >= 2, str(list(array))
# would write entries such as "np.int64(0)" instead).
with open('data/output/predictions.txt', 'w') as pred:
    pred.write(str(predictions.tolist()))

[0 0 1 ... 0 0 1]


In [20]:
%%sh
# Specify an image name
image_name=tensorflow-inference
echo "image_name: ${image_name} ######################"

# AWS account id of the caller; used to build the registry host name.
account=$(aws sts get-caller-identity --query Account --output text)
echo "account: ${account} ######################"

# Get the region defined in the current configuration (default to us-west-2 if none defined)
region=$(aws configure get region)
region=${region:-us-west-2}
echo "region: ${region} ######################"

# Fully qualified image reference (registry/repository:tag); only echoed here.
fullname="${account}.dkr.ecr.${region}.amazonaws.com/${image_name}:2.12.0-cpu-py310-pluto-2"
echo "fullname: ${fullname} ######################"

# Create the ECR repository only when the describe call fails.
# The region is passed explicitly so both calls target the same region that
# was used to build ${fullname}, even when the fallback above kicked in.
if ! aws ecr describe-repositories --region "${region}" --repository-names "${image_name}"
then
    aws ecr create-repository --region "${region}" --repository-name "${image_name}"
fi

image_name: tensorflow-inference ######################
account: 319840978863 ######################
region: us-east-1 ######################
fullname: 319840978863.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference:2.12.0-cpu-py310-pluto-2 ######################



An error occurred (RepositoryNotFoundException) when calling the DescribeRepositories operation: The repository with name 'tensorflow-inference' does not exist in the registry with id '319840978863'


{
    "repository": {
        "repositoryArn": "arn:aws:ecr:us-east-1:319840978863:repository/tensorflow-inference",
        "registryId": "319840978863",
        "repositoryName": "tensorflow-inference",
        "repositoryUri": "319840978863.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference",
        "createdAt": 1731937483.712,
        "imageTagMutability": "MUTABLE",
        "imageScanningConfiguration": {
            "scanOnPush": false
        },
        "encryptionConfiguration": {
            "encryptionType": "AES256"
        }
    }
}


In [21]:
# Log Docker in to the ECR registry of account 763104351884 (us-east-1), feeding
# the password printed by `aws ecr get-login-password` to `docker login` via stdin.
# NOTE(review): this is a different account than the one the image is pushed to
# later; presumably it hosts the base image used by Dockerfile.inference. Confirm.
!aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 763104351884.dkr.ecr.us-east-1.amazonaws.com

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded


In [None]:
# Build the image described by Dockerfile.inference, using the current directory
# as build context, and tag it tensorflow-inference:mondo under the ECR registry host.
!docker build -t 319840978863.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference:mondo -f Dockerfile.inference .

In [23]:
# List the local Docker images to check that the tagged image exists.
!docker images

REPOSITORY                                                          TAG       IMAGE ID       CREATED         SIZE
319840978863.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference   mondo     6893cfe4a78b   5 minutes ago   4.53GB


In [24]:
# Log Docker in to the ECR registry of account 319840978863 (us-east-1), the
# registry that the image tag points at, so the image can be pushed there.
!aws ecr get-login-password --region us-east-1 | docker login --username AWS --password-stdin 319840978863.dkr.ecr.us-east-1.amazonaws.com

https://docs.docker.com/engine/reference/commandline/login/#credentials-store

Login Succeeded


In [26]:
# Push the image tagged "mondo" to the tensorflow-inference ECR repository.
!docker push 319840978863.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference:mondo

The push refers to repository [319840978863.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference]

[1Bf9b57e1c: Preparing 
[1B95acb709: Preparing 
[1B3763e5d2: Preparing 
[1B37bf1d61: Preparing 
[1B8df97c44: Preparing 
[1B764747b4: Preparing 
[1B1a68f579: Preparing 
[1B2eef5eea: Preparing 
[1Bb5bcc575: Preparing 
[1B5e5ce62f: Preparing 
[1B757a305d: Preparing 
[1Bc7cf6f28: Preparing 
[1B30092134: Preparing 
[1Bfb153852: Preparing 
[1B0903db8c: Preparing 
[1B19dec72a: Preparing 
[1Bdf04f233: Preparing 
[1Bf2dbc490: Preparing 
[1Bba0431f9: Preparing 
[1B71536788: Preparing 
[1B994107ae: Preparing 
[1B3a4f83e7: Preparing 
[1Bd6748243: Preparing 
[1Bf2c1e372: Preparing 
[1Bcd2b5d6d: Preparing 
[25B5acb709: Pushed   2.641GB/2.621GB5A[2K[25A[2K[25A[2K[22A[2K[26A[2K[22A[2K[21A[2K[22A[2K[19A[2K[20A[2K[22A[2K[25A[2K[22A[2K[25A[2K[25A[2K[19A[2K[25A[2K[18A[2K[25A[2K[16A[2K[17A[2K[18A[2K[25A[2K[18A[2K[25A[2KPushing  27.79MB

In [33]:

from sagemaker import get_execution_role
import boto3
import datetime
from time import gmtime, strftime

# Reference: https://docs.aws.amazon.com/sagemaker/latest/dg/realtime-endpoints-deployment.html

# Take the region from the default boto3 session and build a SageMaker client for it.
my_session = boto3.session.Session()
aws_region = my_session.region_name
sagemaker_client = boto3.client('sagemaker', region_name=aws_region)

# Execution role handed to SageMaker as ExecutionRoleArn.
sagemaker_role = get_execution_role()

model_name = 'modelLarocca'

# Container definition: the inference image in ECR plus the S3 location of the model data.
# NOTE(review): ModelDataUrl points at a .pkl object; confirm this is the artifact
# format the container / SageMaker hosting expects.
primary_container = {
    'Image': '319840978863.dkr.ecr.us-east-1.amazonaws.com/tensorflow-inference:mondo',
    'ModelDataUrl': 's3://itsar123-larocca/data/output/loan_data_model.pkl',
}

# Create model
create_model_response = sagemaker_client.create_model(
    ModelName=model_name,
    ExecutionRoleArn=sagemaker_role,
    PrimaryContainer=primary_container,
)




In [34]:
# Name of the endpoint configuration; the CreateEndpoint request below refers to it.
# The name is a fixed string (it does not encode the creation date).
endpoint_config_name = 'prima-api-inferenza-larocca'

# Compute instance type used to host the variant.
instance_type = 'ml.p2.xlarge'

# One ProductionVariant per model hosted at the endpoint; a single variant here.
production_variant = {
    "VariantName": "variant1",
    "ModelName": model_name,
    "InstanceType": instance_type,
    "InitialInstanceCount": 1,
}

endpoint_config_response = sagemaker_client.create_endpoint_config(
    EndpointConfigName=endpoint_config_name,
    ProductionVariants=[production_variant],
)

print(f"Created EndpointConfig: {endpoint_config_response['EndpointConfigArn']}")

# The name of the endpoint. The name must be unique within an AWS Region in your AWS account.
endpoint_name = 'prima-api-inferenza-larocca'

# Request creation of the endpoint from the configuration defined above.
create_endpoint_response = sagemaker_client.create_endpoint(
    EndpointName=endpoint_name,
    EndpointConfigName=endpoint_config_name,
)


Created EndpointConfig: arn:aws:sagemaker:us-east-1:319840978863:endpoint-config/prima-api-inferenza-larocca


In [35]:
# Use the %pip magic rather than !pip so Flask is installed into the
# environment of the running kernel, not whichever `pip` is first on PATH.
%pip install flask

