In [None]:
import boto3

# Destination bucket and the local file to push; the object key reuses the file name.
bucket_name = 'your-bucket-name'
file_name = 'your-dataset.csv'

# Create the S3 client, then upload the local file under the same key.
s3 = boto3.client('s3')
s3.upload_file(Filename=file_name, Bucket=bucket_name, Key=file_name)
print(f"{file_name} uploaded to S3 bucket {bucket_name}")


In [None]:
# train.py
import argparse
import os

import boto3
import mlflow
import pandas as pd
from sklearn.model_selection import train_test_split
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    Trainer,
    TrainingArguments,
)

MODEL_NAME = "distilbert-base-uncased"


class TextClassificationDataset:
    """Map-style dataset pairing tokenizer output with integer class ids.

    Trainer needs indexable examples shaped as dicts of model inputs; a raw
    pandas Series of strings cannot be collated into tensors.
    """

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        example = {name: values[idx] for name, values in self.encodings.items()}
        example['labels'] = self.labels[idx]
        return example


def compute_metrics(eval_pred):
    """Return accuracy so that trainer.evaluate() reports 'eval_accuracy'."""
    logits, labels = eval_pred
    predictions = logits.argmax(axis=-1)
    return {'accuracy': float((predictions == labels).mean())}


def upload_directory_to_s3(local_dir, s3_uri):
    """Upload every file under local_dir to the s3://bucket/prefix location.

    Raises:
        ValueError: if s3_uri does not contain a bucket name.
    """
    bucket, _, prefix = s3_uri[len('s3://'):].partition('/')
    if not bucket:
        raise ValueError(f"Invalid S3 URI: {s3_uri!r}")
    s3 = boto3.client('s3')
    for root, _dirs, files in os.walk(local_dir):
        for name in files:
            local_path = os.path.join(root, name)
            relative = os.path.relpath(local_path, local_dir).replace(os.sep, '/')
            key = '/'.join(part for part in (prefix.strip('/'), relative) if part)
            s3.upload_file(local_path, bucket, key)


# Parse input arguments.
# --epochs is accepted because the SageMaker estimator forwards each
# hyperparameter to the entry point as a `--name value` flag, and argparse
# aborts on options it does not know.
parser = argparse.ArgumentParser()
parser.add_argument('--s3-data', type=str, default='')
parser.add_argument('--s3-output', type=str, default='')
parser.add_argument('--epochs', type=int, default=3)
args = parser.parse_args()

# Load data. When no --s3-data is given but the job was started with a
# 'training' input channel, fall back to the first CSV SageMaker copied into
# the channel directory (exposed through SM_CHANNEL_TRAINING).
data_path = args.s3_data
channel_dir = os.environ.get('SM_CHANNEL_TRAINING')
if not data_path and channel_dir:
    csv_files = sorted(name for name in os.listdir(channel_dir) if name.endswith('.csv'))
    if csv_files:
        data_path = os.path.join(channel_dir, csv_files[0])
data = pd.read_csv(data_path)

# Rows without text or label cannot be tokenized or encoded.
data = data.dropna(subset=['Text', 'Label'])

# Define X and y columns (customize as needed).
# Labels are mapped to contiguous integer ids, which the classification head
# requires; labels that are already 0..n-1 integers map to themselves.
X = data['Text'].astype(str)
label_names = sorted(data['Label'].unique())
label_to_id = {label: idx for idx, label in enumerate(label_names)}
y = data['Label'].map(label_to_id)

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Tokenize each split and wrap it in a dataset the Trainer can index.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
train_dataset = TextClassificationDataset(
    tokenizer(X_train.tolist(), truncation=True, padding=True),
    y_train.tolist(),
)
eval_dataset = TextClassificationDataset(
    tokenizer(X_test.tolist(), truncation=True, padding=True),
    y_test.tolist(),
)

# Define model and trainer.
# The TrainingArguments object gets its own name: rebinding `args` would hide
# the parsed CLI namespace that is still needed for --s3-output below.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=len(label_names),
    id2label={idx: str(label) for label, idx in label_to_id.items()},
    label2id={str(label): idx for label, idx in label_to_id.items()},
)
training_args = TrainingArguments(
    output_dir='./results',
    evaluation_strategy="epoch",
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=args.epochs,
    weight_decay=0.01
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    compute_metrics=compute_metrics
)

# Train and evaluate
trainer.train()
accuracy = trainer.evaluate()['eval_accuracy']

# Log to MLflow
mlflow.log_param("epochs", args.epochs)
mlflow.log_metric("accuracy", accuracy)

# Save the model. save_pretrained() only writes to the local filesystem, so an
# s3:// destination is served by saving locally and uploading the files.
# SM_MODEL_DIR (set inside SageMaker training containers) is preferred as the
# local directory so the artifacts are also packaged with the training job.
default_model_dir = os.environ.get('SM_MODEL_DIR') or os.path.join(training_args.output_dir, 'final_model')
upload_to_s3 = args.s3_output.startswith('s3://')
if upload_to_s3 or not args.s3_output:
    local_model_dir = default_model_dir
else:
    local_model_dir = args.s3_output

model.save_pretrained(local_model_dir)
tokenizer.save_pretrained(local_model_dir)

if upload_to_s3:
    upload_directory_to_s3(local_model_dir, args.s3_output)
    print("Model training completed and saved to S3")
else:
    print(f"Model training completed and saved to {local_model_dir}")


In [None]:
import sagemaker
from sagemaker.pytorch import PyTorch

# Session and execution role used by the training job (replace the placeholder ARN).
sagemaker_session = sagemaker.Session()
role = 'arn:aws:iam::YOUR_ACCOUNT_ID:role/service-role/AmazonSageMaker-ExecutionRole'

# PyTorch estimator that runs train.py on a single CPU instance.
# py_version must match the chosen framework image: the PyTorch 1.9.0
# containers are published for Python 3.8 ('py38'), not the generic 'py3'.
# NOTE(review): train.py imports transformers, mlflow and scikit-learn; confirm
# they are installed in the training image (e.g. via a requirements.txt).
estimator = PyTorch(
    entry_point='train.py',
    role=role,
    framework_version='1.9.0',
    py_version='py38',
    instance_count=1,
    instance_type='ml.m5.xlarge',
    hyperparameters={'epochs': 3},
    output_path=f"s3://{bucket_name}/model-output",
    sagemaker_session=sagemaker_session
)

# Run the training job; the uploaded dataset is exposed as the 'training' channel.
estimator.fit({'training': f"s3://{bucket_name}/{file_name}"})


In [None]:
# Deploy SageMaker endpoint for live inference ✓ (Requirement: Live Inference)
predictor = estimator.deploy(
    instance_type='ml.m5.large',
    initial_instance_count=1,
)

# Batch transform for batch inference ✓ (Requirement: Batch Inference)
# The transformer writes results under the bucket's batch-output prefix and
# reads the uploaded CSV one line per record.
transformer = estimator.transformer(
    output_path=f"s3://{bucket_name}/batch-output",
    instance_type="ml.m5.large",
    instance_count=1,
)
transformer.transform(
    data=f"s3://{bucket_name}/{file_name}",
    split_type='Line',
    content_type='text/csv',
)


In [None]:
import boto3

def lambda_handler(event, context):
    """Start a retraining job and send an SNS alert when accuracy drops below 0.80.

    Args:
        event: Mapping that must contain 'current_accuracy' (anything float()
            accepts).
        context: Lambda context object; unused.

    Raises:
        KeyError: if 'current_accuracy' is missing from the event.
        ValueError: if 'current_accuracy' cannot be converted to float.

    Relies on the module-level names `estimator`, `role`, `bucket_name` and
    `file_name` being defined when the handler runs.
    """
    import time  # local import: this cell only imports boto3 at module level

    # Named *_client so the sagemaker SDK module name is not shadowed.
    sagemaker_client = boto3.client('sagemaker')
    sns = boto3.client('sns')

    # SNS topic ARN (replace with your actual SNS topic ARN)
    sns_topic_arn = 'arn:aws:sns:your-region:your-account-id:ModelAccuracyAlerts'

    # Current accuracy passed from CloudWatch event
    current_accuracy = float(event['current_accuracy'])
    threshold = 0.80

    if current_accuracy < threshold:
        # training_image_uri is a method on current SageMaker SDK estimators;
        # call it when callable so a plain string attribute also works.
        training_image = estimator.training_image_uri
        if callable(training_image):
            training_image = training_image()

        # Training job names must be unique per account and region, so a fixed
        # name would fail on the second trigger; append a UTC timestamp.
        job_name = "retraining-job-" + time.strftime('%Y%m%d-%H%M%S', time.gmtime())

        # Trigger model retraining (the boto3 operation is create_training_job;
        # the client has no start_training_job method).
        sagemaker_client.create_training_job(
            TrainingJobName=job_name,
            AlgorithmSpecification={
                'TrainingImage': training_image,
                'TrainingInputMode': 'File'
            },
            RoleArn=role,
            InputDataConfig=[
                {
                    'ChannelName': 'training',
                    'DataSource': {
                        'S3DataSource': {
                            'S3DataType': 'S3Prefix',
                            'S3Uri': f"s3://{bucket_name}/{file_name}",
                            'S3DataDistributionType': 'FullyReplicated'
                        }
                    }
                }
            ],
            OutputDataConfig={'S3OutputPath': f"s3://{bucket_name}/model-output"},
            ResourceConfig={
                'InstanceType': 'ml.m5.large',
                'InstanceCount': 1,
                'VolumeSizeInGB': 50
            },
            StoppingCondition={'MaxRuntimeInSeconds': 3600}
        )
        print("Model retraining job triggered due to accuracy drop.")

        # Send email alert via SNS
        sns_message = f"Model accuracy has dropped to {current_accuracy}. Retraining has been initiated."
        sns.publish(
            TopicArn=sns_topic_arn,
            Message=sns_message,
            Subject="Model Accuracy Alert - Retraining Triggered"
        )
    else:
        print("Model meets accuracy threshold. No retraining required.")


In [None]:
import boto3

# Function to deploy the trained model on AWS SageMaker
def deploy_model_on_aws(
    model_path,
    model_name='SentimentModel',
    instance_type='ml.m5.large',
    execution_role_arn='YOUR_SAGEMAKER_EXECUTION_ROLE',
    image_uri='763104351884.dkr.ecr.us-west-2.amazonaws.com/pytorch-inference:1.5.0-cpu-py36-ubuntu16.04',
    wait=False,
):
    """Create a SageMaker model, endpoint configuration and endpoint.

    All three resources are named `model_name`.

    Args:
        model_path: S3 URI of the model artifact, passed as ModelDataUrl.
        model_name: Name used for the model, endpoint config and endpoint.
        instance_type: Instance type for the single production variant.
        execution_role_arn: IAM role ARN SageMaker assumes for the model.
            The default is a placeholder and must be replaced for real use.
        image_uri: Inference container image. The default image lives in the
            us-west-2 registry; pass a matching URI for other regions.
        wait: When True, block until the endpoint reports InService.
            create_endpoint itself returns before provisioning finishes.

    Returns:
        None.
    """
    sagemaker_client = boto3.client('sagemaker')

    # Specify container image for inference (change URI if using custom images)
    container = {
        'Image': image_uri,
        'ModelDataUrl': model_path,
    }

    # Create the model in SageMaker
    sagemaker_client.create_model(
        ModelName=model_name,
        ExecutionRoleArn=execution_role_arn,
        PrimaryContainer=container
    )

    # Deploy endpoint configuration
    sagemaker_client.create_endpoint_config(
        EndpointConfigName=model_name,
        ProductionVariants=[{
            'VariantName': 'AllTraffic',
            'ModelName': model_name,
            'InstanceType': instance_type,
            'InitialInstanceCount': 1
        }]
    )

    # Create the endpoint
    sagemaker_client.create_endpoint(
        EndpointName=model_name,
        EndpointConfigName=model_name
    )

    # Optionally block until the endpoint is actually serving traffic.
    if wait:
        sagemaker_client.get_waiter('endpoint_in_service').wait(EndpointName=model_name)

    print(f"Model deployed on endpoint: {model_name}")

# Usage example: deploy the artifact at the given S3 location with default settings.
deploy_model_on_aws(model_path="s3://your-bucket/your-model-path")


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score

# Enhanced monitoring function with additional metrics
def monitor_staleness(predictions, true_labels, threshold=0.8):
    """Report accuracy, precision and recall, and flag when retraining is needed.

    Precision and recall are computed with average='weighted'.

    Args:
        predictions: Predicted labels.
        true_labels: Ground-truth labels.
        threshold: Minimum acceptable value for each of the three metrics.

    Returns:
        True if any metric is below `threshold`, otherwise False.
    """
    accuracy = accuracy_score(true_labels, predictions)
    precision, recall = (
        scorer(true_labels, predictions, average='weighted')
        for scorer in (precision_score, recall_score)
    )
    print(f"Accuracy: {accuracy}, Precision: {precision}, Recall: {recall}")

    # Healthy model: every metric is at or above the threshold.
    degraded = any(metric < threshold for metric in (accuracy, precision, recall))
    if not degraded:
        return False

    print("Retraining triggered due to performance degradation.")
    # Add retraining code or call retraining function here
    return True


In [None]:
from transformers import TrainingArguments

# Cost-conscious Trainer settings: small batches, three epochs, and
# checkpoints written only every 1000 steps.
training_args = TrainingArguments(
    # Where checkpoints and logs are written
    output_dir='./results',
    logging_dir='./logs',
    # Training length and batch sizes (smaller batch size for cost savings)
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    # Save checkpoints intermittently to avoid overuse of resources
    save_steps=1000,
    # Evaluate on a step schedule instead of after every epoch
    evaluation_strategy="steps",
)


In [None]:
from flask import Flask, request, jsonify
from transformers import pipeline

# Flask application object that hosts the prediction route.
app = Flask(import_name=__name__)

# Text-classification pipeline, built once at import time and reused by every request.
# NOTE(review): 'distilbert-base-uncased' is the base checkpoint; presumably this
# should point at the fine-tuned model produced by training — confirm.
classifier = pipeline(task="text-classification", model='distilbert-base-uncased')

@app.route('/predict', methods=['POST'])
def predict():
    """Classify the text supplied in a JSON POST body.

    Expects a JSON object with a 'text' field holding a non-empty string or a
    non-empty list of strings.

    Returns:
        The classifier output as JSON, or a JSON error with HTTP 400 when the
        body is not a JSON object or 'text' is missing or invalid.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so every bad request gets the same explicit 400 below.
    payload = request.get_json(silent=True)
    text = payload.get('text') if isinstance(payload, dict) else None

    if isinstance(text, str):
        is_valid = bool(text.strip())
    elif isinstance(text, list):
        is_valid = bool(text) and all(isinstance(item, str) for item in text)
    else:
        is_valid = False

    if not is_valid:
        return jsonify({'error': "Request body must be JSON with a non-empty 'text' field."}), 400

    result = classifier(text)
    return jsonify(result)

# Start Flask's built-in server on port 5000, bound to all interfaces, when
# this file is executed as a script (local testing).
if __name__ == '__main__':
    app.run(port=5000, host='0.0.0.0')
