In [2]:

"""
Title: Credit Score Risk Classifier
Description: A simple ML pipeline to classify credit risk levels and store outputs in AWS S3.
"""

# Step 1: Import Libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
import boto3
import os

# Step 2: Simulate Dataset
# All columns are drawn from a single seeded generator so that a fresh
# "Restart Kernel -> Run All" regenerates exactly the same synthetic rows.
# (The previous unseeded np.random.randint calls produced a different dataset,
# and therefore a different saved CSV and report, on every run.)
SEED = 42
N_SAMPLES = 100
rng = np.random.default_rng(SEED)

# Generator.integers, like randint, excludes the upper bound.
data = pd.DataFrame({
    'age': rng.integers(21, 70, N_SAMPLES),               # 21..69
    'income': rng.integers(20000, 120000, N_SAMPLES),     # 20000..119999
    'loan_amount': rng.integers(1000, 50000, N_SAMPLES),  # 1000..49999
    'credit_score': rng.integers(300, 850, N_SAMPLES),    # 300..849
})

def label_risk(score):
    """Return the risk band for a credit score.

    A score below 580 is 'High' risk, a score below 670 is 'Medium',
    and any other score is 'Low'.
    """
    # Thresholds are checked in ascending order; the first exclusive upper
    # bound that the score falls under decides the band.
    for upper_bound, band in ((580, 'High'), (670, 'Medium')):
        if score < upper_bound:
            return band
    return 'Low'

# Derive the risk label for every row from its credit score.
data['risk'] = data['credit_score'].map(label_risk)

# Save dataset locally (the parent folder is created on first run).
data_path = '../data/credit_data.csv'
os.makedirs(os.path.dirname(data_path), exist_ok=True)
data.to_csv(data_path, index=False)  # index=False: do not write the row index as a column
print(f"Dataset saved to {data_path}")

# Step 3: Train/Test Split and Model Training
# The label is the 'risk' column; the features are every other column.
# NOTE(review): 'credit_score' remains in X although 'risk' is computed directly
# from it, so the classifier can recover the label from that one column and the
# reported metrics are likely optimistic — confirm this is intended for the demo.
y = data['risk']
X = data.drop(columns='risk')

# Hold out 20% of the rows for evaluation, using a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest on the training split (fit returns the estimator).
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Step 4: Predict and Save Results
# Score the held-out rows and turn the classification report dict into a table
# (transposed so that each report entry becomes a row).
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
results_df = pd.DataFrame(report).T

# Save results locally. Note: despite the file name, this CSV holds the
# classification report table, not row-level predictions.
results_path = '../output/risk_predictions.csv'
os.makedirs(os.path.dirname(results_path), exist_ok=True)
results_df.to_csv(results_path)
print(f"Prediction report saved to {results_path}")

# Step 5: Upload to AWS S3
# Credentials are not stored in this notebook; boto3 resolves them itself
# (run `aws configure` beforehand).
s3 = boto3.client('s3', region_name='us-east-1')  # region the client is pinned to

# The target bucket defaults to the original one but can be overridden with the
# CREDIT_S3_BUCKET environment variable, so the notebook can be run against a
# different bucket without editing the code.
bucket_name = os.environ.get('CREDIT_S3_BUCKET', 'shrushti-credit-bucket')

# Upload both files: (local file, object key inside the bucket).
uploads = (
    (data_path, 'input/credit_data.csv'),
    (results_path, 'output/risk_predictions.csv'),
)
for local_path, object_key in uploads:
    s3.upload_file(local_path, bucket_name, object_key)

# Only reached when every upload above returned without raising.
print("Files uploaded to S3 successfully!")


Dataset saved to ../data/credit_data.csv
Prediction report saved to ../output/risk_predictions.csv


  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():
  if not hasattr(array, "sparse") and array.dtypes.apply(is_sparse).any():


Files uploaded to S3 successfully!
