##  Imports - FIX


In [0]:
from pyspark.sql.functions import col, sum, count, avg, desc, current_timestamp, when, lit
from pyspark.sql.window import Window


## Read silver tables

In [0]:
# Load the silver-layer loan default table; the feature-engineering and
# default-rate cells below both start from this DataFrame.
# NOTE(review): `spark` is presumably the notebook-provided SparkSession -- it
# is not created anywhere in this file.
loans = spark.table('msme_risk_analytics.silver_loan_default')

## Create ML features

In [0]:
# Build the ML feature table from the silver loan data: eight source columns
# are passed through unchanged and four derived columns are appended.

# Ratio features. Each division is guarded so that a zero denominator yields
# NULL instead of raising DIVIDE_BY_ZERO when spark.sql.ansi.enabled is on.
# With ANSI mode off the result is the same as the unguarded division
# (NULL for a zero or NULL denominator), so existing outputs do not change.
loan_to_income_ratio = when(
    col('income') != 0,
    col('loan_amount') / col('income'),
).alias('loan_to_income_ratio')

loan_to_property_ratio = when(
    col('property_value') != 0,
    col('loan_amount') / col('property_value'),
).alias('loan_to_property_ratio')

# Credit band: the first matching threshold wins, so the checks run from the
# highest cut-off down. A NULL Credit_Score matches no branch and therefore
# falls through to 'Poor'.
credit_category = (
    when(col('Credit_Score') >= 750, 'Excellent')
    .when(col('Credit_Score') >= 650, 'Good')
    .when(col('Credit_Score') >= 550, 'Fair')
    .otherwise('Poor')
    .alias('credit_category')
)

# Composite risk score: weighted sum (0.4 / 0.3 / 0.3) of an inverted credit
# score term, dtir1 and LTV. `100 - Credit_Score / 10` makes a higher credit
# score contribute less to the total. A NULL in any of the three inputs makes
# the whole score NULL.
risk_score = (
    (100 - col('Credit_Score') / 10) * 0.4
    + col('dtir1') * 0.3
    + col('LTV') * 0.3
).alias('risk_score')

features = loans.select(
    'ID',
    'loan_amount',
    'income',
    'rate_of_interest',
    'Credit_Score',
    'LTV',
    'dtir1',
    'Status',
    loan_to_income_ratio,
    loan_to_property_ratio,
    credit_category,
    risk_score,
)

# Persist as a Delta table; 'overwrite' replaces the table contents on every run.
(
    features.write
    .format('delta')
    .mode('overwrite')
    .saveAsTable('msme_risk_analytics.silver_ml_features')
)

## Private analysis: default rate by loan purpose

In [0]:
# Default statistics per loan purpose, ordered from the highest default rate
# to the lowest.
# NOTE(review): Status is summed to get the number of defaults, so it is
# presumably a 0/1 flag -- confirm against the silver table schema.
# `sum` here is pyspark.sql.functions.sum (imported at the top of the
# notebook), not the Python builtin.
loan_count_expr = count('*')
default_count_expr = sum(col('Status'))

default_analysis = (
    loans
    .groupBy('loan_purpose')
    .agg(
        loan_count_expr.alias('total_loans'),
        default_count_expr.alias('defaults'),
        # Share of loans in the group that defaulted, as a percentage.
        (default_count_expr / loan_count_expr * 100).alias('default_rate_pct'),
    )
    .orderBy(desc('default_rate_pct'))
)

default_analysis.show()

+------------+-----------+--------+------------------+
|loan_purpose|total_loans|defaults|  default_rate_pct|
+------------+-----------+--------+------------------+
|          p4|       2061|    1086| 52.69286754002911|
|          p2|        431|     198| 45.93967517401392|
|          p3|       8464|    2892|34.168241965973536|
|          p1|       5574|    1713|30.731969860064584|
|        NULL|         22|       6| 27.27272727272727|
+------------+-----------+--------+------------------+

