In [11]:
from pyspark.sql import SparkSession
spark = SparkSession.Builder().appName("Analysis").master("local[12]").getOrCreate()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/10/09 21:49:40 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
23/10/09 21:49:40 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.


In [13]:
df = spark.read.parquet("../test_data.parquet")
df.collect()[0]

                                                                                

Row(Id='533b409c-48ab-4222-ae1e-9da42cfbf78b', Notional=760954, Interest Rate=1.8259983840056635, Reset Frequency=9, Start Date=datetime.datetime(2018, 2, 28, 10, 32, 4, 557479), Term=25, Remaining Notional=611299.7133333334, Payment Type='Linear', Risk Indicator=0, Next Reset Date=datetime.datetime(2027, 2, 26, 10, 32, 4, 557479))

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

import matplotlib as mpl
from pyspark.sql import functions as fn
from pyspark.sql import Row
from pyspark.sql.functions import col, lit
from datetime import datetime, timedelta
ASSUMED_DATE_TODAY=datetime(year=2023,month=1,day=1)

In [4]:
annuities = df.filter(df['Payment_Type'] == "Annuity")
linears = df.filter(df['Payment_Type'] == "Linear")
bullets = df.filter(df['Payment_Type'] == "Bullet")

In [5]:
linearpayments = linears.rdd.map(
    lambda x: (x['Id'], x['Notional'] / x['Term']))

linearpayments = linearpayments.toDF(["f_Id", "monthly_repayments"]) 
#linearpayments.show()

In [6]:
bulletpayments = bullets.rdd.map(
    lambda x: (x["Id"], 0.0)
).toDF(["f_Id", "monthly_payments"])

In [7]:
def calc_annuity_payment(notional, interest, term):
    monthsTotal = term * 12
    r = interest / 12 / 100
    payAmount = (notional*r*(pow(1+r, monthsTotal))) / (pow(1+r, monthsTotal) -1)
    return payAmount

annuitypayments = annuities.rdd.map(
    lambda x: (x["Id"], calc_annuity_payment(x["Notional"], x["Interest_Rate"], x["Term"]) )
).toDF(["f_Id", "monthly_payments"])

In [8]:
#annuities.show()

In [9]:
annuities = annuities.join(annuitypayments, annuities['Id'] == annuitypayments['f_Id'], "inner")
linears = linears.join(linearpayments, linears['Id'] == linearpayments['f_Id'], "inner")
bullets = bullets.join(bulletpayments, bullets['Id'] == bulletpayments['f_Id'], "inner")
df_full = annuities.union(linears).union(bullets)

In [10]:
df_full = df_full.withColumn("end_date", fn.add_months(col("Start_Date"), col("Term")*12))

In [11]:
#create schema for the results table
from pyspark.sql.types import StructType,StructField, StringType, DecimalType, IntegerType, DateType
schema = StructType([
  StructField('Id', StringType(), False),
  StructField('Interest_Rate', DecimalType(), False),
  StructField('Reset_Frequency', IntegerType(), False),
    StructField('Remaining_Notional', DecimalType(), False),
    StructField('Risk_Indicator', IntegerType(), False),
    StructField('Next_Reset_Date', DateType(), False),
    StructField('monthly_payment', DecimalType(), False)
  ])

In [12]:
payment_projection = spark.createDataFrame([], schema)
payment_projection.printSchema()

root
 |-- Id: string (nullable = false)
 |-- Interest Rate: decimal(10,0) (nullable = false)
 |-- Reset Frequency: integer (nullable = false)
 |-- Remaining Notional: decimal(10,0) (nullable = false)
 |-- Risk Indicator: integer (nullable = false)
 |-- Next Reset Date: date (nullable = false)
 |-- monthly_payment: decimal(10,0) (nullable = false)



In [1]:
import random

riskMigration = {
	0: (0.0, 1 - 0.001),
	1: (0.1, 1 - 0.01),
	2: (0.05, 1 - 0.01),
	3: (0.05, 1 - 0.05),
	4: (0.2, 1 - 0.1),
}
additionalInterestRatePerDuration = {
	30: 2.2,
	25: 1.9,
	20: 1.5,
	15: 1.0,
	10: 0.5,
	9: 0.4,
	7: 0.1,
	5: 0.0,

}
additionalInterestRatePerRiskCategory = {
	0: 0.0,
	1: 0.3 ,
	2: 1.1 ,
	3: 1.9 ,
	4: 3.5 ,
}

def migrate_risk_category(old_category: int):
	draw = random.random()
	probabilities = riskMigration[old_category]
	if draw < probabilities[0]:
		return old_category - 1
	elif draw > probabilities[1]:
		return old_category + 1
	else:
		return old_category

def calc_one_step(original_row: Row, curr_date: datetime, T_Minus_one: Row):
	#fill in t minus one if we are in period 0
	if T_Minus_one is None:
		T_Minus_one = original_row
	#migrate the risk
	newRisk = migrate_risk_category(T_Minus_one['Risk_Indicator'])
	#check if we need to rest the interest rate, using month etc so we dont accidentily miss one if the day differs
	resetFrequency = T_Minus_one['Reset_Frequency']
	interestRate = T_Minus_one['Interest_Rate']
	resetDate = T_Minus_one['Next_Reset_Date']
	if curr_date.month == T_Minus_one['Next_Reset_Date'].month and curr_date.day == T_Minus_one['Next_Reset_Date'].day:
		impliedBaseRate = original_row["Interest_Rate"] - additionalInterestRatePerDuration[original_row['Term']] - additionalInterestRatePerRiskCategory[original_row['Risk_Indicator']]
		yearsLeft = original_row['end_date'].year - original_row['Start Date'].year
		reset_options = [30, 25, 20, 15, 10, 9, 7, 5]
		resetFrequency = min([x for x in reset_options if x >= yearsLeft])
		resetDate = datetime(year=resetDate.year + yearsLeft, month= resetDate.month, day= resetDate.day)
		#new reset frequency is implied to be the new duration
		interestRate = impliedBaseRate + additionalInterestRatePerDuration[resetFrequency] + additionalInterestRatePerRiskCategory[newRisk]
		if original_row['Payment_Type'] == 'Annuity':
			#re-calculate the annuity payments
			#too tired, continue here later

	if newRisk == 5:
		interest = 0.0
		repayment = 0.0
		writeOff = T_Minus_one['Remaining_Notional']
	elif original_row['Payment_Type'] == 'Bullet':
		interest = (interestRate / 12) * T_Minus_one['Remaining_Notional']
		repayment = 0.0
		writeOff = 0
	elif original_row['Payment_Type'] == 'Linear':
		interest = (interestRate / 12) * T_Minus_one['Remaining_Notional']
		repayment = original_row['monthly_payment']
		writeOff = 0
	elif original_row['Payment_Type'] == 'Annuity':
		interest = (interestRate / 12) * T_Minus_one['Remaining_Notional']
		repayment = original_row['monthly_payment'] - interest
		writeOff = 0
	remainingNotional = T_Minus_one['Remaining_Notional'] - repayment - writeOff
	newrow = Row(
        	Id=original_row['Id'],
        	Interest_Rate=interestRate,
		Reset_Frequency=resetFrequency,
		Remaining_Notional=remainingNotional,
		Risk_Indicator=newRisk,
		Next_Reset_Date=resetDate,
		monthly_payment=monthlyPayment
	)


def calc_all_periods_for_row(row: Row):
    #endDate = row["end_date"]["end_date"]
    endDate = datetime(2030,1,1)
    curr_date = ASSUMED_DATE_TODAY
    listresults = []
    results = None
    while curr_date < endDate:
        results = calc_one_step(row, curr_date, results)
        listresults.append(results)
        curr_date += timedelta(days=31)
        curr_date -= (timedelta(days=curr_date.day-1))
        #above two lines should get the beginning of the month
    totals = spark.createDataFrame(listresults)
    payment_projection = payment_projection.union(totals)
        

IndentationError: expected an indented block after function definition on line 11 (1337018053.py, line 14)

In [17]:
spark.stop()

In [16]:
kwargs = {"keys": ["foo", "b a r"], "values": [1, 2]}
r = Row(kwargs=kwargs)
print(r)

Row(kwargs={'keys': ['foo', 'b a r'], 'values': [1, 2]})
