In [2]:
import torch
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Class definition
class LoanAnalysis:
    """Label loan predictions and compute per-loan and portfolio returns.

    Workflow:
      1. Label every row as True/False Negative/Positive from the
         ``predicted`` and ``actual`` columns.
      2. For each label group compute a ``return`` column and an
         ``annualized return`` (ar) column, where
         ``annualized return = return ** (1 / term)``.
      3. Compute the return-weighted ar of the whole portfolio.

    The input frame is copied, so the caller's DataFrame is never modified.
    Columns read by the methods: ``predicted``, ``actual``, ``installment``,
    ``term``, ``total_pymnt``, ``loan_amnt``, ``recoveries``,
    ``collection_recovery_fee`` and ``tbond_int``.
    """

    def __init__(self, dataframe):
        """Copy ``dataframe`` and add the ``result`` label column to the copy."""
        self.df = dataframe.copy()
        self.label_results()

    def label_results(self):
        """Add a ``result`` column classifying every row.

        The label is 'True Negative', 'False Negative', 'True Positive' or
        'False Positive' depending on the (predicted, actual) pair; any other
        combination (values other than 0/1, or missing values) is 'Other'.
        """
        # Plain boolean arrays avoid any index alignment and, unlike a
        # row-wise ``apply``, also work when the frame has no rows.
        predicted = self.df['predicted'].to_numpy()
        actual = self.df['actual'].to_numpy()

        labels = pd.Series('Other', index=self.df.index, dtype=object)
        labels[(predicted == 0) & (actual == 0)] = 'True Negative'
        labels[(predicted == 0) & (actual == 1)] = 'False Negative'
        labels[(predicted == 1) & (actual == 1)] = 'True Positive'
        labels[(predicted == 1) & (actual == 0)] = 'False Positive'
        self.df['result'] = labels

    @staticmethod
    def calculate_annualized_return(row):
        """Return ``return ** (1 / term)`` for ``row``.

        ``row`` may be a single record (a Series or any mapping with
        'return' and 'term' keys) or a whole DataFrame, in which case the
        result is computed column-wise for every row at once.

        NOTE(review): with pandas columns a negative 'return' raised to a
        fractional power yields NaN; confirm that is the desired handling
        of loss-making loans.
        """
        return row['return'] ** (1 / row['term'])

    def calculate_true_negative_ar(self):
        """Return the True Negative rows with their return columns added.

        ``return`` is ``installment * term``.
        """
        true_negative_df = self.df[self.df['result'] == 'True Negative'].copy()
        true_negative_df['return'] = (
            true_negative_df['installment'] * true_negative_df['term']
        )
        true_negative_df['annualized return'] = self.calculate_annualized_return(
            true_negative_df
        )
        return true_negative_df

    def calculate_false_negative_ar(self):
        """Return the False Negative rows with their return columns added.

        ``return`` is
        ``total_pymnt - loan_amnt + recoveries - collection_recovery_fee``.
        """
        false_negative_df = self.df[self.df['result'] == 'False Negative'].copy()
        false_negative_df['return'] = (
            false_negative_df['total_pymnt']
            - false_negative_df['loan_amnt']
            + false_negative_df['recoveries']
            - false_negative_df['collection_recovery_fee']
        )
        false_negative_df['annualized return'] = self.calculate_annualized_return(
            false_negative_df
        )
        return false_negative_df

    def calculate_positives_ar(self):
        """Return the True/False Positive rows with their return columns added.

        ``return`` is ``tbond_int * loan_amnt``.
        """
        is_positive = self.df['result'].isin(['True Positive', 'False Positive'])
        positive_df = self.df[is_positive].copy()
        positive_df['return'] = positive_df['tbond_int'] * positive_df['loan_amnt']
        positive_df['annualized return'] = self.calculate_annualized_return(
            positive_df
        )
        return positive_df

    def process_all(self):
        """Compute the return columns for every labelled group.

        Returns one DataFrame with the True Negative rows first, then the
        False Negative rows, then the positive rows. Rows labelled 'Other'
        are not included.
        """
        tn_df = self.calculate_true_negative_ar()
        fn_df = self.calculate_false_negative_ar()
        pos_df = self.calculate_positives_ar()

        # Combine the per-group frames into the final result
        final_df = pd.concat([tn_df, fn_df, pos_df])
        return final_df

    def calculate_portfolio_ar(self):
        """Return the portfolio annualized return.

        Each row is weighted by its share of the total ``return``; the
        result is the weighted sum of ``annualized return`` minus 1.
        """
        final_df = self.process_all()
        # Weight of each asset = its return / total return
        final_df['weight'] = final_df['return'] / final_df['return'].sum()

        # Annualized return of the whole portfolio
        portfolio_annualized_return = (
            final_df['weight'] * final_df['annualized return']
        ).sum() - 1
        return portfolio_annualized_return


In [10]:
# Build a small example dataset: one list entry per loan, one key per column
data = dict(
    predicted=[0, 1, 0, 0, 1, 1, 0, 1],
    actual=[0, 1, 0, 1, 0, 1, 0, 0],
    installment=[100, 150, 200, 250, 300, 350, 400, 450],
    term=[12, 24, 36, 48, 60, 72, 84, 96],
    total_pymnt=[1200, 3000, 2400, 5000, 3500, 6000, 3300, 4500],
    loan_amnt=[1000, 2500, 2000, 4500, 3000, 5500, 2800, 4000],
    recoveries=[50, 100, 150, 200, 250, 300, 350, 400],
    collection_recovery_fee=[10, 20, 30, 40, 50, 60, 70, 80],
    tbond_int=[0.03, 0.04, 0.05, 0.06, 0.07, 0.08, 0.09, 0.10],
)

# Wrap the data in a DataFrame and hand it to the analysis class
df = pd.DataFrame(data)
analysis = LoanAnalysis(df)

# Portfolio-level figure and the per-loan table; the two calls are
# independent because each works on its own copy of the labelled frame
portfolio_annualized_return = analysis.calculate_portfolio_ar()
final_result_df = analysis.process_all()

# Print the results
print(
    f"portfolio annualized return : {portfolio_annualized_return}",
    '\n',
    final_result_df,
)

portfolio annualized return : 0.17395020039019182 
    predicted  actual  installment  term  total_pymnt  loan_amnt  recoveries  \
0          0       0          100    12         1200       1000          50   
2          0       0          200    36         2400       2000         150   
6          0       0          400    84         3300       2800         350   
3          0       1          250    48         5000       4500         200   
1          1       1          150    24         3000       2500         100   
4          1       0          300    60         3500       3000         250   
5          1       1          350    72         6000       5500         300   
7          1       0          450    96         4500       4000         400   

   collection_recovery_fee  tbond_int          result   return  \
0                       10       0.03   True Negative   1200.0   
2                       30       0.05   True Negative   7200.0   
6                       70       0.09 