## Methods to display the generic reports of the application

In [4]:
import import_ipynb
from db_setup import get_collections
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Fetch the collections from the database.
# get_collections() comes from db_setup; its result is indexed by collection
# name below (presumably a dict of MongoDB collections — confirm in db_setup).
collections = get_collections()
original_collection = collections["original"]
loan_request_collection = collections["loan_requests"]
payments_collection = collections["payments"]
borrower_profile_collection = collections["borrower_profiles"]
lender_profile_collection = collections["lender_profiles"]
loan_collection = collections["loans"]
# Load every payment document into a DataFrame (find() is called without a filter).
payment_data = payments_collection.find()
payment_df = pd.DataFrame(list(payment_data))
# explode() gives each element of 'payment_profile' its own row and
# json_normalize() flattens those elements into columns.
# Assumes 'payment_profile' holds a list of dicts per payment — TODO confirm.
# NOTE(review): if the payments collection is empty, payment_df has no
# 'payment_profile' column and this lookup fails at import time.
payment_profiles = pd.json_normalize(
    payment_df['payment_profile'].explode()
)

# Convert the collections to their respective data frames for analysis
loan_request_data = loan_request_collection.find()
loan_request_df = pd.DataFrame(list(loan_request_data))
# Only the loan-request frame drops '_id'; the other frames below keep it.
if '_id' in loan_request_df.columns:
    loan_request_df.drop('_id', axis=1, inplace=True)


# Loans
loan_data = loan_collection.find()
loan_df = pd.DataFrame(list(loan_data))


# Borrower profiles
borrower_data = borrower_profile_collection.find()
borrower_df = pd.DataFrame(list(borrower_data))


# Lender profiles
lender_data = lender_profile_collection.find()
lender_df = pd.DataFrame(list(lender_data))

# Documents from the "original" collection
original_data = original_collection.find()
original_df = pd.DataFrame(list(original_data))

# Preview the normalized payment profiles
def normalize_loan():
    """Print the first rows of the flattened payment profiles.

    Reads the module-level ``payment_profiles`` frame; nothing is returned.
    """
    preview = payment_profiles.head()
    print(preview)
# View summary stats on the payments
def summary_stats():
    """Print ``DataFrame.describe()`` for the module-level ``payment_df``."""
    print(payment_df.describe())

# Display missing data
def display_missing_data():
    """Print, per column of ``payment_df``, how many values are null."""
    null_counts = payment_df.isna().sum()
    print("Missing Data Summary:")
    print(null_counts)

# Display correlation between the total loan amount paid and the balance
def define_corelation():
    """Print the correlation matrix of ``paid_total`` and ``balance``.

    Both columns are read from the module-level ``payment_profiles`` frame.
    """
    columns = ['paid_total', 'balance']
    pairwise = payment_profiles[columns].corr()
    print("Correlation Matrix:")
    print(pairwise)

# Apply group by on the loan status
def group_by():
    """Print mean requested amount and predicted rate per loan status.

    Groups the module-level ``loan_request_df`` by its ``status`` column.
    """
    metrics = ['requested_amount', 'predicted_interest_rate']
    mean_by_status = loan_request_df.groupby('status')[metrics].mean()
    print("Grouped Data by Loan Status:")
    print(mean_by_status)

# Data transformation on getting the balance percentage of loan amount
def lambda_balance_percent():
    """Add ``remaining_balance_percent`` to the payment profiles and preview it.

    The new column is ``balance / loan_amount * 100``. Rows whose
    ``loan_amount`` is exactly 0 get 0 instead of dividing by zero. The
    module-level ``payment_profiles`` frame is modified in place and its
    first rows are printed. Nothing is returned.
    """
    loan_amount = payment_profiles['loan_amount']
    has_principal = loan_amount != 0
    # Column arithmetic instead of a row-by-row apply: zero amounts are masked
    # out before dividing, then filled with 0 afterwards. This also behaves
    # sensibly when the frame has no rows.
    percent = payment_profiles['balance'] / loan_amount.where(has_principal) * 100
    payment_profiles['remaining_balance_percent'] = percent.where(has_principal, 0)
    print(payment_profiles.head())

# Data transformation on getting high interest flag
def lambda_high_interest_flag(threshold=15):
    """Flag loan requests whose predicted interest rate exceeds a threshold.

    Adds a ``high_interest`` column to the module-level ``loan_request_df``
    (1 when ``predicted_interest_rate`` is strictly greater than
    ``threshold``, otherwise 0) and prints the first rows of the frame.

    Args:
        threshold: Rate above which a request counts as high interest.
            Defaults to 15.
    """
    rates = loan_request_df['predicted_interest_rate']
    # Vectorised comparison; a missing (NaN) rate compares False and is flagged 0
    loan_request_df['high_interest'] = (rates > threshold).astype('int64')
    print(loan_request_df.head())


def grade_to_risk_category(grade):
    """Map a loan grade to a coarse risk label.

    Args:
        grade: Grade string such as ``'A'`` or ``'B3'``. Only the leading
            letter is inspected and matching is case-sensitive.

    Returns:
        ``'Low Risk'`` for grades starting with A or B, ``'Medium Risk'``
        for C or D, ``'High Risk'`` for E or F, and ``'Unknown'`` for
        anything else, including values that are not strings (``None``, NaN).
    """
    # Missing grades arrive as None/NaN when this is applied to a DataFrame
    # column; they have no ``startswith`` and must not raise.
    if not isinstance(grade, str):
        return 'Unknown'
    if grade.startswith(('A', 'B')):
        return 'Low Risk'
    if grade.startswith(('C', 'D')):
        return 'Medium Risk'
    if grade.startswith(('E', 'F')):
        return 'High Risk'
    return 'Unknown'


def define_loan_category():
    """Upper-case the grades and derive a ``risk_category`` column.

    Works in place on the module-level ``loan_request_df``: ``grade`` is
    replaced by its upper-cased form, ``risk_category`` is filled by
    ``grade_to_risk_category``, and the first rows of both columns are
    printed.
    """
    upper_grades = loan_request_df['grade'].str.upper()
    loan_request_df['grade'] = upper_grades
    loan_request_df['risk_category'] = upper_grades.apply(grade_to_risk_category)

    preview = loan_request_df[['grade', 'risk_category']].head()
    print(preview)

# Plot risk category of grade of loans
def plot_risk_category():
    """Show a count plot of loan requests per risk category.

    Calls ``define_loan_category()`` first so that ``risk_category`` exists
    on ``loan_request_df``. Only the three categories listed in
    ``category_order`` are drawn, in that order.
    """
    define_loan_category()
    category_order = ['Low Risk', 'Medium Risk', 'High Risk']

    fig, ax = plt.subplots(figsize=(8, 6))
    sns.countplot(data=loan_request_df, x='risk_category', order=category_order, ax=ax)
    ax.set_title('Number of Loans in Each Risk Category')
    ax.set_xlabel('Risk Category')
    ax.set_ylabel('Number of Loans')
    fig.tight_layout()
    plt.show()

# Plot Loan amount v/s Interest rate
def scatter_plot_loan_amount_vs_interest_rate():
    """Scatter-plot ``interest_rate`` against ``loan_amount`` from ``loan_df``."""
    _, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(
        data=loan_df,
        x='loan_amount',
        y='interest_rate',
        alpha=0.7,
        ax=ax,
    )
    ax.set_title("Relationship Between Loan Amount and Interest Rate")
    ax.set_xlabel("Loan Amount")
    ax.set_ylabel("Interest Rate (%)")
    ax.grid(True)
    plt.show()

# Show correlation matrix heatmap for loans
def show_corelation_matrix_map():
    """Draw an annotated correlation heatmap for the loan columns.

    Correlates ``loan_amount``, ``interest_rate``, ``installment`` and
    ``paid_total`` from the module-level ``loan_df`` and writes each
    coefficient, formatted to two decimals, into its cell.
    """
    columns = ['loan_amount', 'interest_rate', 'installment', 'paid_total']
    corr = loan_df[columns].corr()
    size = len(columns)
    tick_positions = np.arange(size)

    plt.figure(figsize=(8, 8))
    plt.imshow(corr, cmap='Blues', interpolation='nearest')
    plt.colorbar(label='Correlation Coefficient')

    # One tick per column on both axes
    plt.xticks(tick_positions, columns, rotation=45, ha='right')
    plt.yticks(tick_positions, columns)
    plt.title('Correlation Heatmap')

    # plt.text takes (x, y), i.e. (column index, row index)
    cells = [(row, col) for row in range(size) for col in range(size)]
    for row, col in cells:
        value = corr.iloc[row, col]
        plt.text(col, row, f'{value:.2f}', ha='center', va='center', color='black')

    plt.tight_layout()
    plt.show()

# Plot loan status distribution
def show_loan_status_chart():
    """Show a bar chart of how many loan requests have each ``status``."""
    status_counts = loan_request_df['status'].value_counts()

    _, ax = plt.subplots(figsize=(8, 6))
    status_counts.plot(kind='bar', ax=ax)
    ax.set_title("Loan Status Distribution")
    ax.set_xlabel("Loan Status")
    ax.set_ylabel("Count")
    plt.show()

# Plot total amount paid v/s remaining balance
def plot_income_vs_balance():
    """Scatter-plot ``balance`` against ``paid_total`` for every payment profile.

    Despite the function name, the x-axis is the total amount paid, not
    income. The profiles are re-flattened from ``payment_df`` on each call,
    so the plot does not depend on the module-level ``payment_profiles``.
    """
    exploded = payment_df['payment_profile'].explode()
    profiles = pd.json_normalize(exploded)

    _, ax = plt.subplots(figsize=(10, 6))
    ax.scatter(profiles['paid_total'], profiles['balance'], alpha=0.5)
    ax.set_title("Total Amount Paid vs. Balance")
    ax.set_xlabel("Total Amount Paid")
    ax.set_ylabel("Balance")
    plt.show()

# Show Borrower's statistics
def borrower_loan_amount_vs_interest_rate():
    """Scatter-plot each borrower's ``total_loan_amount`` against ``income``.

    The function name is kept for existing callers, but the columns plotted
    from ``borrower_df`` are income (x) and total loan amount (y). The title
    and axis labels describe those columns.
    """
    plt.figure(figsize=(10, 6))
    sns.scatterplot(data=borrower_df, x='income', y='total_loan_amount', alpha=0.7)
    # Labels must match the plotted columns, not the function name
    plt.title("Relationship Between Income and Total Loan Amount")
    plt.xlabel("Income")
    plt.ylabel("Total Loan Amount")
    plt.grid(True)
    plt.show()

# Plot Borrower's Average Loan amount v/s Annual Income
def borrower_average_loan_vs_income():
    """Show the mean ``total_loan_amount`` per income bracket as a bar chart.

    ``income`` is cut into 10 equal-width brackets, stored on the
    module-level ``borrower_df`` as a new ``income_bracket`` column, and the
    loan amount is averaged within each bracket.
    """
    borrower_df['income_bracket'] = pd.cut(borrower_df['income'], bins=10)
    # observed=False keeps brackets that contain no borrowers on the axis and
    # states the grouping behaviour explicitly, because pandas is deprecating
    # the implicit default for categorical groupers.
    aggregated_data = (
        borrower_df.groupby('income_bracket', observed=False)['total_loan_amount']
        .mean()
        .reset_index()
    )

    plt.figure(figsize=(12, 7))
    sns.barplot(data=aggregated_data, x='income_bracket', y='total_loan_amount', alpha=0.8)

    # Interval labels are long; rotate them so they stay readable
    plt.xticks(rotation=45)
    plt.title("Average Loan Amount by Income Bracket")
    plt.xlabel("Income Bracket")
    plt.ylabel("Average Loan Amount")
    plt.grid(axis='y')  # horizontal grid lines only
    # Keep the rotated labels inside the figure
    plt.tight_layout()
    plt.show()

# Show correlation matrix heatmap for Borrowers
def borrower_heatmap():
    """Draw an annotated correlation heatmap for the borrower columns.

    Correlates ``income``, ``total_loan_amount`` and ``debt_to_income_ratio``
    from the module-level ``borrower_df`` and writes each coefficient,
    formatted to two decimals, into its cell.
    """
    columns = ['income', 'total_loan_amount', 'debt_to_income_ratio']
    corr = borrower_df[columns].corr()
    size = len(columns)
    tick_positions = np.arange(size)

    plt.figure(figsize=(8, 8))
    plt.imshow(corr, cmap='Blues', interpolation='nearest')
    plt.colorbar(label='Correlation Coefficient')

    # One tick per column on both axes
    plt.xticks(tick_positions, columns, rotation=45, ha='right')
    plt.yticks(tick_positions, columns)
    plt.title('Correlation Heatmap')

    # plt.text takes (x, y), i.e. (column index, row index)
    cells = [(row, col) for row in range(size) for col in range(size)]
    for row, col in cells:
        value = corr.iloc[row, col]
        plt.text(col, row, f'{value:.2f}', ha='center', va='center', color='black')

    plt.tight_layout()
    plt.show()

# Show Lender's statistics
def lender_scatter_plot():
    """Scatter-plot ``total_invested`` against ``income`` from ``lender_df``."""
    _, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(
        data=lender_df,
        x='income',
        y='total_invested',
        alpha=0.7,
        color='red',
        ax=ax,
    )

    ax.set_title("Relationship Between Income and Total Invested")
    ax.set_xlabel("Income")
    ax.set_ylabel("Total Invested")
    ax.grid(True)

    plt.show()

# Relationship between the interest rate and sub-grade
def plot_interest_rate_versus_sub_grade():
    """Show the mean ``interest_rate`` per ``sub_grade`` as a bar chart.

    Data comes from the module-level ``original_df``; bars are ordered by
    sub-grade label.
    """
    rate_by_subgrade = original_df.groupby('sub_grade')['interest_rate']
    mean_rates = rate_by_subgrade.mean().sort_index()

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.bar(mean_rates.index, mean_rates.values, alpha=0.8)
    ax.set_title('Average Interest Rate by Sub-Grade')
    ax.set_xlabel('Sub-Grade')
    ax.set_ylabel('Average Interest Rate (%)')
    # Rotate the existing tick labels of the current axes
    plt.xticks(rotation=45)
    fig.tight_layout()
    plt.show()