# In [2]:
# 5.1 - 5.4 Data analysis for loan application along with visuals
import calendar
import os
from urllib.parse import quote

import pandas as pd
import plotly.express as px
from sqlalchemy import create_engine

# Define MySQL connection properties.
# Each value may be overridden through an environment variable so that real
# credentials do not have to live in the source; the literals are the fallbacks.
db_config = {
    'host': os.environ.get('CREDITCARD_DB_HOST', 'localhost'),
    'user': os.environ.get('CREDITCARD_DB_USER', 'root'),
    'password': os.environ.get('CREDITCARD_DB_PASSWORD', 'Password'),
    'database': os.environ.get('CREDITCARD_DB_NAME', 'creditcard_capstone')
}

# Create a SQLAlchemy engine for connecting to the MySQL database.
# User and password are percent-encoded because characters such as '@', ':' or
# '/' would otherwise be read as URL delimiters and corrupt the connection URL.
engine = create_engine(
    "mysql+pymysql://"
    f"{quote(db_config['user'], safe='')}:{quote(db_config['password'], safe='')}"
    f"@{db_config['host']}/{db_config['database']}"
)

# Function to run a SQL query and hand the rows back as a DataFrame
def fetch_data_from_db(query):
    """Execute *query* through the module-level engine and return a DataFrame.

    A connection is opened only for the duration of the read and is released
    when the ``with`` block exits.
    """
    with engine.connect() as connection:
        return pd.read_sql_query(query, connection)

# Function to chart approved loan applications per employment status
def plot_self_employed_approval():
    """Show a bar chart of approved applications grouped by employment status.

    Selects rows of ``cdw_sapp_loan_application`` whose ``App_status`` is
    ``'Y'``, counts them per ``Employment_status`` and renders the counts.
    When the query yields no rows a notice is printed and nothing is drawn.
    """
    sql = """
    SELECT Employment_status, COUNT(*) as Count
    FROM cdw_sapp_loan_application
    WHERE App_status = 'Y'
    GROUP BY Employment_status
    """
    approvals = fetch_data_from_db(sql)

    if not approvals.empty:
        axis_labels = {'Employment_status': 'Employment Status', 'Count': 'Number of Approvals'}
        hover_columns = {'Employment_status': True, 'Count': True}
        chart = px.bar(
            approvals,
            x='Employment_status',
            y='Count',
            title='Loan Approvals for Self-Employed Applicants',
            labels=axis_labels,
            hover_data=hover_columns,
        )
        chart.show()
    else:
        print("No data found for self-employed approvals")

# Function to chart application outcomes for married male applicants
def plot_married_male_rejections():
    """Show a bar chart of application counts per status for married men.

    Filters ``cdw_sapp_loan_application`` to ``Gender = 'Male'`` and
    ``Married = 'Yes'``, groups the rows by ``App_status`` and renders one bar
    per status. When the query yields no rows a notice is printed instead.
    """
    sql = """
    SELECT 
        App_status,
        COUNT(*) as Count
    FROM 
        cdw_sapp_loan_application
    WHERE 
        Gender = 'Male' AND Married = 'Yes'
    GROUP BY 
        App_status
    """
    status_counts = fetch_data_from_db(sql)

    if status_counts.empty:
        print("No data found for married male rejections")
        return

    chart_options = {
        'x': 'App_status',
        'y': 'Count',
        'title': 'Rejections for Married Male Applicants',
        'labels': {'App_status': 'Application Status', 'Count': 'Number of Applications'},
        'hover_data': {'App_status': True, 'Count': True},
    }
    px.bar(status_counts, **chart_options).show()

# Function to plot the transaction volume of the three most recent months
def plot_applications_last_three_months():
    """Plot the number of transactions for the three most recent months.

    Counts ``transaction_id`` per (year, month) in ``CDW_SAPP_CREDIT_CARD``,
    keeps the three latest groups and shows them as a line chart with the
    oldest month first. Prints a notice and returns without plotting when the
    query yields no rows.
    """
    query = """
    SELECT 
        year, 
        month, 
        COUNT(transaction_id) as transaction_count
    FROM 
        CDW_SAPP_CREDIT_CARD
    GROUP BY 
        year, month
    ORDER BY 
        year DESC, month DESC
    LIMIT 3
    """
    data = fetch_data_from_db(query)
    if data.empty:
        print("No transaction data found for the last three months")
        return

    # The query must sort newest-first for LIMIT 3 to pick the latest months;
    # reverse the rows so the line runs chronologically from left to right.
    data = data.iloc[::-1].reset_index(drop=True)

    # Cover all twelve months rather than only October-December; with the old
    # table any other month mapped to NaN and lost its x-axis label.
    month_mapping = {number: calendar.month_name[number] for number in range(1, 13)}
    data['month_name'] = data['month'].map(month_mapping)

    # The counted rows are credit-card transactions, so label them as such.
    fig = px.line(data, x='month_name', y='transaction_count', title='Transactions Over the Last Three Months',
                  labels={'month_name': 'Month', 'transaction_count': 'Number of Transactions'},
                  markers=True)
    fig.show()

# Function to chart the five branches with the largest healthcare totals
def plot_highest_healthcare_transactions():
    """Show a scatter plot of the top five branches by healthcare spend.

    Sums ``transaction_value`` per ``branch_code`` for rows of
    ``CDW_SAPP_CREDIT_CARD`` whose ``transaction_type`` is ``'Healthcare'``
    and keeps the five largest totals. When the query yields no rows a notice
    is printed and nothing is drawn.
    """
    sql = """
    SELECT 
        branch_code, 
        SUM(transaction_value) as total_value
    FROM 
        CDW_SAPP_CREDIT_CARD
    WHERE 
        transaction_type = 'Healthcare'
    GROUP BY 
        branch_code
    ORDER BY 
        total_value DESC
    LIMIT 5
    """
    top_branches = fetch_data_from_db(sql)

    if not top_branches.empty:
        # Marker size follows the total; colour follows the branch code.
        axis_labels = {'branch_code': 'Branch Code', 'total_value': 'Total Transaction Value'}
        hover_columns = {'branch_code': True, 'total_value': True}
        chart = px.scatter(
            top_branches,
            x='branch_code',
            y='total_value',
            title='Branch with Highest Healthcare Transactions (Scatter Plot)',
            labels=axis_labels,
            hover_data=hover_columns,
            size='total_value',
            color='branch_code',
        )
        chart.show()
    else:
        print("No healthcare transactions data found")

# Main function: render every report chart in sequence
def main():
    """Call each plotting function once, in the order the reports are numbered."""
    report_steps = (
        plot_self_employed_approval,
        plot_married_male_rejections,
        plot_applications_last_three_months,
        plot_highest_healthcare_transactions,
    )
    for render_chart in report_steps:
        render_chart()

# Only run the reports when the file is executed directly, not when imported
if __name__ == "__main__":
    main()
