In [None]:
# Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the loan CSV into the notebook-level DataFrame `data`, which the
# later cells inspect and plot.
# NOTE(review): '/content/...' looks like a Colab working directory - confirm
# the file is uploaded there before running.
file_path = '/content/loan.csv'
data = pd.read_csv(filepath_or_buffer=file_path)

In [None]:
# Display the first few rows of the dataset.
# The frame is left as the cell's last expression (as the null-count cell
# already does) so the notebook renders it as a table instead of the
# plain-text dump produced by print().
print("First few rows of the dataset:")
data.head()

In [None]:
# Display basic information (column names, non-null counts, dtypes).
print("\nDataset Information:")
# DataFrame.info() writes its report to stdout itself and returns None, so
# wrapping it in print() would add a stray "None" line after the report.
data.info()

In [None]:
# Count the missing values in each column (sum of the boolean NA mask
# taken down the rows).
data.isna().sum(axis=0)

In [None]:
# Display descriptive statistics.
# The summary frame is the cell's last expression so the notebook renders it
# as a table rather than the plain-text output of print().
print("\nDescriptive Statistics:")
data.describe()

In [None]:
# Keep only the numeric columns; this frame feeds the correlation matrix
# computed further down. Selecting by the generic 'number' kind (instead of
# listing int64/float64 explicitly) also keeps columns stored as other numeric
# dtypes such as int32, float32 or pandas' nullable Int64/Float64.
numeric_data = data.select_dtypes(include='number')

In [None]:
# Histogram for the 'Income' column
def plot_income_histogram():
    """Draw and show a 10-bin histogram of ``data['income']``.

    Reads the notebook-level ``data`` DataFrame; takes no arguments and
    returns nothing.
    """
    _, ax = plt.subplots(figsize=(6, 4))
    ax.hist(data['income'], bins=10, color='skyblue', edgecolor='black')
    ax.set_title('Income Distribution')
    ax.set_xlabel('Income')
    ax.set_ylabel('Frequency')
    # Horizontal grid lines only, so bar heights are easy to read off.
    ax.grid(axis='y', alpha=0.75)
    plt.show()

plot_income_histogram()

In [None]:
# Scatter plot for 'Age' vs. 'Credit Score'
def plot_age_credit_score_scatter():
    """Draw and show a scatter plot of ``data['age']`` against ``data['credit_score']``.

    Reads the notebook-level ``data`` DataFrame; takes no arguments and
    returns nothing.
    """
    _, ax = plt.subplots(figsize=(6, 4))
    # Semi-transparent markers keep overlapping points distinguishable.
    ax.scatter(data['age'], data['credit_score'], color='green', alpha=0.6)
    ax.set_title('Age vs. Credit Score')
    ax.set_xlabel('Age')
    ax.set_ylabel('Credit Score')
    ax.grid(True, linestyle='--', alpha=0.5)
    plt.show()

plot_age_credit_score_scatter()

In [None]:
def plot_income_credit_score_boxplot():
    """Show side-by-side box plots of income and credit score per loan status.

    Reads the notebook-level ``data`` DataFrame and groups the ``income`` and
    ``credit_score`` columns by ``loan_status``, one panel per column. Takes
    no arguments and returns nothing.
    """
    # (column to plot, human-readable label) for each panel, left to right.
    panels = [
        ('income', 'Income'),
        ('credit_score', 'Credit Score'),
    ]
    _, axes = plt.subplots(1, len(panels), figsize=(12, 6))

    for ax, (column, label) in zip(axes, panels):
        # seaborn 0.13 deprecates passing `palette` without `hue`, so the
        # grouping variable is also assigned to `hue`. `dodge=False` keeps one
        # full-width box per status on versions that would otherwise offset
        # the boxes by hue level.
        sns.boxplot(
            x='loan_status',
            y=column,
            hue='loan_status',
            dodge=False,
            data=data,
            palette='Set2',
            ax=ax,
        )
        # The hue legend would only repeat the x tick labels; drop it if the
        # installed seaborn version drew one.
        legend = ax.get_legend()
        if legend is not None:
            legend.remove()

        ax.set_title(f'{label} by Loan Status')
        ax.set_xlabel('Loan Status')
        ax.set_ylabel(label)

    plt.tight_layout()
    plt.show()

plot_income_credit_score_boxplot()

In [None]:
# Summary statistics once more, then the pairwise correlations of the
# numeric columns.
print("\nPandas Describe:", data.describe(), sep="\n")

# Correlation matrix (Pearson is pandas' default method, spelled out here).
correlation_matrix = numeric_data.corr(method='pearson')
# Heatmap of the correlation matrix
def plot_correlation_heatmap():
    """Draw and show an annotated heatmap of ``correlation_matrix``.

    Reads the notebook-level ``correlation_matrix``; takes no arguments and
    returns nothing. Each cell is annotated with its value to two decimals.
    """
    _, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(
        correlation_matrix,
        annot=True,
        fmt='.2f',
        cmap='coolwarm',
        linewidths=0.5,
        ax=ax,
    )
    ax.set_title('Pandas Correlation Matrix', fontsize=16)
    # Tilt the tick labels on both axes by 45 degrees.
    plt.setp(ax.get_xticklabels(), rotation=45)
    plt.setp(ax.get_yticklabels(), rotation=45)
    plt.show()

plot_correlation_heatmap()