# In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# 1. Load the raw data and impute missing values so the charts have no gaps
df = pd.read_csv('train.csv')

# Columns in cat_cols are filled with their most frequent value (the first
# entry returned by mode()); columns in num_cols are filled with their median.
cat_cols = ['Gender', 'Married', 'Dependents', 'Self_Employed', 'Credit_History', 'Property_Area']
num_cols = ['LoanAmount', 'Loan_Amount_Term', 'ApplicantIncome', 'CoapplicantIncome']

# Build a single column -> replacement mapping and apply it in one fillna call;
# columns not named in the mapping are left untouched.
fill_values = {name: df[name].mode()[0] for name in cat_cols}
fill_values.update({name: df[name].median() for name in num_cols})
df = df.fillna(value=fill_values)

# ==========================================
# IMAGE 1: Credit History Impact
# ==========================================
# Grouped count plot: one bar group per Credit_History value, split by Loan_Status.
fig, ax = plt.subplots(figsize=(6, 4))
sns.countplot(data=df, x='Credit_History', hue='Loan_Status', palette='viridis', ax=ax)
ax.set_title('Impact of Credit History on Loan Approval')
ax.set_xlabel('Credit History (0=Bad, 1=Good)')
ax.set_ylabel('Count of Applicants')

# Write the chart to disk, then close the figure so it cannot leak into the next plot.
fig.savefig('credit_history_impact.png', bbox_inches='tight', dpi=300)
plt.close(fig)
print("✅ Saved 'credit_history_impact.png'")

# ==========================================
# IMAGE 2: Income Distribution (Outliers)
# ==========================================
# One figure with two side-by-side panels, both drawn from ApplicantIncome.
fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(12, 5))
income = df['ApplicantIncome']

# Left panel: histogram with a KDE curve overlaid.
sns.histplot(income, kde=True, color='blue', ax=ax_hist)
ax_hist.set_title('Applicant Income Distribution (Skewed)')

# Right panel: vertical boxplot; points beyond the whiskers are the outliers.
sns.boxplot(y=income, color='orange', ax=ax_box)
ax_box.set_title('Applicant Income Outliers')

# Write the chart to disk and release the figure.
fig.savefig('income_distribution.png', bbox_inches='tight', dpi=300)
plt.close(fig)
print("✅ Saved 'income_distribution.png'")

# ==========================================
# IMAGE 3: Correlation Heatmap
# ==========================================
# Work on a copy that carries a numeric version of Loan_Status (Y -> 1, N -> 0)
# so the target can take part in the correlation; df itself is not modified.
df_temp = df.assign(Loan_Status_Num=df['Loan_Status'].map({'Y': 1, 'N': 0}))

# Only numeric columns are kept for the correlation matrix.
numerical_df = df_temp.select_dtypes(include=['number'])
corr_matrix = numerical_df.corr()

# Annotated heatmap with each coefficient printed to two decimals.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Feature Correlation Matrix')

# Write the chart to disk and release the figure.
fig.savefig('correlation_heatmap.png', bbox_inches='tight', dpi=300)
plt.close(fig)
print("✅ Saved 'correlation_heatmap.png'")