In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns  # conventional alias; the plotting cells call `sns.*`
import seaborn as ans  # original (mistyped) alias, kept so any existing `ans.*` reference still resolves

In [None]:
# Load the dataset into `car_df`; the path is relative to the working directory.
car_df = pd.read_csv('cardata.csv')

In [None]:
# Display the loaded DataFrame as this cell's output.
car_df

In [None]:
# Preview the first rows.
car_df.head()

In [None]:
# Preview the last rows.
car_df.tail()

In [None]:
# Summarize the columns: names, dtypes and non-null counts.
car_df.info()

In [None]:
# Descriptive statistics for the DataFrame's columns.
car_df.describe()

In [None]:
# Element-wise mask: True wherever a value is missing.
car_df.isnull()

In [None]:
# Number of missing values in each column.
car_df.isnull().sum()

In [None]:
# ## Step 2: Clean the data
# Drop rows that contain missing values (or impute them instead, as needed).
# `df` is the cleaned working frame used by the plots below. `car_df` is left
# untouched, so this cell gives the same result every time it is re-run.
df = car_df.dropna()

# ## Step 3: Visualize numeric columns
# Histograms - distribution of each numeric feature.
numeric_cols = df.select_dtypes(include='number').columns

df[numeric_cols].hist(bins=20, figsize=(14, 10), edgecolor='black')
plt.suptitle('Histograms of Numeric Columns')
plt.tight_layout()
plt.show()

# Box plots - spot outliers. The grid has 2 rows and ceil(n / 2) columns.
plt.figure(figsize=(14, 8))
for i, col in enumerate(numeric_cols):
    plt.subplot(2, (len(numeric_cols)+1)//2, i+1)
    sns.boxplot(y=df[col])
    plt.title(f'Box Plot of {col}')
plt.tight_layout()
plt.show()

# ## Step 4: Visualize categorical columns
# Bar charts - frequency of each category, most frequent first.
categorical_cols = df.select_dtypes(include='object').columns

for col in categorical_cols:
    plt.figure(figsize=(8, 5))
    sns.countplot(data=df, x=col, order=df[col].value_counts().index)
    plt.title(f'Bar Chart of {col}')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Pie charts - composition of each categorical column.
for col in categorical_cols:
    plt.figure(figsize=(6, 6))
    df[col].value_counts().plot.pie(autopct='%1.1f%%', startangle=90, shadow=True)
    plt.ylabel('')  # hide the default y-label drawn next to the pie
    plt.title(f'Pie Chart of {col}')
    plt.tight_layout()
    plt.show()

# ## Step 5: Interpretation tips
# (The examples mention income/loan-style columns; adapt them to the columns
# actually present in the data.)
#   - Histograms: skewed income/loan values? They might need a log transformation.
#   - Box plots: spot extreme values in income or loan amount.
#   - Bar charts: identify imbalances in categories such as Gender or Education.
#   - Pie charts: quick glance at category proportions, e.g. approval rates.









In [None]:
# ## 1. Correlation analysis + heatmap
# Shows how strongly the numeric variables are linearly related to each other.
# Expects `df` to be the working DataFrame defined by an earlier cell.

# Calculate correlation matrix
corr_matrix = df.select_dtypes(include='number').corr()

# Plot heatmap
plt.figure(figsize=(10, 6))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt=".2f", linewidths=0.5)
plt.title('Correlation Heatmap of Numeric Features')
plt.show()

# How to interpret it:
#   - Values range from -1 to 1.
#   - Closer to  1: strong positive correlation.
#   - Closer to -1: strong negative correlation.
#   - Closer to  0: no linear correlation.

# ## 2. Stacked bar chart (categorical distribution across groups)
# Example: how Loan_Status is distributed across Property_Area.
# NOTE(review): these column names look like they come from a loan dataset,
# while the notebook loads 'cardata.csv' - confirm they exist in `df` or
# substitute the matching columns.

# Create crosstab
cross_tab = pd.crosstab(df['Property_Area'], df['Loan_Status'])

# Plot stacked bar chart
cross_tab.plot(kind='bar', stacked=True, figsize=(8,6), colormap='viridis')
plt.title('Loan Status by Property Area')
plt.xlabel('Property Area')
plt.ylabel('Count')
plt.xticks(rotation=0)
plt.legend(title='Loan Status')
plt.tight_layout()
plt.show()

# Other pairings to try - just update the columns passed to pd.crosstab():
#   - Married vs Loan_Status
#   - Education vs Loan_Status



In [None]:
# ## 1. Scatter plot - two numeric variables, coloured by a category
# Expects `df` to be the working DataFrame defined by an earlier cell.
# NOTE(review): the column names used throughout this cell look like they come
# from a loan dataset - confirm they exist in `df` or substitute the matching columns.
plt.figure(figsize=(8, 6))
sns.scatterplot(data=df, x='ApplicantIncome', y='LoanAmount', hue='Loan_Status')
plt.title('Applicant Income vs Loan Amount')
plt.xlabel('Applicant Income')
plt.ylabel('Loan Amount')
plt.legend(title='Loan Status')
plt.show()

# Try other combinations too:
#   - CoapplicantIncome vs LoanAmount
#   - LoanAmount vs Loan_Amount_Term

# ## 2. Pair plots - interactions among multiple numeric variables
# Select multiple numeric variables + a hue for categorical coloring.
# A dedicated name is used so the `numeric_cols` column index built by the
# univariate-plot cell is not overwritten with a different kind of object.
pair_cols = ['ApplicantIncome', 'CoapplicantIncome', 'LoanAmount']
sns.pairplot(df[pair_cols + ['Loan_Status']], hue='Loan_Status', palette='husl')
plt.suptitle('Pairwise Relationships of Numeric Variables', y=1.02)
plt.show()

# ## 3. Box plots & violin plots - numeric vs categorical
# Box plot: income by education, split by loan status.
plt.figure(figsize=(8, 6))
sns.boxplot(data=df, x='Education', y='ApplicantIncome', hue='Loan_Status')
plt.title('Applicant Income by Education & Loan Status')
plt.show()

# Violin plot: loan amount by property area.
# NOTE(review): split=True presumably needs exactly two Loan_Status levels - confirm.
plt.figure(figsize=(8, 6))
sns.violinplot(data=df, x='Property_Area', y='LoanAmount', hue='Loan_Status', split=True)
plt.title('Loan Amount Distribution by Property Area & Loan Status')
plt.show()

In [None]:
import geopandas as gpd

# Natural Earth 110m countries archive, used as the base map when the sample
# dataset bundled with older GeoPandas releases is unavailable (needs network access).
NATURAL_EARTH_URL = 'https://naciscdn.org/naturalearth/110m/cultural/ne_110m_admin_0_countries.zip'

# Build a GeoDataFrame of points from the coordinate columns.
# NOTE(review): assumes `df` has 'Latitude' and 'Longitude' columns - confirm.
# The geometry is passed to the constructor rather than assigned to
# df['geometry'], so the shared `df` is not given an extra column as a side effect.
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df['Longitude'], df['Latitude']),
)

# Load base world map. `gpd.datasets` was deprecated in GeoPandas 0.14 and
# removed in 1.0 (where it raises AttributeError), so fall back to the
# Natural Earth download in that case.
try:
    world_path = gpd.datasets.get_path('naturalearth_lowres')
except AttributeError:
    world_path = NATURAL_EARTH_URL
world = gpd.read_file(world_path)

# Plot the points on top of the base map, coloured by Loan_Status.
fig, ax = plt.subplots(figsize=(15, 10))
world.plot(ax=ax, color='lightgrey')
gdf.plot(ax=ax, column='Loan_Status', legend=True, cmap='coolwarm', markersize=20)
plt.title('Loan Applications by Geographic Location')
plt.show()


In [None]:
# Proportion of each Loan_Status value within every Property_Area group,
# reshaped so that areas are rows and statuses are columns.
status_share = df.groupby('Property_Area')['Loan_Status'].value_counts(normalize=True)
approval_by_region = status_share.unstack()

# Draw the proportions as stacked bars and label the chart through the returned Axes.
region_ax = approval_by_region.plot.bar(stacked=True, colormap='coolwarm', figsize=(10, 6))
region_ax.set_title('Loan Approval Rate by Region')
region_ax.set_ylabel('Proportion')
region_ax.set_xlabel('Region')
region_ax.legend(title='Loan Status')
plt.tight_layout()
plt.show()


In [None]:
import folium
from folium.plugins import HeatMap

# Centre the map on the mean latitude/longitude of the data.
center_lat = df['Latitude'].mean()
center_lon = df['Longitude'].mean()
m = folium.Map(location=[center_lat, center_lon], zoom_start=6)

# Heat layer input: [Latitude, Longitude, LoanAmount] rows, skipping any row
# with a missing value in one of those three columns.
heat_columns = ['Latitude', 'Longitude', 'LoanAmount']
heat_data = df[heat_columns].dropna().values.tolist()
heat_layer = HeatMap(heat_data)
heat_layer.add_to(m)

# Write the interactive map to an HTML file in the working directory.
m.save('loan_heatmap.html')
