# Credit Card Limit Prediction

This notebook implements a machine learning model to predict credit card limits using customer data.

## Dataset
Download the dataset from: https://www.kaggle.com/datasets/sakshigoyal7/credit-card-customers
Place the `BankChurners.csv` file in the same directory as this notebook.


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
import seaborn as sns

# Plot appearance: apply the matplotlib style sheet first, then the seaborn palette
# (same order as before, so the palette is set after the style sheet is loaded).
plt.style.use("seaborn-v0_8")
sns.set_palette(palette="husl")


## Step 1: Load and Explore the Dataset


In [None]:
# Read the BankChurners CSV; it is expected to sit next to this notebook.
df = pd.read_csv('BankChurners.csv')

# Quick look at what was loaded: dimensions, column labels, leading rows.
print("Dataset shape:", df.shape)
print("\nColumn names:", list(df.columns), sep="\n")
print("\nFirst 5 rows:", df.head(5), sep="\n")


In [None]:
# Dataset info: column dtypes / non-null counts, then per-column missing totals.
print("Dataset info:")
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
df.info()
print("\nMissing values:")
print(df.isnull().sum())


## Step 2: Data Preprocessing


In [None]:
# Prediction target: the Credit_Limit column of the loaded frame.
y = df['Credit_Limit']

# Input columns, listed in the order they appear in X.
features_to_use = [
    'Customer_Age', 'Dependent_count', 'Education_Level', 'Income_Category',
    'Months_on_book', 'Total_Relationship_Count', 'Total_Trans_Amt', 'Marital_Status',
]

# Take an independent copy so later edits to X leave df untouched.
X = df.loc[:, features_to_use].copy()

print("Selected features:", features_to_use, sep="\n")
print(f"\nTarget variable (Credit_Limit) range: ${y.min():,.2f} - ${y.max():,.2f}")


In [None]:
# Null counts for each selected feature.
print("Missing values per column:")
print(X.isna().sum())

# Frequency table of the values found in each categorical feature.
print("\nUnique values in categorical columns:")
for col in ('Education_Level', 'Income_Category', 'Marital_Status'):
    print(f"\n{col}:", X[col].value_counts(), sep="\n")


In [None]:
# Encode the three categorical features as integer codes.
#
# The codes are always derived from the untouched `df` columns rather than from
# `X` itself, so this cell can be re-run safely. Mapping `X[col]` onto itself
# would turn the already-encoded integers into NaN on a second run, and the
# fillna(0) below would then silently collapse every category to 0.

# Education Level encoding (ordered by attainment; 'Unknown' is coded 0)
education_mapping = {
    'Unknown': 0,
    'Uneducated': 1,
    'High School': 2,
    'College': 3,
    'Graduate': 4,
    'Post-Graduate': 5,
    'Doctorate': 6
}

# Income Category encoding (ordered by income band; 'Unknown' is coded 0)
income_mapping = {
    'Unknown': 0,
    'Less than $40K': 1,
    '$40K - $60K': 2,
    '$60K - $80K': 3,
    '$80K - $120K': 4,
    '$120K +': 5
}

# Marital Status encoding
# NOTE(review): these codes impose an order (Single < Married < Divorced) that
# the categories do not obviously have; consider one-hot encoding if the
# downstream model treats the values as magnitudes.
marital_mapping = {
    'Unknown': 0,
    'Single': 1,
    'Married': 2,
    'Divorced': 3
}

categorical_mappings = {
    'Education_Level': education_mapping,
    'Income_Category': income_mapping,
    'Marital_Status': marital_mapping,
}

for col, mapping in categorical_mappings.items():
    # Series.map returns NaN for any label that is not a key of the mapping.
    encoded = df[col].map(mapping)
    unmapped = df.loc[encoded.isna(), col]
    if not unmapped.empty:
        # Surface unexpected labels instead of letting them vanish into code 0.
        print(
            f"Warning: {len(unmapped)} value(s) in {col} are not in the mapping "
            f"and will be encoded as 0: {unmapped.unique().tolist()}"
        )
    X[col] = encoded

# Fill any NaN values that resulted from mapping
# (applies to every column of X, as before, so numeric NaNs also become 0)
X = X.fillna(0)

print("Categorical encoding complete!")
print("\nEncoded features preview:")
print(X.head())
