In [1]:
import numpy as np
import pandas as pd

# Build a reproducible synthetic credit dataset (200 customers).
data_size = 200
np.random.seed(42)

# The draw order below (age -> income -> debt -> default) determines the
# generated values for a given seed, so keep it stable.
# np.random.randint excludes the upper bound, e.g. ages fall in 18..64.
age = np.random.randint(low=18, high=65, size=data_size)
income = np.random.randint(low=25000, high=150000, size=data_size)  # annual income
debt = np.random.randint(low=500, high=50000, size=data_size)  # outstanding debt
# Label: 0 = no default (p=0.7), 1 = default (p=0.3).
# NOTE(review): the label is sampled without reference to age/income/debt,
# so the features carry no real signal about it.
default = np.random.choice([0, 1], size=data_size, p=[0.7, 0.3])

# Assemble the columns into a single frame.
df = pd.DataFrame(dict(age=age, income=income, debt=debt, default=default))

# Preview the first five rows.
print(df.head())

   age  income   debt  default
0   56   77733    801        0
1   46   90318  45736        0
2   32  134953   1199        0
3   60  114474    690        0
4   25   48664  10992        0


In [5]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Predictor columns and the binary label to learn.
feature_columns = ['age', 'income', 'debt']
X = df[feature_columns]
y = df['default']

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize features (SVM is sensitive to feature scale). The scaler is
# fitted on the training rows only and then applied to both splits, so no
# test-set statistics leak into the transformation.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [7]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score

# Fit ordinary least squares directly on the 0/1 label, using the unscaled
# training features.
# NOTE(review): LinearRegression is a regressor, not a classifier; a
# LogisticRegression would be the conventional linear baseline here - confirm
# whether the regression model is intentional for this comparison.
lr_model = LinearRegression().fit(X_train, y_train)

# Continuous scores for the held-out rows.
y_pred_lr = lr_model.predict(X_test)

# Turn scores into class labels: anything at or above 0.5 counts as a default.
y_pred_lr_class = [int(score >= 0.5) for score in y_pred_lr]

# Share of held-out rows whose thresholded prediction matches the true label.
accuracy_lr = accuracy_score(y_true=y_test, y_pred=y_pred_lr_class)
print(f"Linear Regression Accuracy: {accuracy_lr:.2f}")

Linear Regression Accuracy: 0.57


In [9]:
from sklearn.ensemble import RandomForestClassifier

# Fit a 100-tree random forest on the unscaled training features; the fixed
# random_state makes the fitted forest repeatable.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# Class predictions for the held-out rows.
y_pred_rf = rf_model.predict(X_test)

# Share of held-out rows predicted correctly.
accuracy_rf = accuracy_score(y_true=y_test, y_pred=y_pred_rf)
print(f"Random Forest Accuracy: {accuracy_rf:.2f}")

Random Forest Accuracy: 0.50


In [10]:
from sklearn.svm import SVC

# Fit a linear-kernel SVM on the standardized training features (this is the
# only model in the notebook that consumes the scaled data).
svm_model = SVC(kernel='linear', random_state=42).fit(X_train_scaled, y_train)

# Class predictions for the held-out rows, scaled with the same fitted scaler.
y_pred_svm = svm_model.predict(X_test_scaled)

# Share of held-out rows predicted correctly.
accuracy_svm = accuracy_score(y_true=y_test, y_pred=y_pred_svm)
print(f"SVM Accuracy: {accuracy_svm:.2f}")

SVM Accuracy: 0.57


In [11]:
# Side-by-side summary of the three held-out accuracies computed above.
# NOTE(review): the labels were generated with P(default = 0) = 0.7, so an
# "always predict 0" baseline would be expected to score near 0.70 - worth
# reporting alongside these numbers.
print("Model Comparison:")
for model_label, model_accuracy in (
    ("Linear Regression", accuracy_lr),
    ("Random Forest", accuracy_rf),
    ("SVM", accuracy_svm),
):
    print(f"{model_label} Accuracy: {model_accuracy:.2f}")

Model Comparison:
Linear Regression Accuracy: 0.57
Random Forest Accuracy: 0.50
SVM Accuracy: 0.57
