In [48]:
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer


# Exploratory data analysis
import warnings
warnings.filterwarnings("ignore")
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set(color_codes=True)
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.figure_factory as ff

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import LogisticRegression
from sklearn import preprocessing

from IPython.display import display

from sklearn.svm import OneClassSVM
from numpy import where
from pathlib import Path

In [49]:
class LinearSVM:
    def __init__(self, learning_rate=0.001, epochs=10000, C=1.0):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.C = C
        self.weights = None
        self.bias = None

    def train(self, X, y):
        num_samples, num_features = X.shape
        self.weights = np.zeros(num_features)
        self.bias = 0
        min = float('inf')
        max = float('inf')
        for epoch in range(self.epochs):
            # print(self.weights)
            for i in range(num_samples):
                pred = np.dot(X[i], self.weights) - self.bias
                condition = y[i] * pred > 0
                if not condition:
                    self.weights -= self.learning_rate * (2 * self.C * self.weights - np.dot(X[i], y[i]))
                    self.bias -= self.learning_rate * y[i]

    def predict(self, X):
        return np.sign(np.dot(X, self.weights) - self.bias)

In [50]:
# Import training data
data_folder = Path("data-sets")
train_data = pd.read_csv("data-sets/train.csv")
test_data = pd.read_csv("data-sets/test.csv")

#columns to drop
drop = ['ID', 'Customer_ID', 'Month', 'Name', 'SSN', 'Occupation', 'Type_of_Loan', 'Credit_Mix', 'Credit_History_Age', 'Payment_of_Min_Amount', 'Payment_Behaviour']

#columns to clean
clean = ['Age', 'Annual_Income', 'Monthly_Inhand_Salary', 'Num_Bank_Accounts', 'Num_Credit_Card', 'Interest_Rate', 'Num_of_Loan', 'Delay_from_due_date', 'Num_of_Delayed_Payment', 'Changed_Credit_Limit', 'Num_Credit_Inquiries', 'Outstanding_Debt', 'Credit_Utilization_Ratio', 'Credit_History_Age', 'Total_EMI_per_month', 'Amount_invested_monthly', 'Monthly_Balance']

#clean columns
for item in clean:
    train_data[item] = (train_data[item].astype(str)).str.extract(r'^((?:\d*\.\d+|\d+)$)', expand=False).astype(float)

#map credit scores to number
creditScoreMap = {'Poor': -1, 'Standard': 2, 'Good': 1}
train_data['Credit_Score'] = train_data['Credit_Score'].replace(creditScoreMap)
train_data = train_data[train_data['Credit_Score'] != 2]
# for item in clean:
#     print(train[item])

#copy data and drop non-numeric columns
df = train_data.copy().drop(columns=drop)
df.dropna(inplace=True)
df = df.reset_index(drop=True)

In [51]:
X, y = df.drop(columns=['Credit_Score']), df['Credit_Score']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)

svm_model = LinearSVM()
svm_model.train(X_train, y_train)


KeyError: 0

In [None]:
y_pred = svm_model.predict(X_train)
# print("Predictions:", y_pred)

accuracy = accuracy_score(y_train, y_pred)
print(f"Training Accuracy: {accuracy * 100:.2f}%")

In [None]:
y_pred = svm_model.predict(X_test)
# print("Predictions:", y_pred)

accuracy = accuracy_score(y_test, y_pred)
print(f"Testing Accuracy: {accuracy * 100:.2f}%")

In [None]:
svm_model = SVC(kernel='linear', C=1.0)

svm_model.fit(X_train, y_train)

y_pred = svm_model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")