In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split

In [2]:
# Read the cleaned credit data from a CSV path relative to the working
# directory, then display the resulting frame.
df = pd.read_csv(filepath_or_buffer='Credit-Data-Cleaned.csv')
df

Unnamed: 0,duration,credit_amount,age,checking_status,credit_history,purpose,employment,property_magnitude,housing,gender,marital_status,savings_status,class
0,6.0,1169.0,67.0,<0,critical/other existing credit,radio/tv,>=7,real estate,own,male,single,no known savings,good
1,48.0,5951.0,22.0,0<=X<200,existing paid,radio/tv,1<=X<4,real estate,own,female,divorced/dependent/married,<100,bad
2,12.0,2096.0,49.0,no checking,critical/other existing credit,education,4<=X<7,real estate,own,male,single,<100,good
3,42.0,7882.0,45.0,<0,existing paid,furniture/equipment,4<=X<7,life insurance,for free,male,single,<100,good
4,24.0,4870.0,53.0,<0,delayed previously,new car,1<=X<4,no known property,for free,male,single,<100,bad
...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,12.0,1736.0,31.0,no checking,existing paid,furniture/equipment,4<=X<7,real estate,own,female,divorced/dependent/married,<100,good
996,30.0,3857.0,40.0,<0,existing paid,used car,1<=X<4,life insurance,own,male,divorced/separated,<100,good
997,12.0,804.0,38.0,no checking,existing paid,radio/tv,>=7,car,own,male,single,<100,good
998,45.0,1845.0,23.0,<0,existing paid,radio/tv,1<=X<4,no known property,for free,male,single,<100,bad


In [3]:
# Extract the target labels ('good' / 'bad') from the 'class' column as an array.
y = df.loc[:, 'class'].values
y

array(['good', 'bad', 'good', 'good', 'bad', 'good', 'good', 'good',
       'good', 'bad', 'bad', 'bad', 'good', 'bad', 'good', 'bad', 'good',
       'good', 'bad', 'good', 'good', 'good', 'good', 'good', 'good',
       'good', 'good', 'good', 'good', 'bad', 'good', 'good', 'good',
       'good', 'good', 'bad', 'good', 'bad', 'good', 'good', 'good',
       'good', 'good', 'good', 'bad', 'good', 'good', 'good', 'good',
       'good', 'good', 'good', 'good', 'good', 'bad', 'good', 'bad',
       'good', 'good', 'bad', 'good', 'good', 'bad', 'bad', 'good',
       'good', 'good', 'good', 'bad', 'good', 'good', 'good', 'good',
       'good', 'bad', 'good', 'bad', 'good', 'good', 'good', 'bad',
       'good', 'good', 'good', 'good', 'good', 'good', 'bad', 'good',
       'bad', 'good', 'good', 'bad', 'good', 'good', 'bad', 'good',
       'good', 'good', 'good', 'good', 'good', 'good', 'good', 'good',
       'bad', 'bad', 'good', 'good', 'good', 'good', 'good', 'good',
       'bad', 'good', 'go

In [4]:
# Remove the target column so `df` holds only the feature columns.
# `errors='ignore'` keeps this cell re-runnable: on a second execution the
# 'class' column is already gone, and the drop becomes a no-op instead of
# raising KeyError.
df = df.drop(columns='class', errors='ignore')
df

Unnamed: 0,duration,credit_amount,age,checking_status,credit_history,purpose,employment,property_magnitude,housing,gender,marital_status,savings_status
0,6.0,1169.0,67.0,<0,critical/other existing credit,radio/tv,>=7,real estate,own,male,single,no known savings
1,48.0,5951.0,22.0,0<=X<200,existing paid,radio/tv,1<=X<4,real estate,own,female,divorced/dependent/married,<100
2,12.0,2096.0,49.0,no checking,critical/other existing credit,education,4<=X<7,real estate,own,male,single,<100
3,42.0,7882.0,45.0,<0,existing paid,furniture/equipment,4<=X<7,life insurance,for free,male,single,<100
4,24.0,4870.0,53.0,<0,delayed previously,new car,1<=X<4,no known property,for free,male,single,<100
...,...,...,...,...,...,...,...,...,...,...,...,...
995,12.0,1736.0,31.0,no checking,existing paid,furniture/equipment,4<=X<7,real estate,own,female,divorced/dependent/married,<100
996,30.0,3857.0,40.0,<0,existing paid,used car,1<=X<4,life insurance,own,male,divorced/separated,<100
997,12.0,804.0,38.0,no checking,existing paid,radio/tv,>=7,car,own,male,single,<100
998,45.0,1845.0,23.0,<0,existing paid,radio/tv,1<=X<4,no known property,for free,male,single,<100


In [5]:
# Split the feature columns by dtype so each group gets its own preprocessing.
# 'number' selects every numeric dtype (e.g. int32 / float32 as well as
# int64 / float64), so a numeric column is never routed to the one-hot
# encoder just because it was loaded with a different width.
numerical_columns = df.select_dtypes(include='number').columns
# Everything that is not numeric is treated as categorical; the original
# column order is preserved.
categorical_columns = [col for col in df.columns if col not in numerical_columns]
numerical_columns, categorical_columns

(Index(['duration', 'credit_amount', 'age'], dtype='object'),
 ['checking_status',
  'credit_history',
  'purpose',
  'employment',
  'property_magnitude',
  'housing',
  'gender',
  'marital_status',
  'savings_status'])

In [6]:
# Encode the string labels as integers ('bad' -> 0, 'good' -> 1). The fitted
# encoder is kept so predictions can be mapped back to the original labels.
label_encoder = LabelEncoder()
label_encoder.fit(y)
y = label_encoder.transform(y)
y

array([1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,
       0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1,
       1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0,
       1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1,
       1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 1, 0,
       1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1,
       0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,
       1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1,

In [7]:
# Column-wise preprocessing applied inside the model pipeline:
#   * numerical columns are standardized,
#   * categorical columns are one-hot encoded.
# handle_unknown='ignore' makes the encoder emit an all-zero group for a
# category that was not present when it was fitted. With the default
# ('error'), predict() raises as soon as the test split or any later input to
# the saved pipeline contains such a category.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', StandardScaler(), numerical_columns),
        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_columns),
    ]
)

In [15]:
# Chain the preprocessing and an RBF-kernel SVM so both are fitted and
# applied together as one estimator.
svm_classifier = SVC(kernel='rbf', C=1)
pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', svm_classifier),
    ]
)

In [18]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified, and 'bad' labels appear to be the
# minority class — consider passing stratify=y.
x_train, x_test, y_train, y_test = train_test_split(
    df,
    y,
    test_size=0.2,
    random_state=42,
)

In [19]:
# Fit the preprocessing steps and the classifier on the training split only.
pipeline.fit(X=x_train, y=y_train)

In [20]:
# Predict encoded labels for the held-out rows ...
y_pred = pipeline.predict(X=x_test)
# ... and map them back to the original string labels.
decoded_pred = label_encoder.inverse_transform(y_pred)

In [25]:
from sklearn.metrics import accuracy_score

# Fraction of held-out rows whose predicted label matches the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

0.775

In [27]:
import pickle

# Persist the fitted pipeline and the label encoder, one pickle file each,
# in the working directory.
for filename, artifact in (
    ('pipeline.pkl', pipeline),
    ('label_encoder.pkl', label_encoder),
):
    with open(filename, 'wb') as f:
        pickle.dump(artifact, f)

In [31]:
# Sanity check: (rows, columns) of the held-out feature frame.
x_test.shape

(200, 12)