In [26]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import accuracy_score, classification_report, ConfusionMatrixDisplay,confusion_matrix
import pickle 

pd.set_option('display.max_column', None)
pd.set_option('display.max_rows',100)
import warnings
warnings.filterwarnings('ignore')

In [3]:
df = pd.read_csv('FinalData.csv')

In [4]:
df.head()

Unnamed: 0.1,Unnamed: 0,age,expenses,financial_status,experience,estimated_period,unwilling_toAccept,scenario,sum_scores,salary
0,0,1,4,3,1,4,2,1,3,11808184
1,1,1,4,4,1,2,1,1,2,36993565
2,2,2,2,2,3,3,4,2,3,14574651
3,3,4,1,1,3,1,1,4,2,7782938
4,4,4,1,4,4,1,4,2,3,3781845


In [5]:
df.drop(columns='Unnamed: 0',inplace=True)

In [6]:
df.head()

Unnamed: 0,age,expenses,financial_status,experience,estimated_period,unwilling_toAccept,scenario,sum_scores,salary
0,1,4,3,1,4,2,1,3,11808184
1,1,4,4,1,2,1,1,2,36993565
2,2,2,2,3,3,4,2,3,14574651
3,4,1,1,3,1,1,4,2,7782938
4,4,1,4,4,1,4,2,3,3781845


In [7]:
df.duplicated().any()

False

## **DATA SPLITTING**

In [11]:
#Splitting inference data
inference = df.sample(1)
inference

Unnamed: 0,age,expenses,financial_status,experience,estimated_period,unwilling_toAccept,scenario,sum_scores,salary
256,2,2,2,2,4,3,3,3,94792708


In [12]:
#Dropping inferenced data
df = df.drop(inference.index, axis = 0)

In [13]:
X = df.drop('sum_scores',axis=1)
y = df['sum_scores']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [14]:
print(f'X_train shape : ',X_train.shape)
print(f'X_test shape : ',X_test.shape)
print(f'y_train shape : ',y_train.shape)
print(f'y_test shape : ',y_test.shape)

X_train shape :  (7499, 8)
X_test shape :  (2500, 8)
y_train shape :  (7499,)
y_test shape :  (2500,)


## **PREPROCESSING PIPELINE**

In [15]:
#Scaling on salary column
num_pipeline = Pipeline([
    ('scaler', StandardScaler())])

train_tr = num_pipeline.fit_transform(X_train[['salary']])

In [16]:
#Creating a column transformer to transform salary columns
full_pipeline = ColumnTransformer([
        ("num", num_pipeline, ['salary']),
    ], remainder='passthrough')

## **FULL PIPELINE**

In [17]:
#Initiating SVC Model
svc = SVC()

In [18]:
#Initiating all process pipeline
all_process = Pipeline([
    ("preprocessing", full_pipeline),
    ("svc", svc )
])

In [19]:
all_process.fit(X_train, y_train)

In [20]:
train_result = all_process.predict(X_train)
test_result = all_process.predict(X_test)

In [21]:
print("=="*20,"TRAINING","=="*20)
print(classification_report(y_train,train_result,zero_division=0))
print("=="*20,"TEST","=="*20)
print(classification_report(y_test,test_result,zero_division=0))

              precision    recall  f1-score   support

           1       1.00      0.48      0.65        58
           2       0.98      1.00      0.99      1832
           3       1.00      1.00      1.00      4952
           4       0.99      1.00      1.00       652
           5       0.00      0.00      0.00         5

    accuracy                           1.00      7499
   macro avg       0.80      0.70      0.73      7499
weighted avg       0.99      1.00      0.99      7499

              precision    recall  f1-score   support

           1       1.00      0.48      0.65        29
           2       0.97      1.00      0.99       565
           3       1.00      1.00      1.00      1681
           4       0.99      1.00      1.00       223
           5       0.00      0.00      0.00         2

    accuracy                           0.99      2500
   macro avg       0.79      0.70      0.73      2500
weighted avg       0.99      0.99      0.99      2500



## **INFERENCE**

In [22]:
inference = inference[['salary','age','expenses','financial_status','experience','estimated_period','unwilling_toAccept','scenario']]

In [25]:
all_process.predict(inference)

array([3])

In [27]:
# save model
pickle.dump(all_process, open("risk_classification.pkl", "wb"))

In [28]:
# STEP 1 - Import saved model
model = pickle.load(open("risk_classification.pkl", "rb"))

In [29]:
# STEP 3 - model predict input data
Category = model.predict(inference).tolist()[0]
Category

3