In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('darkgrid')
import joblib
from sklearn.impute import SimpleImputer
from sklearn.metrics import classification_report, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score, roc_curve
from numpy import random
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, StratifiedKFold, train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVC
from datetime import datetime

In [7]:
data = pd.read_csv('aps_failure_training_set.csv',na_values= ['na'])

In [8]:
print('Shape of train dataset:', data.shape)
data.head()

Shape of train dataset: (60000, 171)


Unnamed: 0,class,aa_000,ab_000,ac_000,ad_000,ae_000,af_000,ag_000,ag_001,ag_002,...,ee_002,ee_003,ee_004,ee_005,ee_006,ee_007,ee_008,ee_009,ef_000,eg_000
0,neg,76698,,2130706000.0,280.0,0.0,0.0,0.0,0.0,0.0,...,1240520.0,493384.0,721044.0,469792.0,339156.0,157956.0,73224.0,0.0,0.0,0.0
1,neg,33058,,0.0,,0.0,0.0,0.0,0.0,0.0,...,421400.0,178064.0,293306.0,245416.0,133654.0,81140.0,97576.0,1500.0,0.0,0.0
2,neg,41040,,228.0,100.0,0.0,0.0,0.0,0.0,0.0,...,277378.0,159812.0,423992.0,409564.0,320746.0,158022.0,95128.0,514.0,0.0,0.0
3,neg,12,0.0,70.0,66.0,0.0,10.0,0.0,0.0,0.0,...,240.0,46.0,58.0,44.0,10.0,0.0,0.0,0.0,4.0,32.0
4,neg,60874,,1368.0,458.0,0.0,0.0,0.0,0.0,0.0,...,622012.0,229790.0,405298.0,347188.0,286954.0,311560.0,433954.0,1218.0,0.0,0.0


In [9]:
threshold = 0.5
data = data.dropna(thresh=threshold*int(len(data)),axis=1)

In [10]:
data.shape

(60000, 163)

In [11]:
set_t = set(data.columns)

In [12]:
single_valued = [col for col in data.columns if data[col].value_counts().shape[0] == 1]
single_valued

['cd_000']

In [13]:
 # Dropp rows with missing values
data.drop('cd_000',axis=1,inplace=True) 

##### Splitting data into features (X_value) and target (y_value)

In [14]:
X_value = data.drop('class',axis=1)  # Assuming 'class' is target column
y_value = data['class'].map({'neg':0,'pos':1}) # Convert 'neg' to 0 and 'pos' to 1 for binary classification

$$Data Preprocessing$$

In [15]:
# imputing the rest of the dataset with median values
imputer = SimpleImputer(strategy='median')
# fitting the train data
imputer.fit(X_value)

# transforming both train and test datasets
X_value_imputed = pd.DataFrame(imputer.transform(X_value), columns=X_value.columns)

$$Normalizing ((min-max), Scaling)$$

In [16]:
# declaring the normalizer
normalizer = MinMaxScaler()

# fitting the scaler
normalizer.fit_transform(X_value_imputed)
X_norm = normalizer.transform(X_value_imputed)

In [17]:
# Train-Test Split (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X_norm,y_value,random_state=101,test_size=0.2)

$$Hyperparameter-tuning-with-GridSearchCV$$

In [18]:
def tuner(data, labels, model, parameters, metrics='f1_macro'):
    '''
        This function does hyperparameter tuning for a specific model given a dictionary containing all the hyperparameters.
        The scoring here used will be the f1 score.
    '''
    start = datetime.now()
    # declaring the gridsearch object
    gridsearch = GridSearchCV(estimator=model,
                             param_grid=parameters,
                             scoring='f1_macro',
                             n_jobs=-1,
                             verbose=100,
                             cv=3,
                             return_train_score=True)
    # fit the data
    gridsearch.fit(data, labels)
    print("Best Parameters:", gridsearch.best_params_)
    print("F1 score:", gridsearch.best_score_)
    print("Time taken:", datetime.now()-start)
    
    return gridsearch

In [14]:
svc = SVC(kernel='rbf',C=0.1)

$$SVC-Classifier$$

In [21]:
svc_params = {'kernel': ['linear', 'rbf', 'poly'], 'C': [0.1, 1, 10], 'gamma': ['scale', 'auto']}
svc_grid = GridSearchCV(SVC(), param_grid=svc_params, cv=5, scoring='f1_macro')
svc_grid.fit(X_train, y_train)
svc_best = svc_grid.best_estimator_

In [22]:
print(svc_best)

SVC(C=10, kernel='poly')


In [27]:
svcc_pred = svc_best.predict(X_test)
print(classification_report(svcc_pred,y_test),confusion_matrix(svcc_pred,y_test))

              precision    recall  f1-score   support

         neg       1.00      0.99      1.00     11830
         pos       0.68      0.76      0.72       170

    accuracy                           0.99     12000
   macro avg       0.84      0.88      0.86     12000
weighted avg       0.99      0.99      0.99     12000
 [[11770    60]
 [   41   129]]


In [28]:
svc_pred = svc_best.predict(X_test)
print(classification_report(svc_pred,y_test),confusion_matrix(svc_pred,y_test))

              precision    recall  f1-score   support

         neg       1.00      0.99      1.00     11830
         pos       0.68      0.76      0.72       170

    accuracy                           0.99     12000
   macro avg       0.84      0.88      0.86     12000
weighted avg       0.99      0.99      0.99     12000
 [[11770    60]
 [   41   129]]


$$Logistic-Regression-Classifier$$

In [18]:
# Logistic Regression
logreg_params = {'penalty': ['l1', 'l2'], 'C': [0.1, 1, 10], 'solver': ['liblinear']}
logreg_grid = GridSearchCV(LogisticRegression(), param_grid=logreg_params, cv=5, scoring='f1_macro')
logreg_grid.fit(X_train, y_train)
logreg_best = logreg_grid.best_estimator_

In [19]:
print(logreg_best)

LogisticRegression(C=10, penalty='l1', solver='liblinear')


In [29]:
log_pred = logreg_best.predict(X_test)
print(classification_report(log_pred,y_test),confusion_matrix(log_pred,y_test))

              precision    recall  f1-score   support

         neg       1.00      0.99      1.00     11849
         pos       0.63      0.79      0.70       151

    accuracy                           0.99     12000
   macro avg       0.81      0.89      0.85     12000
weighted avg       0.99      0.99      0.99     12000
 [[11779    70]
 [   32   119]]


$$Decision Tree Classifier$$

In [24]:
# Decision Tree
dt_params = {'max_depth': [10, 15, 25], 'min_samples_leaf': [1, 5, 10]}
dt_grid = GridSearchCV(DecisionTreeClassifier(), param_grid=dt_params, cv=5, scoring='f1_macro')
dt_grid.fit(X_train, y_train)
dt_best = dt_grid.best_estimator_

In [25]:
print(dt_best)

DecisionTreeClassifier(max_depth=25)


In [30]:
dt_pred = dt_best.predict(X_test)
print(classification_report(dt_pred,y_test),confusion_matrix(dt_pred,y_test))

              precision    recall  f1-score   support

         neg       1.00      1.00      1.00     11816
         pos       0.71      0.73      0.72       184

    accuracy                           0.99     12000
   macro avg       0.85      0.86      0.86     12000
weighted avg       0.99      0.99      0.99     12000
 [[11761    55]
 [   50   134]]


###### Performance on Test Data (From test_data_csv_file)

In [120]:
data_test = pd.read_csv('aps_failure_test_set.csv',na_values='na')
print(data_test)

Unnamed: 0,class,aa_000,ab_000,ac_000,ad_000,ae_000,af_000,ag_000,ag_001,ag_002,...,ee_002,ee_003,ee_004,ee_005,ee_006,ee_007,ee_008,ee_009,ef_000,eg_000
0,neg,60,0.0,2.000000e+01,12.0,0.0,0.0,0.0,0.0,0.0,...,1098.0,138.0,412.0,654.0,78.0,88.0,0.0,0.0,0.0,0.0
1,neg,82,0.0,6.800000e+01,40.0,0.0,0.0,0.0,0.0,0.0,...,1068.0,276.0,1620.0,116.0,86.0,462.0,0.0,0.0,0.0,0.0
2,neg,66002,2.0,2.120000e+02,112.0,0.0,0.0,0.0,0.0,0.0,...,495076.0,380368.0,440134.0,269556.0,1315022.0,153680.0,516.0,0.0,0.0,0.0
3,neg,59816,,1.010000e+03,936.0,0.0,0.0,0.0,0.0,0.0,...,540820.0,243270.0,483302.0,485332.0,431376.0,210074.0,281662.0,3232.0,0.0,0.0
4,neg,1814,,1.560000e+02,140.0,0.0,0.0,0.0,0.0,0.0,...,7646.0,4144.0,18466.0,49782.0,3176.0,482.0,76.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15995,neg,81852,,2.130706e+09,892.0,0.0,0.0,0.0,0.0,0.0,...,632658.0,273242.0,510354.0,373918.0,349840.0,317840.0,960024.0,25566.0,0.0,0.0
15996,neg,18,0.0,5.200000e+01,46.0,8.0,26.0,0.0,0.0,0.0,...,266.0,44.0,46.0,14.0,2.0,0.0,0.0,0.0,0.0,0.0
15997,neg,79636,,1.670000e+03,1518.0,0.0,0.0,0.0,0.0,0.0,...,806832.0,449962.0,778826.0,581558.0,375498.0,222866.0,358934.0,19548.0,0.0,0.0
15998,neg,110,,3.600000e+01,32.0,0.0,0.0,0.0,0.0,0.0,...,588.0,210.0,180.0,544.0,1004.0,1338.0,74.0,0.0,0.0,0.0


In [118]:
set2 = set(data_test.columns)
set3 = set2 - set_t
set3

{'bm_000'}

In [121]:
data_test = data_test.drop(['ab_000','bm_000','bn_000','bo_000','bp_000','bq_000','br_000','cr_000','cd_000'],axis=1)

In [122]:
data_test

Unnamed: 0,class,aa_000,ac_000,ad_000,ae_000,af_000,ag_000,ag_001,ag_002,ag_003,...,ee_002,ee_003,ee_004,ee_005,ee_006,ee_007,ee_008,ee_009,ef_000,eg_000
0,neg,60,2.000000e+01,12.0,0.0,0.0,0.0,0.0,0.0,2682.0,...,1098.0,138.0,412.0,654.0,78.0,88.0,0.0,0.0,0.0,0.0
1,neg,82,6.800000e+01,40.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1068.0,276.0,1620.0,116.0,86.0,462.0,0.0,0.0,0.0,0.0
2,neg,66002,2.120000e+02,112.0,0.0,0.0,0.0,0.0,0.0,199486.0,...,495076.0,380368.0,440134.0,269556.0,1315022.0,153680.0,516.0,0.0,0.0,0.0
3,neg,59816,1.010000e+03,936.0,0.0,0.0,0.0,0.0,0.0,0.0,...,540820.0,243270.0,483302.0,485332.0,431376.0,210074.0,281662.0,3232.0,0.0,0.0
4,neg,1814,1.560000e+02,140.0,0.0,0.0,0.0,0.0,0.0,0.0,...,7646.0,4144.0,18466.0,49782.0,3176.0,482.0,76.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15995,neg,81852,2.130706e+09,892.0,0.0,0.0,0.0,0.0,0.0,0.0,...,632658.0,273242.0,510354.0,373918.0,349840.0,317840.0,960024.0,25566.0,0.0,0.0
15996,neg,18,5.200000e+01,46.0,8.0,26.0,0.0,0.0,0.0,0.0,...,266.0,44.0,46.0,14.0,2.0,0.0,0.0,0.0,0.0,0.0
15997,neg,79636,1.670000e+03,1518.0,0.0,0.0,0.0,0.0,0.0,0.0,...,806832.0,449962.0,778826.0,581558.0,375498.0,222866.0,358934.0,19548.0,0.0,0.0
15998,neg,110,3.600000e+01,32.0,0.0,0.0,0.0,0.0,0.0,0.0,...,588.0,210.0,180.0,544.0,1004.0,1338.0,74.0,0.0,0.0,0.0


In [123]:
Xt1 = data_test.drop('class',axis=1)
yt1 = data_test['class'].map({'neg':0,'pos':1})

In [124]:
imputer.fit(Xt1)
# transforming both train and test datasets
Xt1_imputed = pd.DataFrame(imputer.transform(Xt1), columns=Xt1.columns)
normalizer.fit_transform(Xt1_imputed)
Xt1_norm = normalizer.transform(Xt1_imputed)

In [125]:
Xt1_norm.shape

(16000, 161)

In [126]:
yt1.shape

(16000,)

$$ Task - 02$$

###### Part A (undersampling the majority class)

In [19]:
data_01 = data[data['class']=='neg'].sample(30000)
data_02 = data[data['class']=='pos']
data_final = pd.concat([data_01,data_02],axis=0)

In [20]:
data_final

Unnamed: 0,class,aa_000,ac_000,ad_000,ae_000,af_000,ag_000,ag_001,ag_002,ag_003,...,ee_002,ee_003,ee_004,ee_005,ee_006,ee_007,ee_008,ee_009,ef_000,eg_000
32368,neg,28016,478.0,452.0,0.0,0.0,0.0,0.0,0.0,0.0,...,280582.0,122810.0,275076.0,295532.0,197546.0,72624.0,1842.0,370.0,0.0,0.0
55669,neg,40580,500.0,446.0,0.0,0.0,0.0,0.0,0.0,0.0,...,300418.0,164870.0,502318.0,540258.0,202906.0,59648.0,20822.0,2.0,0.0,0.0
32135,neg,79826,486.0,362.0,0.0,0.0,0.0,0.0,0.0,2520.0,...,885182.0,405066.0,670998.0,462884.0,301438.0,210238.0,532286.0,93330.0,0.0,0.0
23371,neg,40346,284.0,190.0,0.0,0.0,0.0,0.0,0.0,0.0,...,352704.0,213778.0,527042.0,439968.0,215900.0,81466.0,41620.0,208.0,0.0,0.0
44901,neg,14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,70.0,28.0,64.0,76.0,86.0,4.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59484,pos,895178,,,,,0.0,0.0,0.0,0.0,...,9116224.0,4276644.0,8701496.0,8082264.0,5827284.0,2057354.0,1662302.0,10790.0,,
59601,pos,862134,,,,,0.0,38834.0,1227952.0,8877294.0,...,3456564.0,1793170.0,4159190.0,5847384.0,8364506.0,12875424.0,661442.0,2458.0,,
59692,pos,186856,,,0.0,0.0,0.0,0.0,4300.0,910488.0,...,2713108.0,800182.0,322322.0,71638.0,34662.0,7304.0,2538.0,0.0,0.0,0.0
59742,pos,605092,,,,,0.0,44320.0,1048970.0,7820828.0,...,3940400.0,1865730.0,3698692.0,3271958.0,9831898.0,3755392.0,65610.0,0.0,,


In [21]:
X1 = data_final.drop('class',axis=1)
y1 = data_final['class'].map({'neg':0,'pos':1})

In [22]:
imputer.fit(X1)
# transforming both train and test datasets
X1_imputed = pd.DataFrame(imputer.transform(X1), columns=X1.columns)
normalizer.fit_transform(X1_imputed)
X1_norm = normalizer.transform(X1_imputed)

In [23]:
X1_norm.shape

(31000, 161)

In [24]:
X1_train, X1_test, y1_train, y1_test = train_test_split(X1_norm,y1,random_state=101,test_size=0.2)

In [25]:
model_svm = SVC(C=10, kernel='poly')

In [26]:
model_svm.fit(X1_train,y1_train)

In [27]:
pred = model_svm.predict(X_test)
print(classification_report(pred,y_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11783
           1       0.87      0.76      0.81       217

    accuracy                           0.99     12000
   macro avg       0.93      0.88      0.90     12000
weighted avg       0.99      0.99      0.99     12000



In [28]:
pred_t = model_svm.predict(X_train)
print(classification_report(pred_t,y_train))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     47123
           1       0.86      0.79      0.83       877

    accuracy                           0.99     48000
   macro avg       0.93      0.90      0.91     48000
weighted avg       0.99      0.99      0.99     48000



In [29]:
model_new=DecisionTreeClassifier(max_depth=25,max_leaf_nodes=10)

In [30]:
model_new.fit(X1_train,y1_train)

In [31]:
pred = model_new.predict(X1_test)
print(classification_report(pred,y1_test))
pred = model_new.predict(X_test)
print(classification_report(pred,y_test))
pred_t = model_new.predict(X_train)
print(classification_report(pred_t,y_train))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      6018
           1       0.65      0.73      0.69       182

    accuracy                           0.98      6200
   macro avg       0.82      0.86      0.84      6200
weighted avg       0.98      0.98      0.98      6200

              precision    recall  f1-score   support

           0       0.99      1.00      0.99     11790
           1       0.70      0.63      0.66       210

    accuracy                           0.99     12000
   macro avg       0.85      0.81      0.83     12000
weighted avg       0.99      0.99      0.99     12000

              precision    recall  f1-score   support

           0       0.99      0.99      0.99     47155
           1       0.69      0.66      0.68       845

    accuracy                           0.99     48000
   macro avg       0.84      0.83      0.84     48000
weighted avg       0.99      0.99      0.99     48000



In [32]:
pred = model_svm.predict(X_test)
print(classification_report(pred,y_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11783
           1       0.87      0.76      0.81       217

    accuracy                           0.99     12000
   macro avg       0.93      0.88      0.90     12000
weighted avg       0.99      0.99      0.99     12000



In [33]:
pred_t = model_svm.predict(X_train)
print(classification_report(pred_t,y_train))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     47123
           1       0.86      0.79      0.83       877

    accuracy                           0.99     48000
   macro avg       0.93      0.90      0.91     48000
weighted avg       0.99      0.99      0.99     48000



In [34]:
model_log1 =LogisticRegression(C=10, penalty='l1', solver='liblinear')

In [35]:
model_log1.fit(X1_train,y1_train)

In [36]:
pred = model_log1.predict(X1_test)
print(classification_report(pred,y1_test))
pred = model_log1.predict(X_test)
print(classification_report(pred,y_test))
pred_t = model_log1.predict(X_train)
print(classification_report(pred_t,y_train))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      6024
           1       0.72      0.83      0.77       176

    accuracy                           0.99      6200
   macro avg       0.86      0.91      0.88      6200
weighted avg       0.99      0.99      0.99      6200

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11812
           1       0.72      0.72      0.72       188

    accuracy                           0.99     12000
   macro avg       0.86      0.86      0.86     12000
weighted avg       0.99      0.99      0.99     12000

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     47200
           1       0.74      0.75      0.74       800

    accuracy                           0.99     48000
   macro avg       0.87      0.87      0.87     48000
weighted avg       0.99      0.99      0.99     48000



$$ Part B (Class-weight)$$

In [37]:
model_new=DecisionTreeClassifier(class_weight={0:1,1:2.69},max_depth=35,max_leaf_nodes=10)

In [38]:
model_new.fit(X_train,y_train)

In [39]:
pred=model_new.predict(X_test)

In [40]:
print(classification_report(y_test,pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11811
           1       0.78      0.70      0.74       189

    accuracy                           0.99     12000
   macro avg       0.89      0.85      0.87     12000
weighted avg       0.99      0.99      0.99     12000



In [41]:
model_svm=SVC(C=10,kernel='rbf',class_weight={0:0.4,1:2})

In [42]:
model_svm.fit(X_train,y_train)

In [43]:
pred=model_svm.predict(X_test)

In [44]:
print(classification_report(pred,y_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11820
           1       0.72      0.76      0.74       180

    accuracy                           0.99     12000
   macro avg       0.86      0.88      0.87     12000
weighted avg       0.99      0.99      0.99     12000



In [45]:
model_log = LogisticRegression(C=10, penalty='l1', solver='liblinear',class_weight={0:1,1:3})

In [46]:
model_log.fit(X_train,y_train)

In [47]:
pred = model_log.predict(X_test)

In [48]:
print(classification_report(pred,y_test))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99     11792
           1       0.73      0.66      0.70       208

    accuracy                           0.99     12000
   macro avg       0.86      0.83      0.85     12000
weighted avg       0.99      0.99      0.99     12000



$$Part C (Sample-weights)$$

In [49]:
model_new=DecisionTreeClassifier(max_depth=35,max_leaf_nodes=10)

In [50]:
from sklearn.utils.class_weight import compute_sample_weight

# Automatically compute sample weights for unbalanced classes
sample_weights = compute_sample_weight(class_weight={0:1,1:2.69}, y=y_train)

In [51]:
model_new.fit(X_train,y_train,sample_weight=sample_weights)

In [52]:
pred = model_new.predict(X_test)

In [53]:
print(classification_report(pred,y_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11829
           1       0.70      0.78      0.74       171

    accuracy                           0.99     12000
   macro avg       0.85      0.89      0.87     12000
weighted avg       0.99      0.99      0.99     12000



In [54]:
model_svm_2 = SVC(C=10,kernel='poly')

In [55]:
sample_weights = compute_sample_weight(class_weight={0:0.33,1:1}, y=y_train)

In [56]:
model_svm_2.fit(X_train,y_train,sample_weight=sample_weights)

In [57]:
pred = model_svm_2.predict(X_test)
print(classification_report(pred,y_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00     11827
           1       0.70      0.77      0.73       173

    accuracy                           0.99     12000
   macro avg       0.85      0.88      0.87     12000
weighted avg       0.99      0.99      0.99     12000



In [58]:
model_log_2 = LogisticRegression(C=10, penalty='l1', solver='liblinear')

In [59]:
sample_weights = compute_sample_weight(class_weight={0:0.33,1:0.9}, y=y_train)

In [60]:
model_log_2.fit(X_train,y_train,sample_weight=sample_weights)

In [61]:
pred = model_log_2.predict(X_test)
print(classification_report(pred,y_test))

              precision    recall  f1-score   support

           0       0.99      1.00      0.99     11801
           1       0.71      0.67      0.69       199

    accuracy                           0.99     12000
   macro avg       0.85      0.83      0.84     12000
weighted avg       0.99      0.99      0.99     12000



$$Part D$$

#### Data Augmentation via Noise Injection
###### Artificially increase the diversity of minority class by injecting small amounts of noise into features of minority class sample.

In [67]:
def add_noise(X, noise_level=0.01):
    noise = noise_level * np.random.normal(size=X.shape)
    return X + noise

In [68]:
# Apply noise to minority class samples
X_minority = X_train[y_train == 1]
X_noisy_minority = add_noise(X_minority)

In [69]:
# Append noisy samples to the original minority samples
X_augmented = np.vstack((X_train, X_noisy_minority))
y_augmented = np.hstack((y_train, np.ones(X_noisy_minority.shape[0])))

In [None]:
# Train your models on the augmented dataset
model_svm_3 = SVC(C=10,kernel='poly')

In [None]:
model_svm_3.fit(X_augmented, y_augmented)

In [76]:
pred = model_svm_3.predict(X_test)
print(classification_report(pred,y_test))

              precision    recall  f1-score   support

         0.0       0.97      1.00      0.98     11483
         1.0       0.94      0.34      0.50       517

    accuracy                           0.97     12000
   macro avg       0.95      0.67      0.74     12000
weighted avg       0.97      0.97      0.96     12000



In [None]:
model_log_3 = LogisticRegression(C=10, penalty='l1', solver='liblinear')

In [None]:
model_log_3.fit(X_augmented, y_augmented)

In [None]:
pred = model_log_3.predict(X_test)
print(classification_report(pred,y_test))

In [None]:
model_dt_3=DecisionTreeClassifier(max_depth=25,max_leaf_nodes=10)

In [None]:
model_dt_3.fit(X_augmented, y_augmented)

In [None]:
pred=model_dt_3.predict(X_test)

In [None]:
print(classification_report(y_test,pred))