<a href="https://colab.research.google.com/github/MStamirski/Spaceship-Titanic/blob/main/Model_LinearSVC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preparation

In [None]:
# Mount Google Drive so files stored there are reachable under /content/drive.
from google.colab import drive
drive.mount("/content/drive")
# Make the project folder the working directory; the relative results-file
# path used at the end of this notebook resolves against it.
%cd "/content/drive/MyDrive/Colab_Notebooks/SDA_upskill/Spaceship"

# Model with parameters

In [None]:
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

In [None]:
import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides any solver/convergence warnings raised while
# fitting during tuning — consider narrowing the filter; confirm intent.
warnings.filterwarnings("ignore")

In [None]:
# Baseline estimator with default hyperparameters, used for the initial
# verification. The tuning cells below rebind `model`, so re-run this cell
# before repeating the initial verification in the same kernel session.
model = LinearSVC()

In [None]:
def objective(trial):
  """Score one candidate value of the LinearSVC regularization parameter C.

  Asks `trial` for a float named 'par_c' in [1e-10, 1e10] (log=True), fits a
  LinearSVC with that C on the notebook-level `X_train` / `y_train`, and
  measures accuracy of its predictions on `X_val` / `y_val`.

  Note: the data splits are read from notebook globals, so they must be
  assigned under exactly these names before the search is started.

  Args:
    trial: object exposing `suggest_float` (presumably an Optuna trial —
      confirm against `model_optimization`).

  Returns:
    The validation accuracy returned by `accuracy_score`.
  """
  regularization = trial.suggest_float('par_c', 1e-10, 1e10, log=True)

  candidate = LinearSVC(C=regularization)
  candidate.fit(X_train, y_train)

  return accuracy_score(y_val, candidate.predict(X_val))

# Feature datasets

In [None]:
# Install import-ipynb, which the next cell imports before the project's
# FeaturesEngineering / Optimization modules are loaded (presumably sibling
# .ipynb notebooks in the working directory — confirm).
!pip install import-ipynb

In [None]:
import import_ipynb

In [None]:
from FeaturesEngineering import get_features, categories_one_hot_encoding, categories_target_encoding, categories_leave_one_out_encoding

In [None]:
# Load the engineered features for the 'train' set.
df = get_features('train')
# Build three variants of the same data that differ only in how the
# categorical columns are encoded; each variant is evaluated and tuned below.
df_ohe = categories_one_hot_encoding(df)          # one-hot encoding
df_te = categories_target_encoding(df)            # target encoding
df_looe = categories_leave_one_out_encoding(df)   # leave-one-out encoding

# Initial verification

In [None]:
from Optimization import verify_feature_dataset, get_subsets, model_optimization, test_tuned_model

In [None]:
# Initial (untuned) check of `model` on the one-hot-encoded dataset; the stored
# value is later written to the 'Initial_accuracy' results column.
acc_ohe_ini = verify_feature_dataset(df_ohe, model)

Accuracy: 77.37730061349694%


In [None]:
# Initial (untuned) check of `model` on the target-encoded dataset; the stored
# value is later written to the 'Initial_accuracy' results column.
acc_te_ini = verify_feature_dataset(df_te, model)

Accuracy: 76.07361963190185%


In [None]:
# Initial (untuned) check of `model` on the leave-one-out-encoded dataset; the
# stored value is later written to the 'Initial_accuracy' results column.
acc_looe_ini = verify_feature_dataset(df_looe, model)

Accuracy: 75.72852760736197%


# Parameter tuning

In [None]:
# Split the one-hot-encoded dataset. `objective` reads X_train, y_train, X_val
# and y_val as notebook globals, so these exact names must be rebound here
# before the search runs.
X_train, X_val, X_test, y_train, y_val, y_test = get_subsets(df_ohe)
# Search for C using `objective`; the output below reports 100 finished trials.
params = model_optimization(100, objective)
# Rebuild the classifier with the best 'par_c' found (this rebinds the global `model`).
model = LinearSVC(C=params['par_c'])
# Evaluate on the test split. The result is indexed later as [0] (tuned
# accuracy) and [1] (selected-feature count) when the results rows are built.
result_ohe_tuned = test_tuned_model(model, X_train, y_train, X_test, y_test)

[32m[I 2023-04-08 18:39:08,104][0m A new study created in memory with name: no-name-07417fe1-8776-426a-8b26-980966043546[0m
[32m[I 2023-04-08 18:39:09,000][0m Trial 0 finished with value: 0.6678648454349388 and parameters: {'par_c': 7593.6243810489705}. Best is trial 0 with value: 0.6678648454349388.[0m
[32m[I 2023-04-08 18:39:09,027][0m Trial 1 finished with value: 0.798705966930266 and parameters: {'par_c': 0.0067182088129346985}. Best is trial 1 with value: 0.798705966930266.[0m
[32m[I 2023-04-08 18:39:09,534][0m Trial 2 finished with value: 0.7455068296189792 and parameters: {'par_c': 178953.60041702475}. Best is trial 1 with value: 0.798705966930266.[0m
[32m[I 2023-04-08 18:39:09,993][0m Trial 3 finished with value: 0.5204888569374551 and parameters: {'par_c': 6371243.654292144}. Best is trial 1 with value: 0.798705966930266.[0m
[32m[I 2023-04-08 18:39:10,457][0m Trial 4 finished with value: 0.6966211358734723 and parameters: {'par_c': 10018333.03885293}. Best is 


Number of finished trials: 100
Best trial:
Value: 0.7994248741912293
  Params: 
    par_c: 0.00593102224699834

Accuracy after tuning: 77.57331799884992%

Classification report
              precision    recall  f1-score   support

       False       0.79      0.74      0.77       861
        True       0.76      0.81      0.78       878

    accuracy                           0.78      1739
   macro avg       0.78      0.78      0.78      1739
weighted avg       0.78      0.78      0.78      1739


Confusion matrix
[[641 220]
 [170 708]]

All features number: 90, Selected features number: 32
Selected_features:

HomePlanet_Earth
HomePlanet_Europa
CryoSleep_False
CryoSleep_True
Cabin_deck_C
Cabin_deck_G
Cabin_side_P
Cabin_num/100_3
Cabin_num/100_4
Cabin_num/100_8
Cabin_num/100_9
Cabin_num/100_10
Cabin_num/100_11
RService_deciles_1
RService_deciles_7
RService_deciles_8
RService_deciles_9
RService_deciles_10
FCourt_deciles_7
FCourt_deciles_8
FCourt_deciles_10
ShMall_deciles_8
ShMall_deci

In [None]:
# Split the target-encoded dataset. `objective` reads X_train, y_train, X_val
# and y_val as notebook globals, so these exact names must be rebound here
# before the search runs.
X_train, X_val, X_test, y_train, y_val, y_test = get_subsets(df_te)
# Search for C using `objective`; the output below reports 100 finished trials.
params = model_optimization(100, objective)
# Rebuild the classifier with the best 'par_c' found (this rebinds the global `model`).
model = LinearSVC(C=params['par_c'])
# Evaluate on the test split. The result is indexed later as [0] (tuned
# accuracy) and [1] (selected-feature count) when the results rows are built.
result_te_tuned = test_tuned_model(model, X_train, y_train, X_test, y_test)

[32m[I 2023-04-08 18:39:25,095][0m A new study created in memory with name: no-name-f9f79ac3-346f-48b5-9530-0a8dddf7f3a9[0m
[32m[I 2023-04-08 18:39:25,174][0m Trial 0 finished with value: 0.7692307692307693 and parameters: {'par_c': 0.17165155021089723}. Best is trial 0 with value: 0.7692307692307693.[0m
[32m[I 2023-04-08 18:39:25,292][0m Trial 1 finished with value: 0.7706685837526959 and parameters: {'par_c': 0.3267143160552438}. Best is trial 1 with value: 0.7706685837526959.[0m
[32m[I 2023-04-08 18:39:26,041][0m Trial 2 finished with value: 0.7742631200575126 and parameters: {'par_c': 4.933821740398624}. Best is trial 2 with value: 0.7742631200575126.[0m
[32m[I 2023-04-08 18:39:26,782][0m Trial 3 finished with value: 0.6326383896477354 and parameters: {'par_c': 455.32286412140814}. Best is trial 2 with value: 0.7742631200575126.[0m
[32m[I 2023-04-08 18:39:26,809][0m Trial 4 finished with value: 0.4996405463695183 and parameters: {'par_c': 2.2717223609337107e-06}. B


Number of finished trials: 100
Best trial:
Value: 0.7749820273184759
  Params: 
    par_c: 16.63710131735602

Accuracy after tuning: 76.25071880391029%

Classification report
              precision    recall  f1-score   support

       False       0.76      0.76      0.76       861
        True       0.77      0.76      0.76       878

    accuracy                           0.76      1739
   macro avg       0.76      0.76      0.76      1739
weighted avg       0.76      0.76      0.76      1739


Confusion matrix
[[657 204]
 [209 669]]

All features number: 14, Selected features number: 8
Selected_features:

HomePlanet_transformed
Destination_transformed
VIP_transformed
Cabin_side_transformed
Cabin_num/100_transformed
RService_deciles_transformed
Spa_deciles_transformed
VRD_deciles_transformed


In [None]:
# Split the leave-one-out-encoded dataset. `objective` reads X_train, y_train,
# X_val and y_val as notebook globals, so these exact names must be rebound
# here before the search runs.
X_train, X_val, X_test, y_train, y_val, y_test = get_subsets(df_looe)
# Search for C using `objective`; the output below reports 100 finished trials.
params = model_optimization(100, objective)
# Rebuild the classifier with the best 'par_c' found (this rebinds the global `model`).
model = LinearSVC(C=params['par_c'])
# Evaluate on the test split. The result is indexed later as [0] (tuned
# accuracy) and [1] (selected-feature count) when the results rows are built.
result_looe_tuned = test_tuned_model(model, X_train, y_train, X_test, y_test)

[32m[I 2023-04-08 18:40:05,720][0m A new study created in memory with name: no-name-6d3fb797-cb79-450a-bf75-3cd7ac31c4ac[0m
[32m[I 2023-04-08 18:40:06,149][0m Trial 0 finished with value: 0.5125808770668584 and parameters: {'par_c': 1566564805.4624257}. Best is trial 0 with value: 0.5125808770668584.[0m
[32m[I 2023-04-08 18:40:06,612][0m Trial 1 finished with value: 0.586628324946082 and parameters: {'par_c': 8071.152978466635}. Best is trial 1 with value: 0.586628324946082.[0m
[32m[I 2023-04-08 18:40:07,056][0m Trial 2 finished with value: 0.7699496764917325 and parameters: {'par_c': 8.252923498375232}. Best is trial 2 with value: 0.7699496764917325.[0m
[32m[I 2023-04-08 18:40:07,091][0m Trial 3 finished with value: 0.7706685837526959 and parameters: {'par_c': 0.14800078018471557}. Best is trial 3 with value: 0.7706685837526959.[0m
[32m[I 2023-04-08 18:40:07,551][0m Trial 4 finished with value: 0.5384615384615384 and parameters: {'par_c': 21892.553592677625}. Best is 


Number of finished trials: 100
Best trial:
Value: 0.7728253055355859
  Params: 
    par_c: 0.286712025483484

Accuracy after tuning: 76.19321449108683%

Classification report
              precision    recall  f1-score   support

       False       0.75      0.77      0.76       861
        True       0.77      0.75      0.76       878

    accuracy                           0.76      1739
   macro avg       0.76      0.76      0.76      1739
weighted avg       0.76      0.76      0.76      1739


Confusion matrix
[[667 194]
 [220 658]]

All features number: 14, Selected features number: 6
Selected_features:

HomePlanet_transformed
Cabin_side_transformed
Cabin_num/100_transformed
RService_deciles_transformed
Spa_deciles_transformed
VRD_deciles_transformed


# Save results

In [None]:
# One results row per categorical-encoding variant, in the column order
# Model, Categories_encoding, Initial_accuracy, Tuned_Accuracy, Important_Features.
# The last column is "<selected features> / <total features>"; the totals
# (90 for OHE, 14 for TE and LOOE) are the values reported by the tuning cells.
data = [
  ['LinearSVC', encoding, initial_accuracy, tuned[0], str(tuned[1])+total_suffix]
  for encoding, initial_accuracy, tuned, total_suffix in (
    ('OHE', acc_ohe_ini, result_ohe_tuned, " / 90"),
    ('TE', acc_te_ini, result_te_tuned, " / 14"),
    ('LOOE', acc_looe_ini, result_looe_tuned, " / 14"),
  )
]

In [None]:
import csv
from os.path import exists
# Results CSV that this notebook appends to. The path is relative, so it
# resolves against the working directory set by the `%cd` in the Preparation cell.
resfile = 'spaceship_results.csv'

In [None]:
# Append this run's rows to the results CSV, creating the file with a header
# row on first use. Re-running this cell appends the same rows again.
header = ['Model', 'Categories_encoding', 'Initial_accuracy', 'Tuned_Accuracy', 'Important_Features']
needs_header = not exists(resfile)  # decided before open(), since 'a' creates the file

# newline='' is what the csv module expects in every mode; without it, appended
# rows can pick up extra blank lines on platforms that translate line endings.
# The `with` block closes the file even if a write raises.
with open(resfile, 'a', newline='') as f:
  writer = csv.writer(f)
  if needs_header:
    writer.writerow(header)
  writer.writerows(data)