<a href="https://colab.research.google.com/github/MStamirski/Spaceship-Titanic/blob/main/Model_Ridge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preparation

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount("/content/drive")
# Make the project folder the working directory. Later cells import sibling
# notebooks by bare name and write the results CSV via a relative path, so
# they presumably rely on this directory change — confirm before moving it.
%cd "/content/drive/MyDrive/Colab_Notebooks/SDA_upskill/Spaceship"

# Model with parameters

In [None]:
from sklearn.linear_model import RidgeClassifier
from sklearn.metrics import accuracy_score

In [None]:
import warnings
# Suppress every warning category for the rest of the kernel session.
# NOTE(review): this is a blanket filter, so it also hides any numerical or
# deprecation warnings raised while fitting — consider narrowing it to the
# specific category that is noisy.
warnings.filterwarnings("ignore")

In [None]:
# Baseline estimator with default hyperparameters; it is the model handed to
# verify_feature_dataset in the initial-verification cells.
model = RidgeClassifier()

In [None]:
def objective(trial):
  """Score one sampled ``alpha`` value for the hyperparameter search.

  A RidgeClassifier is fitted on the notebook-level ``X_train`` / ``y_train``
  and evaluated on ``X_val`` / ``y_val``. These four names are resolved as
  globals at call time, so they must be bound before the search is started.

  Args:
    trial: object exposing ``suggest_float`` (presumably an Optuna trial —
      confirm against model_optimization); it supplies the candidate alpha,
      sampled on a log scale between 1e-10 and 1e10.

  Returns:
    The result of ``accuracy_score`` for the validation predictions.
  """
  ridge = RidgeClassifier(
    alpha=trial.suggest_float('alpha', 1e-10, 1e10, log=True)
  )
  ridge.fit(X_train, y_train)

  return accuracy_score(y_val, ridge.predict(X_val))

# Feature datasets

In [None]:
!pip install import-ipynb

In [None]:
import import_ipynb

In [None]:
from FeaturesEngineering import get_features, categories_one_hot_encoding, categories_target_encoding, categories_leave_one_out_encoding

In [None]:
# Load the 'train' features once, then build one variant per categorical
# encoding helper so every variant starts from the same frame.
# NOTE(review): what each helper does is defined in FeaturesEngineering; the
# names suggest one-hot, target and leave-one-out encoding — confirm there.
df = get_features('train')
df_ohe = categories_one_hot_encoding(df)
df_te = categories_target_encoding(df)
df_looe = categories_leave_one_out_encoding(df)

# Initial verification

In [None]:
from Optimization import verify_feature_dataset, get_subsets, model_optimization, test_tuned_model

In [None]:
# Initial (pre-tuning) check of `model` on the df_ohe variant; the returned
# value is later saved in the 'Initial_accuracy' column of the results file.
acc_ohe_ini = verify_feature_dataset(df_ohe, model)

Accuracy: 77.10889570552148%


In [None]:
# Initial (pre-tuning) check of `model` on the df_te variant; the returned
# value is later saved in the 'Initial_accuracy' column of the results file.
acc_te_ini = verify_feature_dataset(df_te, model)

Accuracy: 76.22699386503068%


In [None]:
# Initial (pre-tuning) check of `model` on the df_looe variant; the returned
# value is later saved in the 'Initial_accuracy' column of the results file.
acc_looe_ini = verify_feature_dataset(df_looe, model)

Accuracy: 75.920245398773%


# Parameters tuning

In [None]:
# Split the df_ohe variant. These six names must stay notebook-level globals:
# objective() reads X_train / y_train / X_val / y_val from the global namespace.
X_train, X_val, X_test, y_train, y_val, y_test = get_subsets(df_ohe)
# Run the search over alpha with objective() (first argument: 100).
params = model_optimization(100, objective)
# Keep the tuned estimator under its own name. Rebinding `model` here would
# make a later re-run of the initial-verification cells score the tuned
# estimator instead of the default-parameter baseline.
model_ohe_tuned = RidgeClassifier(alpha=params['alpha'])
result_ohe_tuned = test_tuned_model(model_ohe_tuned, X_train, y_train, X_test, y_test)

[32m[I 2023-04-08 17:47:10,912][0m A new study created in memory with name: no-name-9a5e2b82-6bf3-463e-9acb-bdef3b8c46d3[0m
[32m[I 2023-04-08 17:47:10,965][0m Trial 0 finished with value: 0.7951114306254493 and parameters: {'alpha': 103.95059686789087}. Best is trial 0 with value: 0.7951114306254493.[0m
[32m[I 2023-04-08 17:47:11,015][0m Trial 1 finished with value: 0.7929547088425594 and parameters: {'alpha': 0.0009969511333633512}. Best is trial 0 with value: 0.7951114306254493.[0m
[32m[I 2023-04-08 17:47:11,071][0m Trial 2 finished with value: 0.7929547088425594 and parameters: {'alpha': 8.225417746217222e-07}. Best is trial 0 with value: 0.7951114306254493.[0m
[32m[I 2023-04-08 17:47:11,175][0m Trial 3 finished with value: 0.5025161754133717 and parameters: {'alpha': 548309.992491433}. Best is trial 0 with value: 0.7951114306254493.[0m
[32m[I 2023-04-08 17:47:11,232][0m Trial 4 finished with value: 0.4996405463695183 and parameters: {'alpha': 1448349637.2650511}. B


Number of finished trials: 100
Best trial:
Value: 0.7994248741912293
  Params: 
    alpha: 63.52006286202853

Accuracy after tuning: 77.51581368602645%

Classification report
              precision    recall  f1-score   support

       False       0.79      0.74      0.77       861
        True       0.76      0.81      0.78       878

    accuracy                           0.78      1739
   macro avg       0.78      0.77      0.77      1739
weighted avg       0.78      0.78      0.77      1739


Confusion matrix
[[640 221]
 [170 708]]

All features number: 90, Selected features number: 32
Selected_features:

HomePlanet_Earth
HomePlanet_Europa
CryoSleep_False
CryoSleep_True
Cabin_deck_C
Cabin_deck_G
Cabin_num/100_3
Cabin_num/100_4
Cabin_num/100_8
Cabin_num/100_9
Cabin_num/100_10
Cabin_num/100_11
Cabin_num/100_16
RService_deciles_1
RService_deciles_7
RService_deciles_8
RService_deciles_9
RService_deciles_10
FCourt_deciles_7
FCourt_deciles_8
FCourt_deciles_10
ShMall_deciles_8
ShMall_de

In [None]:
# Split the df_te variant. These six names must stay notebook-level globals:
# objective() reads X_train / y_train / X_val / y_val from the global namespace.
X_train, X_val, X_test, y_train, y_val, y_test = get_subsets(df_te)
# Run the search over alpha with objective() (first argument: 100).
params = model_optimization(100, objective)
# Keep the tuned estimator under its own name. Rebinding `model` here would
# make a later re-run of the initial-verification cells score the tuned
# estimator instead of the default-parameter baseline.
model_te_tuned = RidgeClassifier(alpha=params['alpha'])
result_te_tuned = test_tuned_model(model_te_tuned, X_train, y_train, X_test, y_test)

[32m[I 2023-04-08 17:47:17,325][0m A new study created in memory with name: no-name-8ad8120d-d2fe-4816-8f96-2e008db8b2dc[0m
[32m[I 2023-04-08 17:47:17,351][0m Trial 0 finished with value: 0.7721063982746226 and parameters: {'alpha': 6.525482387318574}. Best is trial 0 with value: 0.7721063982746226.[0m
[32m[I 2023-04-08 17:47:17,364][0m Trial 1 finished with value: 0.7742631200575126 and parameters: {'alpha': 0.00440047833072303}. Best is trial 1 with value: 0.7742631200575126.[0m
[32m[I 2023-04-08 17:47:17,377][0m Trial 2 finished with value: 0.4996405463695183 and parameters: {'alpha': 77675.88418365645}. Best is trial 1 with value: 0.7742631200575126.[0m
[32m[I 2023-04-08 17:47:17,394][0m Trial 3 finished with value: 0.7742631200575126 and parameters: {'alpha': 0.0003281677039116774}. Best is trial 1 with value: 0.7742631200575126.[0m
[32m[I 2023-04-08 17:47:17,408][0m Trial 4 finished with value: 0.4996405463695183 and parameters: {'alpha': 148681662.22158888}. Bes


Number of finished trials: 100
Best trial:
Value: 0.7749820273184759
  Params: 
    alpha: 0.21896434524500266

Accuracy after tuning: 76.19321449108683%

Classification report
              precision    recall  f1-score   support

       False       0.76      0.76      0.76       861
        True       0.77      0.76      0.76       878

    accuracy                           0.76      1739
   macro avg       0.76      0.76      0.76      1739
weighted avg       0.76      0.76      0.76      1739


Confusion matrix
[[658 203]
 [211 667]]

All features number: 14, Selected features number: 8
Selected_features:

HomePlanet_transformed
Destination_transformed
VIP_transformed
Cabin_side_transformed
Cabin_num/100_transformed
RService_deciles_transformed
Spa_deciles_transformed
VRD_deciles_transformed


In [None]:
# Split the df_looe variant. These six names must stay notebook-level globals:
# objective() reads X_train / y_train / X_val / y_val from the global namespace.
X_train, X_val, X_test, y_train, y_val, y_test = get_subsets(df_looe)
# Run the search over alpha with objective() (first argument: 100).
params = model_optimization(100, objective)
# Keep the tuned estimator under its own name. Rebinding `model` here would
# make a later re-run of the initial-verification cells score the tuned
# estimator instead of the default-parameter baseline.
model_looe_tuned = RidgeClassifier(alpha=params['alpha'])
result_looe_tuned = test_tuned_model(model_looe_tuned, X_train, y_train, X_test, y_test)

[32m[I 2023-04-08 17:47:20,084][0m A new study created in memory with name: no-name-a41bf1d3-e663-4675-b272-61d3a74c53b5[0m
[32m[I 2023-04-08 17:47:20,110][0m Trial 0 finished with value: 0.7634795111430626 and parameters: {'alpha': 0.0006453963396363557}. Best is trial 0 with value: 0.7634795111430626.[0m
[32m[I 2023-04-08 17:47:20,134][0m Trial 1 finished with value: 0.4996405463695183 and parameters: {'alpha': 1692759.2160461487}. Best is trial 0 with value: 0.7634795111430626.[0m
[32m[I 2023-04-08 17:47:20,156][0m Trial 2 finished with value: 0.5025161754133717 and parameters: {'alpha': 57451.643054182474}. Best is trial 0 with value: 0.7634795111430626.[0m
[32m[I 2023-04-08 17:47:20,179][0m Trial 3 finished with value: 0.7634795111430626 and parameters: {'alpha': 2.343006152766066}. Best is trial 0 with value: 0.7634795111430626.[0m
[32m[I 2023-04-08 17:47:20,210][0m Trial 4 finished with value: 0.7634795111430626 and parameters: {'alpha': 0.01829172493814857}. Be


Number of finished trials: 100
Best trial:
Value: 0.7649173256649893
  Params: 
    alpha: 3.5859437153963456

Accuracy after tuning: 75.56066705002875%

Classification report
              precision    recall  f1-score   support

       False       0.75      0.77      0.76       861
        True       0.77      0.74      0.75       878

    accuracy                           0.76      1739
   macro avg       0.76      0.76      0.76      1739
weighted avg       0.76      0.76      0.76      1739


Confusion matrix
[[662 199]
 [226 652]]

All features number: 14, Selected features number: 7
Selected_features:

HomePlanet_transformed
Destination_transformed
Cabin_side_transformed
Cabin_num/100_transformed
RService_deciles_transformed
Spa_deciles_transformed
VRD_deciles_transformed


# Save results

In [None]:
# One row per encoding variant, in the column order of the results file:
# model name, encoding tag, initial accuracy, tuned accuracy, and a
# "<result[1]> / <total>" string (result[1] is presumably the selected-feature
# count reported by test_tuned_model — confirm in Optimization).
data = [
  ['RidgeClassifier', 'OHE', acc_ohe_ini, result_ohe_tuned[0], str(result_ohe_tuned[1])+" / 90"],
  ['RidgeClassifier', 'TE', acc_te_ini, result_te_tuned[0], str(result_te_tuned[1])+" / 14"],
  ['RidgeClassifier', 'LOOE', acc_looe_ini, result_looe_tuned[0], str(result_looe_tuned[1])+" / 14"],
]

In [None]:
import csv
from os.path import exists
# Results CSV that this notebook appends to; the path is relative, so it
# resolves against the current working directory.
resfile = 'spaceship_results.csv'

In [None]:
# Append this notebook's rows to the results file, writing the header row
# first when the file does not exist yet.
header = ['Model', 'Categories_encoding', 'Initial_accuracy', 'Tuned_Accuracy', 'Important_Features']
write_header = not exists(resfile)

# Mode 'a' creates the file when it is missing, so one open() covers both cases.
# newline='' is what the csv module expects for files it writes; without it the
# append path could emit blank rows on platforms that translate newlines.
# The with-block closes the file even if a write raises.
with open(resfile, 'a', newline='') as f:
  writer = csv.writer(f)
  if write_header:
    writer.writerow(header)
  writer.writerows(data)