<a href="https://colab.research.google.com/github/MStamirski/Spaceship-Titanic/blob/main/Optimization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.feature_selection import SelectFromModel

In [None]:
!pip install optuna

In [None]:
import optuna

In [None]:
def verify_feature_dataset(dataset, model, test_size=0.3, random_state=42):
  """Fit `model` on a hold-out split of `dataset` and report its accuracy.

  The 'PassengerId' and 'Transported' columns are dropped to build the
  feature matrix; 'Transported' is used as the target.

  Args:
    dataset: Table holding the features plus the 'PassengerId' and
      'Transported' columns (assumed to be a pandas DataFrame).
    model: Estimator exposing `fit(X, y)` and `predict(X)`; it is fitted
      on the training part of the split.
    test_size: Forwarded to `train_test_split`. Defaults to 0.3.
    random_state: Seed forwarded to `train_test_split`. Defaults to 42.

  Returns:
    The value of `accuracy_score` on the test part of the split.
  """
  # `drop` (without inplace) already returns a new object, so a defensive
  # copy of the whole dataset is not needed here.
  X = dataset.drop(columns=['PassengerId', 'Transported'])
  y = dataset['Transported']
  X_train, X_test, y_train, y_test = train_test_split(
      X, y, test_size=test_size, random_state=random_state)
  model.fit(X_train, y_train)
  y_pred = model.predict(X_test)
  accuracy = accuracy_score(y_test, y_pred)
  print(f"Accuracy: {accuracy*100}%")
  return accuracy

In [None]:
def get_subsets(dataset):
  """Split `dataset` into train, validation and test features and targets.

  'Transported' is the target; the features are every column except
  'PassengerId' and 'Transported'.

  Returns:
    A tuple `(X_train, X_val, X_test, y_train, y_val, y_test)`.
  """
  target = dataset['Transported']
  features = dataset.drop(columns=['PassengerId', 'Transported'])

  # Stage 1: hold out 20% of all rows as the test set.
  X_rest, X_test, y_rest, y_test = train_test_split(
      features, target, test_size = 0.2, random_state=42)

  # Stage 2: hold out 20% of the remaining rows as the validation set.
  X_train, X_val, y_train, y_val = train_test_split(
      X_rest, y_rest, test_size = 0.2, random_state=43)

  return X_train, X_val, X_test, y_train, y_val, y_test

In [None]:
def best_parameters(study):
  """Print a summary of the best trial of `study` and return its parameters.

  Args:
    study: Object exposing `trials` (a sized collection) and `best_trial`
      (with `value` and `params` attributes).

  Returns:
    The `params` mapping of the best trial.
  """
  separator = "=============================================="

  print("\n" + separator)
  print(f"Number of finished trials: {len(study.trials)}")
  print('Best trial:')

  best = study.best_trial
  print(f"Value: {best.value}")
  print('  Params: ')
  chosen = best.params
  for name in chosen:
    print(f"    {name}: {chosen[name]}")
  print(separator)

  return chosen

In [None]:
def model_optimization(trials, objective, samp=None):
  """Run an Optuna maximization study and return the best parameters found.

  Args:
    trials: Number of trials, forwarded as `n_trials` to `study.optimize`.
    objective: Objective callable forwarded to `study.optimize`.
    samp: Optional sampler forwarded to `optuna.create_study`. When None,
      Optuna falls back to its default sampler.

  Returns:
    The parameter mapping returned by `best_parameters` for the study.
  """
  # `sampler=None` is create_study's own default, so a single call covers
  # both the custom-sampler and default-sampler cases. This also avoids the
  # `samp != None` comparison, which would dispatch to the sampler's `__ne__`.
  study = optuna.create_study(direction='maximize', sampler=samp)

  study.optimize(objective, n_trials=trials)
  return best_parameters(study)

In [None]:
def test_tuned_model(model, X_train, y_train, X_test, y_test):
  """Fit the tuned `model`, report test metrics and list selected features.

  The model is fitted on the training data and scored on the test data;
  accuracy, the classification report and the confusion matrix are printed.
  A `SelectFromModel` selector built around `model` is then fitted on the
  training data and the names of the features it keeps are printed.

  Args:
    model: Estimator exposing `fit` and `predict`.
    X_train: Training features; must expose `.columns` supporting boolean
      mask indexing (presumably a pandas DataFrame).
    y_train: Training targets.
    X_test: Test features.
    y_test: Test targets.

  Returns:
    A tuple `(accuracy, number_of_selected_features)`.
  """
  divider = "=============================================="

  # --- Evaluation on the hold-out data ---
  model.fit(X_train, y_train)
  predictions = model.predict(X_test)
  accuracy = accuracy_score(y_test, predictions)

  print(f"\nAccuracy after tuning: {accuracy*100}%")
  print("\nClassification report")
  print(classification_report(y_test, predictions))
  print("\nConfusion matrix")
  print(confusion_matrix(y_test, predictions))
  print(divider)

  # --- Feature selection driven by the model ---
  selector = SelectFromModel(model)
  selector.fit(X_train, y_train)
  keep_mask = selector.get_support()
  columns = X_train.columns
  kept = columns[keep_mask].tolist()
  kept_count = len(kept)

  print(f"\nAll features number: {len(columns)}, Selected features number: {kept_count}")
  print("Selected_features:\n")
  for column_name in kept:
    print(column_name)

  return accuracy, kept_count