In [33]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC  # Support Vector Classifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV

In [34]:
# Reading the dataset
titanic = pd.read_csv('titanic.csv')

In [35]:
#категоризирую пол
titanic['Sex'] = titanic['Sex'].map({'male':0, 'female':1})

# #Создаю новый параметр Female size
# titanic['Female size'] = titanic[['Siblings/Spouses Aboard', 'Parents/Children Aboard']].sum(axis=1)

#определяю X как титаник без выживших и имени, потому что они не нужны, и y как выживших    
X, y =titanic.drop(['Survived', 'Name'], axis=1), titanic['Survived']

X.shape,y.shape

((887, 7), (887,))

In [36]:
# Categorize the target into binary classes
y_class = [target for target in y]

In [37]:

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y_class, test_size=0.2, random_state=42)


In [38]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [39]:

# Initialize the SVM Classifier
svm_classifier = SVC()

# Train the classifier
svm_classifier.fit(X_train, y_train)


In [40]:

# Make predictions
y_pred = svm_classifier.predict(X_test)

In [41]:
scores = cross_val_score(svm_classifier, X_train, y_train, cv=10, scoring='f1_macro')
scores

array([0.81478261, 0.76629361, 0.81794872, 0.9073913 , 0.8317173 ,
       0.86231416, 0.76094276, 0.78760684, 0.8405146 , 0.83300802])

In [42]:
scores.mean()

0.8222519927564745

In [49]:
# Define the hyperparameters and their possible values
param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': [0.01, 0.1, 1, 'scale'],
}


In [50]:

# Create a GridSearchCV object
grid_search = GridSearchCV(svm_classifier, param_grid, cv=5, scoring='accuracy') # f1_macro, f1 # , verbose=1, n_jobs=-1

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

# Print the best hyperparameters
print("Best Hyperparameters:", grid_search.best_params_)


Best Hyperparameters: {'C': 10, 'gamma': 0.1, 'kernel': 'rbf'}


In [51]:

# Get the best model
best_model = grid_search.best_estimator_

# Evaluate the best model on the test set
accuracy = best_model.score(X_test, y_test)
print("Test Accuracy:", accuracy)


Test Accuracy: 0.7865168539325843


In [52]:
scores = cross_val_score(best_model, X_train, y_train, cv=10, scoring='accuracy')
print(scores)
print(scores.mean())

[0.8028169  0.8028169  0.85915493 0.92957746 0.85915493 0.87323944
 0.77464789 0.83098592 0.84507042 0.81428571]
0.8391750503018109


In [48]:

# Create a GridSearchCV object
grid_search = GridSearchCV(svm_classifier, param_grid, cv=5, scoring='f1_macro') # f1_macro, f1 # , verbose=1, n_jobs=-1

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

# Print the best hyperparameters
print("Best Hyperparameters:", grid_search.best_params_)


KeyboardInterrupt: 

In [53]:

# Get the best model
best_model = grid_search.best_estimator_

# Evaluate the best model on the test set
accuracy = best_model.score(X_test, y_test)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.7865168539325843


In [54]:
scores = cross_val_score(best_model, X_train, y_train, cv=10, scoring='f1_macro')
print(scores)
print(scores.mean())

[0.78391304 0.77969858 0.8482906  0.92209787 0.8482906  0.86231416
 0.76094276 0.81478261 0.8405146  0.80534759]
0.8266192413455459


In [55]:

# Create a GridSearchCV object
grid_search = GridSearchCV(svm_classifier, param_grid, cv=5, scoring='f1') # f1_macro, f1 # , verbose=1, n_jobs=-1

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

# Print the best hyperparameters
print("Best Hyperparameters:", grid_search.best_params_)


Best Hyperparameters: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}


In [56]:

# Get the best model
best_model = grid_search.best_estimator_

# Evaluate the best model on the test set
accuracy = best_model.score(X_test, y_test)
print("Test Accuracy:", accuracy)

Test Accuracy: 0.7865168539325843


In [57]:
scores = cross_val_score(best_model, X_train, y_train, cv=10, scoring='f1')
print(scores)
print(scores.mean())

[0.76       0.69387755 0.76923077 0.88       0.78431373 0.82352941
 0.7037037  0.73076923 0.81355932 0.79245283]
0.7751436544201591


In [58]:

# Evaluate the model
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

conf_matrix

array([[99, 12],
       [26, 41]], dtype=int64)

In [59]:
print(class_report)

              precision    recall  f1-score   support

           0       0.79      0.89      0.84       111
           1       0.77      0.61      0.68        67

    accuracy                           0.79       178
   macro avg       0.78      0.75      0.76       178
weighted avg       0.79      0.79      0.78       178

