In [1]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.model_selection import GridSearchCV
from sklearn import metrics
try:
    from sklearn.model_selection import StratifiedKFold
except ImportError:
    from sklearn.cross_validation import StratifiedKFold

In [2]:
# Preview the first five rows of the raw CSV before any preprocessing.
df = pd.read_csv('Titanic.csv')
df.head(n=5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
def load_data(data_path, drop_columns=('Name', 'Ticket', 'Cabin')):
    """Load the Titanic CSV and return standardized features plus the target.

    Steps: drop ``drop_columns``, encode ``Sex`` and ``Embarked`` as integers,
    drop every row that still has a missing value, split off ``Survived`` and
    standardize the remaining columns with ``StandardScaler``.

    Parameters
    ----------
    data_path : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.
    drop_columns : iterable of str, optional
        Columns removed before encoding. The default reproduces the original
        hard-coded list.

    Returns
    -------
    X_ans : array
        Standardized feature matrix; scaling converts the DataFrame into a
        plain array, so column names are lost.
    y : pandas.Series
        The ``Survived`` column for the rows that were kept (original index).

    Raises
    ------
    ValueError
        If ``Sex`` or ``Embarked`` contains a label without a known encoding.

    Notes
    -----
    NOTE(review): with the default ``drop_columns`` identifier-like columns
    present in the file (e.g. ``PassengerId``) remain in the feature matrix;
    pass them in ``drop_columns`` to exclude them.
    NOTE(review): the scaler is fit on every row returned here, so a later
    train/test split of this output shares scaling statistics between parts.
    """
    encodings = {
        'Sex': {'male': 0, 'female': 1},
        'Embarked': {'S': 0, 'C': 1, 'Q': 2},
    }

    df = pd.read_csv(data_path).drop(columns=list(drop_columns))

    for column, mapping in encodings.items():
        if column not in df.columns:
            # The caller asked for this column to be dropped; nothing to encode.
            continue
        # Fail early on unexpected labels instead of letting them reach the
        # scaler as strings (map() would otherwise turn them into NaN and the
        # rows would be dropped silently).
        unknown = set(df[column].dropna().unique()) - set(mapping)
        if unknown:
            raise ValueError(
                'Unexpected values in column %r: %r' % (column, sorted(map(str, unknown)))
            )
        # map() builds the numeric column directly rather than writing the
        # result of replace() back through .loc, which relies on pandas'
        # in-place setting and downcasting behaviour.
        df[column] = df[column].map(mapping)

    # Missing values (including unencoded NaN in Embarked) remove the whole row.
    df = df.dropna(axis=0)

    y = df['Survived']
    X = df.drop('Survived', axis=1)

    # Standardize the features; after this step the data is a plain array
    # rather than a DataFrame.
    sc = StandardScaler().fit(X)
    X_ans = sc.transform(X)
    return X_ans, y

In [4]:
# Scaled feature matrix and the Survived target for the whole file.
X, y = load_data(data_path='Titanic.csv')

In [5]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=123,
)

In [6]:
# Baseline linear-kernel support vector classifier.
best_model = svm.SVC(
    C=1,
    kernel='linear',
    gamma=1,
)

In [7]:
# Train the baseline model on the training split; the fitted estimator is the cell output.
best_model.fit(X=X_train, y=y_train)

SVC(C=1, gamma=1, kernel='linear')

In [13]:
# Grid search over the SVC hyper-parameters.
# Every current estimator setting is held fixed (wrapped in a one-element
# list, as GridSearchCV expects sequences) and only the searched values vary.
model_params = best_model.get_params()
tuned_params = {name: [value] for name, value in model_params.items()}
tuned_params['C'] = range(1, 10)
# SVC uses gamma only for the 'rbf', 'poly' and 'sigmoid' kernels. Sweeping it
# for any other kernel (the baseline here is 'linear') just refits the same
# model 49 times per C value, so gamma stays at its current value in that case.
if model_params['kernel'] in ('rbf', 'poly', 'sigmoid'):
    tuned_params['gamma'] = range(1, 50)
# Shuffled, seeded stratified folds keep the search reproducible.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=54)
clf = GridSearchCV(best_model, tuned_params, cv=cv, n_jobs=-1)
clf.fit(X_train, y_train)
best_params = clf.best_params_

In [14]:
# Refit an SVC with the parameters chosen by the grid search and score it on
# the held-out test split.
best_model = svm.SVC(**best_params)
best_model.fit(X_train, y_train)
predicted = best_model.predict(X_test)
print('Used params:', best_params)
# The metric name was missing here (`metrics.(...)` is a syntax error).
# NOTE(review): roc_auc_score is chosen because the recorded output is a single
# float that is not a multiple of 1/len(y_test), so it is not plain accuracy;
# on hard 0/1 predictions ROC AUC equals balanced accuracy. Confirm the intent.
print('Evaluation:\n', metrics.roc_auc_score(y_test, predicted))

Used params: {'C': 1, 'break_ties': False, 'cache_size': 200, 'class_weight': None, 'coef0': 0.0, 'decision_function_shape': 'ovr', 'degree': 3, 'gamma': 1, 'kernel': 'linear', 'max_iter': -1, 'probability': False, 'random_state': None, 'shrinking': True, 'tol': 0.001, 'verbose': False}
Evaluation:
 0.7466067415730337
