In [22]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [23]:
# Load seaborn's "titanic" dataset and discard the "alive" column
# (presumably because it mirrors the "survived" target — confirm).
df = (
    sns.load_dataset("titanic")
    .drop("alive", axis=1)
)

In [24]:
# Feature preparation: impute missing values, bucket ages, one-hot encode.
num_features = ["age", "fare"]
cat_features = ["sex", "class", "embark_town"]

# Assign the result back instead of calling an inplace method on `df[col]`.
# `df[col]` is an intermediate object: pandas 2.2 emits a FutureWarning for
# inplace calls on it, and under Copy-on-Write they leave `df` unchanged.
for col in num_features:
    df[col] = df[col].interpolate(method="linear")

# Gaps in categorical columns are filled with the column's most frequent value.
for col in cat_features:
    df[col] = df[col].fillna(df[col].mode()[0])

# pd.cut uses right-closed intervals by default:
# (0, 12], (12, 20], (20, 40], (40, 60], (60, inf].
bins = [0, 12, 20, 40, 60, np.inf]
labels = ["child", "teen", "young", "middle", "senior"]
df["age_group"] = pd.cut(df["age"], bins=bins, labels=labels)

# drop_first=True omits the first level of every encoded column.
df = pd.get_dummies(df, columns=cat_features + ["age_group"], drop_first=True)
# These columns are removed rather than encoded.
df = df.drop(columns=["deck", "embarked", "who"])

In [25]:
# Train only on rows whose target is known. Filling a missing "survived"
# value with 0 would silently assign class 0 to unlabelled rows and train the
# model on invented labels; dropping them is identical when nothing is missing.
labelled = df.dropna(subset=["survived"])
X = labelled.drop(columns=["survived"])
y = labelled["survived"]

# Fixed random_state keeps the forest reproducible across runs.
model = RandomForestClassifier(n_estimators=128, random_state=42)

# Hold out 20% of the rows for evaluation; the seed fixes the split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

In [26]:
# Share of held-out rows whose predicted label matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(
    f"Accuracy: {accuracy:.4f}"
)

Accuracy: 0.8045


In [27]:
import os
import shutil
import datetime


def save_notebook(title, good=False):
    """Copy the file ``title`` into a timestamped experiments snapshot.

    The snapshot is written to ``experiments_<title>/<timestamp>.ipynb`` or,
    when ``good`` is truthy, to ``experiments_<title>/good/<timestamp>.ipynb``.
    Paths are relative to the current working directory. The timestamp is the
    local time formatted as ``YYYY-MM-DD_HH-MM-SS``. The destination path is
    printed after the copy.

    Args:
        title: Path of the file to copy. It is also used verbatim (extension
            included) in the name of the experiments directory.
        good: When truthy, store the snapshot in the ``good`` subdirectory.

    Raises:
        FileNotFoundError: If ``title`` does not exist. This is checked before
            any directory is created, so a mistyped name leaves nothing behind.
    """
    if not os.path.exists(title):
        raise FileNotFoundError(f"Notebook not found: {title}")

    # NOTE: one-second resolution — two snapshots taken within the same second
    # resolve to the same file name and the later copy overwrites the earlier.
    now = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    experiments_dir = f"experiments_{title}"
    target_dir = f"{experiments_dir}/good" if good else experiments_dir

    # exist_ok=True replaces the check-then-create pairs (no race between the
    # existence test and the creation); makedirs also creates the parent.
    os.makedirs(target_dir, exist_ok=True)

    new_filename = f"{target_dir}/{now}.ipynb"
    shutil.copy(title, new_filename)
    print(f"Файл збережено як {new_filename}")

In [None]:
# Snapshot the current notebook into the regular (non-"good") experiments folder.
save_notebook("pipeline.ipynb", good=False)

Файл збережено як experiments_pipeline.ipynb/good/2025-03-24_18-49-27.ipynb
