<a href="https://colab.research.google.com/github/KuzmenkoO/amazinum_home_work/blob/main/lesson_11_supervised_ml_cover.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import  os
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report


cwd =  os.getcwd ()
path = os.path.join (cwd, 'titanic')
train_df = pd.read_csv(os.path.join(path, 'train.csv'))
test_df = pd.read_csv(os.path.join(path, 'test.csv'))

data = pd.concat([train_df, test_df], sort=False)

data['Age'] = data['Age'].fillna(data['Age'].median())
data['Fare'] = data['Fare'].fillna(data['Fare'].median())
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])

data['Sex'] = data['Sex'].map({'male': 1, 'female': 0})
data['Embarked'] = data['Embarked'].map({'C': 0, 'Q': 1, 'S': 2})
data['FamilySize'] = data['SibSp'] + data['Parch'] + 1

features = ['Pclass', 'Sex', 'Age', 'Fare', 'Embarked', 'FamilySize']
X = data.loc[data['Survived'].notna(), features]
y = data.loc[data['Survived'].notna(), 'Survived'].astype(int)
X_test_final = data.loc[data['Survived'].isna(), features]
PassengerId_test = data.loc[data['Survived'].isna(), 'PassengerId']

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
X_test_scaled = scaler.transform(X_test_final)

X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=0)

models = {
    "LogisticRegression": LogisticRegression(max_iter=1000),
    "RandomForestClassifier": RandomForestClassifier(n_estimators=100, random_state=0),
    "SVC": SVC(probability=True),
    "GradientBoostingClassifier": GradientBoostingClassifier()
}

results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    preds = model.predict(X_test)
    acc = accuracy_score(y_test, preds)
    results[name] = acc

best_model_name = max(results, key=results.get)
best_model = models[best_model_name]

best_model.fit(X_scaled, y)

final_preds = best_model.predict(X_test_scaled)

print("Оцінка класифікаторів:")
for name, acc in results.items():
    print(f"{name}: {acc:.4f}")
print(f"\nНайкраща модель: {best_model_name}")
print("\nЗвіт класифікації:")
print(classification_report(y_test, best_model.predict(X_test)))


Оцінка класифікаторів:
LogisticRegression: 0.8045
RandomForestClassifier: 0.8492
SVC: 0.8045
GradientBoostingClassifier: 0.8547

Найкраща модель: GradientBoostingClassifier

Звіт класифікації:
              precision    recall  f1-score   support

           0       0.88      0.96      0.92       110
           1       0.93      0.78      0.85        69

    accuracy                           0.89       179
   macro avg       0.90      0.87      0.88       179
weighted avg       0.90      0.89      0.89       179

