In [5]:
import ipywidgets as widgets
from IPython.display import display
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Laden der Titanic-Daten
df_titanic_train = pd.read_csv('titanic_training.csv')
df_titanic_test = pd.read_csv('titanic_test.csv')
df_titanic_validate = pd.read_csv('titanic_validate.csv')

# Datenaufbereitung und Modellerstellung wie im angegebenen Code
X = df_titanic_train.drop(['Survived', 'Cabin', 'Ticket', 'Name'], axis=1)
y = df_titanic_train['Survived']

X['Sex'] = X['Sex'].map({'male': 0, 'female': 1})
X['Embarked'] = X['Embarked'].map({'Q': 0, 'S': 1, 'C': 2})

X.dropna(inplace=True)
y = y[X.index]

# Train-Test-Split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Beste Features und Modelltraining
best_features = ['Pclass', 'Sex', 'Parch', 'Embarked']
n_neighbors = 5
knn = KNeighborsClassifier(n_neighbors=n_neighbors)
knn.fit(X_train[best_features], y_train)

# Interaktive Vorhersage basierend auf PassengerId und Überprüfung der Vorhersage
def predict_survival_by_id_test(passenger_id):
    # Suche die Passagierdaten im Testdatensatz
    passenger_data = df_titanic_test[df_titanic_test['PassengerId'] == passenger_id]
    
    if passenger_data.empty:
        return f"PassengerId {passenger_id} not found.", None, None
    
    # Nur die besten Features für die Vorhersage verwenden
    passenger_data_for_prediction = passenger_data[best_features]

    # Map categorical variables as in the training data
    passenger_data_for_prediction['Sex'] = passenger_data_for_prediction['Sex'].map({'male': 0, 'female': 1})
    passenger_data_for_prediction['Embarked'] = passenger_data_for_prediction['Embarked'].map({'Q': 0, 'S': 1, 'C': 2})

    # Fill any missing values using the median
    passenger_data_for_prediction.fillna(X_train[best_features].median(), inplace=True)

    # Predict using the trained KNN model
    prediction = knn.predict(passenger_data_for_prediction)
    
    # Prepare the result message
    survival_result = 'Survived' if prediction[0] == 1 else 'Not Survived'
    
    # Extracting important features
    passenger_details = {
        "Name": passenger_data['Name'].values[0],
        "Age": passenger_data['Age'].values[0],
        "Gender": 'Female' if passenger_data['Sex'].values[0] == 'female' else 'Male',
        "Class": passenger_data['Pclass'].values[0],
        "Fare": passenger_data['Fare'].values[0],
        "Embarked": passenger_data['Embarked'].values[0],
        "Siblings/Spouses Aboard": passenger_data['SibSp'].values[0],
        "Parents/Children Aboard": passenger_data['Parch'].values[0]
    }
    
    # Überprüfe die Vorhersage mit den tatsächlichen Überlebensdaten aus dem Validate-Datensatz
    actual_survival_data = df_titanic_validate[df_titanic_validate['PassengerId'] == passenger_id]
    
    if actual_survival_data.empty:
        correctness = "N/A - Actual survival data not found"
    else:
        actual_survived = actual_survival_data['Survived'].values[0]
        correctness = 'Correct' if prediction[0] == actual_survived else 'Incorrect'
    
    return survival_result, passenger_details, correctness

# Widget für PassengerId-Eingabe
passenger_id_input_test = widgets.IntText(
    value=892,
    description='PassengerId:',
    disabled=False
)

# Ergebnisanzeige-Widgets
result_label_test = widgets.Label(value="Prediction:")
details_html_test = widgets.HTML(value="Details:")
correctness_label_test = widgets.Label(value="Correctness:")

# Event-Handler für den Button-Klick
def on_button_click_test(b):
    passenger_id = passenger_id_input_test.value
    result, details, correctness = predict_survival_by_id_test(passenger_id)
    result_label_test.value = f"Prediction: {result}"
    correctness_label_test.value = f"Correctness: {correctness}"
    
    if details:
        details_text = f"""
        <b>Name:</b> {details['Name']}<br>
        <b>Age:</b> {details['Age']}<br>
        <b>Gender:</b> {details['Gender']}<br>
        <b>Class:</b> {details['Class']}<br>
        <b>Fare:</b> {details['Fare']}<br>
        <b>Embarked:</b> {details['Embarked']}<br>
        <b>Siblings/Spouses Aboard:</b> {details['Siblings/Spouses Aboard']}<br>
        <b>Parents/Children Aboard:</b> {details['Parents/Children Aboard']}
        """
        details_html_test.value = details_text
    else:
        details_html_test.value = "Details: Passenger not found."

# Button erstellen
submit_button_test = widgets.Button(
    description='Submit',
    button_style='success',  
    tooltip='Click to make prediction'
)

# Button-Klick mit der Event-Funktion verknüpfen
submit_button_test.on_click(on_button_click_test)

# Anzeige der Widgets
display(widgets.VBox([widgets.HBox([passenger_id_input_test, submit_button_test]), result_label_test, correctness_label_test, details_html_test]))


VBox(children=(HBox(children=(IntText(value=892, description='PassengerId:'), Button(button_style='success', d…

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passenger_data_for_prediction['Sex'] = passenger_data_for_prediction['Sex'].map({'male': 0, 'female': 1})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passenger_data_for_prediction['Embarked'] = passenger_data_for_prediction['Embarked'].map({'Q': 0, 'S': 1, 'C': 2})
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  passenger_data_for_prediction.fillna(X_t