<a href="https://colab.research.google.com/github/Fortland2018/ML-Projects/blob/main/SpaceTitanic.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# IMPORTANT: SOME KAGGLE DATA SOURCES ARE PRIVATE
# RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES.
# Log in to Kaggle through kagglehub; the download cell below uses the same
# kagglehub module to fetch the competition data.
import kagglehub
kagglehub.login()


In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

# Download the 'spaceship-titanic' competition files and keep the value
# returned by kagglehub (presumably the local download location -- confirm
# against the kagglehub documentation).
spaceship_titanic_path = kagglehub.competition_download('spaceship-titanic')

# Progress message so the user can see the download step finished.
print('Data source import complete.')


In [None]:
# %%
from pathlib import Path

import pandas as pd
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt
from tensorflow.keras.callbacks import EarlyStopping

# Load the data.
# Prefer the location returned by kagglehub.competition_download() (stored in
# `spaceship_titanic_path` by the data-source import cell) so the notebook also
# runs outside Kaggle, e.g. on Colab. Fall back to Kaggle's standard input
# mount when that cell was deleted or never run.
data_dir = Path(globals().get('spaceship_titanic_path', '/kaggle/input/spaceship-titanic'))
train = pd.read_csv(data_dir / 'train.csv')
test = pd.read_csv(data_dir / 'test.csv')



# %%
#CRYOSLEEP


def map_cryo(val):
    """Map a raw CryoSleep value to a numeric code.

    Returns 1 for true-like values (True, 'True', 1, '1'), 0 for false-like
    values (False, 'False', 0, '0'), and 0.5 for missing values or anything
    that is not recognised.
    """
    true_like = (True, 'True', 1, '1')
    false_like = (False, 'False', 0, '0')
    if not pd.isna(val):
        if val in true_like:
            return 1
        if val in false_like:
            return 0
    # Missing or unrecognised input: use the neutral midpoint.
    return 0.5

# Encode CryoSleep as float64 in both frames (1 = asleep, 0 = awake,
# 0.5 = missing or unrecognised, as decided by map_cryo).
for _frame in (train, test):
    _frame['CryoSleep'] = _frame['CryoSleep'].apply(map_cryo).astype('float64')

# %%

# Combine both sets, tagging every row with its origin so they can be
# separated again afterwards.
full = pd.concat(
    [train.assign(is_train=1), test.assign(is_train=0)],
    ignore_index=True,
)

# Encode the categorical columns as integer codes on the combined data so that
# train and test share one mapping (pandas assigns code -1 to missing values).
categorical_cols = ('HomePlanet', 'Cabin', 'Destination', 'VIP')
for col in categorical_cols:
    full[col] = full[col].astype('category').cat.codes

# Split back into train and test and drop the helper flag.
train = full.loc[full['is_train'].eq(1)].drop(columns=['is_train'])
test = full.loc[full['is_train'].eq(0)].drop(columns=['is_train'])



# %%

# Impute missing values in the training set: float64/int64 columns get the
# column mean, every other column gets its most frequent value.
# CryoSleep and PassengerId are left untouched.
skip_cols = ['CryoSleep', 'PassengerId']
for col in train.columns:
    if col not in skip_cols:
        column = train[col]
        if column.dtype in ['float64', 'int64']:
            fill_value = column.mean()
        else:
            fill_value = column.mode()[0]
        train[col] = column.fillna(fill_value)

# Debug output: HomePlanet of one specific passenger and the CryoSleep column.
print(train.loc[train['PassengerId'] == '0064_02', 'HomePlanet'])
print(train['CryoSleep'])

# %%

# Impute missing values in the test set: float64/int64 columns get the column
# mean, other columns get their most frequent value. Columns with no mode
# (i.e. no non-missing values at all) are left as they are.
for col in test.columns:
    column = test[col]
    if column.dtype in ['float64', 'int64']:
        test[col] = column.fillna(column.mean())
        continue
    mode_val = column.mode()
    if mode_val.empty:
        continue
    test[col] = column.fillna(mode_val[0])



# %%
from sklearn.preprocessing import MinMaxScaler

# Split PassengerId into two features
def split_passenger_id(df):
    """Add integer ``Group`` and ``Position`` columns parsed from ``PassengerId``.

    ``PassengerId`` is split on '_'; the first part becomes ``Group`` and the
    second ``Position``. The frame is modified in place and also returned.
    """
    id_parts = df['PassengerId'].str.split('_', expand=True)
    for part_index, column in enumerate(('Group', 'Position')):
        df[column] = id_parts[part_index].astype(int)
    return df
# Debug output: the PassengerId column before it is split.
print(train['PassengerId'])
# Apply to train and test
train = split_passenger_id(train)
test = split_passenger_id(test)



# Debug output: the training frame, which now carries Group and Position.
print(train)

# %%
# Group by 'Group' and aggregate the Transported label: 'sum' is the number of
# transported passengers in the group, 'count' the number of labelled rows.
group_stats = train.groupby('Group')['Transported'].agg(['sum', 'count'])

# Expose the same figures under descriptive names:
# 'survived' = transported passengers in the group, 'total' = group size.
group_stats = group_stats.assign(
    survived=group_stats['sum'],
    total=group_stats['count'],
)


# %%
# Build a dictionary: Group -> status
# (0: nobody in the group was transported, 0.5: mixed, 1: everyone was).
group_status = {}
for group, row in group_stats.iterrows():
    if row['survived'] == 0:
        group_status[group] = 0
    elif row['survived'] == row['total']:
        group_status[group] = 1
    else:
        group_status[group] = 0.5
print(group_status)
print(group_stats)

# Train: derive the status from the OTHER members of the group only
# (leave-one-out). Mapping group_status straight onto train would fold every
# passenger's own label into their feature -- for a single-member group the
# feature would simply equal the target -- which leaks the label into the
# inputs and makes training/validation metrics meaningless.
own_label = train['Transported'].astype(int)
labels_by_group = own_label.groupby(train['Group'])
peer_total = labels_by_group.transform('count') - 1
peer_survived = labels_by_group.transform('sum') - own_label
has_peers = peer_total > 0
all_peers_transported = has_peers & (peer_survived == peer_total)
no_peer_transported = has_peers & (peer_survived == 0)
# 1.0 where every peer was transported, 0.0 where none was, and 0.5 otherwise
# (mixed peers, or no peers at all -> unknown, same default as unseen groups).
train['group_survival_status'] = all_peers_transported.astype(float).where(
    all_peers_transported | no_peer_transported, 0.5
)

# Test: the labels come from other passengers, so the full group status can be
# used; groups that never occur in train default to 0.5 (unknown status).
test['group_survival_status'] = test['Group'].map(group_status).fillna(0.5)

# %%
# Debug output: the training frame after group_survival_status was added.
print(train)

# %%
# Debug output: only the columns that are later selected as model inputs.
print(train[['HomePlanet', 'group_survival_status', 'CryoSleep', 'Cabin', 'Destination', 'Age', 'VIP', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']])

# %%
cols_to_normalize = ['Age', 'Cabin', 'RoomService', 'FoodCourt', 'ShoppingMall', 'Spa', 'VRDeck']

# Fit the min-max scaler on the training data only, then apply that same
# scaling to both train and test so the test set never influences the ranges.
scaler = MinMaxScaler().fit(train[cols_to_normalize])
train[cols_to_normalize] = scaler.transform(train[cols_to_normalize])
test[cols_to_normalize] = scaler.transform(test[cols_to_normalize])

# %%
# Select the input features and the label
features = [
    'HomePlanet',
    'group_survival_status',
    'CryoSleep',
    'Cabin',
    'Destination',
    'Age',
    'VIP',
    'RoomService',
    'FoodCourt',
    'ShoppingMall',
    'Spa',
    'VRDeck',
]
X_train, X_test = train[features], test[features]
y_train = train['Transported'].astype(int)

# Model: two ReLU hidden layers (32 and 16 units) followed by a single
# sigmoid output for the binary Transported label
network = [layers.Input(shape=(len(features),))]
network += [layers.Dense(units, activation='relu') for units in (32, 16)]
network.append(layers.Dense(1, activation='sigmoid'))
model = keras.Sequential(network)

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Early stopping: stop after 3 epochs without val_loss improvement and restore
# the best weights seen so far
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Training, with 20% of the training data held out for validation
history = model.fit(
    X_train,
    y_train,
    epochs=12,
    batch_size=64,
    validation_split=0.2,
    verbose=1,
    callbacks=[early_stop],
)



# %%

# Training curves: training loss (left) and training accuracy (right) per
# epoch. Only the training metrics from `history` are drawn here; the
# validation metrics are not plotted.
plt.figure(figsize=(10,4))
plt.subplot(1,2,1)
plt.plot(history.history['loss'], label='Loss')
plt.xlabel('Epoka')
plt.ylabel('Strata')
plt.title('Przebieg straty (loss)')
plt.legend()
plt.subplot(1,2,2)
plt.plot(history.history['accuracy'], label='Accuracy', color='green')
plt.xlabel('Epoka')
plt.ylabel('Dokładność')
plt.title('acc')
plt.legend()
plt.tight_layout()
plt.show()

# Histogram of passenger ages.
# NOTE: 'Age' has already been min-max scaled at this point, so the x axis
# shows scaled values rather than ages in years.
plt.figure(figsize=(6,4))
plt.hist(train['Age'], bins=30, color='skyblue', edgecolor='black')
plt.xlabel('Wiek')
plt.ylabel('Liczba pasażerów')
plt.title('Rozkład wieku pasażerów')
plt.show()

# Bar chart of the number of passengers per home planet.
# NOTE: 'HomePlanet' holds integer category codes by now, so the bars are
# labelled with codes, not planet names.
plt.figure(figsize=(6,4))
train['HomePlanet'].value_counts().plot(kind='bar', color='orange')
plt.xlabel('Planeta')
plt.ylabel('Liczba pasażerów')
plt.title('Liczba pasażerów z podziałem na planety')
plt.show()

# Prediction: threshold the sigmoid outputs at 0.5 to obtain boolean labels
preds = model.predict(X_test) > 0.5

# Build the submission file: one row per test passenger with the predicted
# Transported flag
submission = pd.DataFrame(
    {'PassengerId': test['PassengerId'], 'Transported': preds.ravel()}
)
submission.to_csv('submission.csv', index=False)


