# 🚢 Titanic Survival Prediction

## ⚙️ Importing Libraries & Environment Setup

In [None]:
import os
from typing import Any, Callable

import pandas as pd
from colorama import Fore, Style
from sklearn.model_selection import (
    train_test_split,
)

In [None]:
%matplotlib inline

pd.set_option("display.width", 175)
pd.set_option("display.max_rows", 100)
pd.set_option("display.max_columns", None)

In [None]:
# Controls whether the final cell writes submission.csv.
should_save_submission = True

# Pick the dataset directory for the current environment. The Kaggle input
# mount is checked first; a local ./dataset folder is the fallback.
if os.path.exists("/kaggle/input/titanic"):
    # On Kaggle the submission file is always written.
    dataset_path, should_save_submission = "/kaggle/input/titanic", True
elif not os.path.exists("./dataset"):
    raise FileNotFoundError("Dataset directory not found.")
else:
    dataset_path = "./dataset"

## 🛠️ Utility Functions

In [None]:
class PandasProxy:
    """A proxy class that provides controlled access to a pandas DataFrame.

    Reads are forwarded to whatever DataFrame ``get_df`` currently returns. Item
    assignment mutates that DataFrame and then passes it to ``set_df`` so the
    owner can react to the change.
    """

    def __init__(
        self, get_df: Callable[[], pd.DataFrame], set_df: Callable[[pd.DataFrame], None]
    ) -> None:
        """Initialize the proxy with getter and setter functions.

        Args:
            get_df (Callable[[], pd.DataFrame]): Function to retrieve the DataFrame.
            set_df (Callable[[pd.DataFrame], None]): Function to set/update the
                DataFrame.
        """
        self._get_df = get_df
        self._set_df = set_df

    def __getitem__(self, key: Any) -> Any:
        """Retrieve an item from the proxied DataFrame."""
        return self._get_df()[key]

    def __setitem__(self, key: Any, value: Any) -> None:
        """Set a value in the proxied DataFrame and update it."""
        df = self._get_df()
        df[key] = value
        # Hand the mutated frame back so the owner can refresh derived state.
        self._set_df(df)

    def __getattr__(self, name: str) -> Any:
        """Forward attribute access to the proxied DataFrame.

        Raises:
            AttributeError: If the proxy's own accessors are missing, or if the
                proxied DataFrame has no attribute called ``name``.
        """
        # __getattr__ only runs after normal lookup has failed. If the accessors
        # themselves are absent (an instance built without __init__, e.g. while
        # being copied or unpickled), reading self._get_df below would re-enter
        # this method until RecursionError. Fail with a plain AttributeError.
        if name in ("_get_df", "_set_df"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {name!r}"
            )
        return getattr(self._get_df(), name)


class DatasetManager:
    """Manages training and testing datasets.

    This class maintains synchronization between the separate train/test DataFrames and
    their concatenated form. Updating the combined DataFrame automatically updates
    the individual train and test DataFrames.

    The split point is the number of training rows captured at construction time,
    so a replacement combined DataFrame must keep the same number of rows.
    """

    def __init__(self, train_df: pd.DataFrame, test_df: pd.DataFrame) -> None:
        """Initialize DatasetManager with train and test DataFrames.

        Args:
            train_df (pd.DataFrame): Training data.
            test_df (pd.DataFrame): Testing data.
        """
        self._train_df = train_df.reset_index(drop=True)
        self._test_df = test_df.reset_index(drop=True)

        # Row counts are frozen here; every later split of the combined frame
        # uses them to find the train/test boundary.
        self._train_size = len(self._train_df)
        self._test_size = len(self._test_df)

        # Columns present only in the training data; they are removed again
        # whenever the test frame is re-derived from the combined frame.
        test_columns = set(self._test_df.columns)
        self._columns_to_drop_from_test = [
            column for column in self._train_df.columns if column not in test_columns
        ]
        self._combined_df = pd.concat(
            [self._train_df, self._test_df], ignore_index=True
        )

        self._combined_df_proxy = PandasProxy(
            self._get_combined_df, self._set_combined_df
        )

        self._assign_names()

    def _assign_names(self) -> None:
        """Assign descriptive names to train, test, and combined DataFrames."""
        # The names are plain attributes on the current frame objects, so this is
        # called again each time the frames are rebuilt.
        self._train_df.name = "Training Set"
        self._test_df.name = "Test Set"
        self._combined_df.name = "Combined Set"

    def _get_combined_df(self) -> pd.DataFrame:
        """Get the combined DataFrame of train and test datasets.

        Returns:
            pd.DataFrame: Concatenated DataFrame of train and test sets.
        """
        return self._combined_df

    def _set_combined_df(self, new_combined_df: pd.DataFrame) -> None:
        """Set the combined DataFrame and update the train and test DataFrames.

        Args:
            new_combined_df (pd.DataFrame): New combined DataFrame. Its first
                ``train_size`` rows become the training set, the rest the test set.

        Raises:
            ValueError: If the number of rows differs from the original
                train + test row count, since the positional split would then
                assign rows to the wrong set.
        """
        expected_rows = self._train_size + self._test_size
        if len(new_combined_df) != expected_rows:
            raise ValueError(
                f"Combined DataFrame must keep {expected_rows} rows "
                f"({self._train_size} train + {self._test_size} test), "
                f"got {len(new_combined_df)}."
            )

        self._combined_df = new_combined_df
        self._train_df = self._combined_df.iloc[: self._train_size].reset_index(
            drop=True
        )
        self._test_df = (
            self._combined_df.iloc[self._train_size :]
            # errors="ignore": a train-only column may already have been removed
            # from the combined frame, which must not break the re-split.
            .drop(self._columns_to_drop_from_test, axis=1, errors="ignore")
            .reset_index(drop=True)
        )
        self._assign_names()

    @property
    def combined_df(self) -> "PandasProxy":
        """Get a proxy to the combined DataFrame of train and test datasets.

        Returns:
            PandasProxy: Proxy object allowing controlled access to the DataFrame.
        """
        return self._combined_df_proxy

    @combined_df.setter
    def combined_df(self, new_combined_df: pd.DataFrame) -> None:
        """Set the combined DataFrame and update the train and test DataFrames.

        Args:
            new_combined_df (pd.DataFrame): New combined DataFrame.

        Raises:
            ValueError: If the row count differs from the original train + test
                row count.
        """
        self._set_combined_df(new_combined_df)

    @property
    def train_df(self) -> pd.DataFrame:
        """Get the training DataFrame.

        Returns:
            pd.DataFrame: Training dataset.
        """
        return self._train_df

    @property
    def test_df(self) -> pd.DataFrame:
        """Get the testing DataFrame.

        Returns:
            pd.DataFrame: Testing dataset.
        """
        return self._test_df

    @property
    def train_test_dfs(self) -> tuple[pd.DataFrame, pd.DataFrame]:
        """Get the separate train and test DataFrames.

        Returns:
            tuple[pd.DataFrame, pd.DataFrame]: Tuple containing (train_df, test_df).
        """
        return self._train_df, self._test_df

## 📥 Load and Inspect Dataset

In [None]:
# Read both splits from the resolved dataset directory, then wrap them in a
# DatasetManager so they can also be edited through one combined frame.
train_df, test_df = (
    pd.read_csv(f"{dataset_path}/{split}.csv") for split in ("train", "test")
)
dm = DatasetManager(train_df, test_df)

In [None]:
# Column dtypes and non-null counts for each split.
print(f"{Fore.MAGENTA}DataFrame Info:{Style.RESET_ALL}")
for df in (dm.train_df, dm.test_df):
    print(f"\n{Fore.CYAN}====== {df.name} ======{Style.RESET_ALL}")
    df.info()  # writes its report directly to stdout

In [None]:
# Summary statistics for every column (numeric and non-numeric) of each split.
print(f"{Fore.GREEN}DataFrame Description:{Style.RESET_ALL}")
for df in (dm.train_df, dm.test_df):
    print(f"\n{Fore.CYAN}====== {df.name} ======{Style.RESET_ALL}")
    print(df.describe(include="all"))

In [None]:
# Peek at the first rows of each split.
print(f"{Fore.YELLOW}First Rows of DataFrame:{Style.RESET_ALL}")
for df in (dm.train_df, dm.test_df):
    print(f"\n{Fore.CYAN}====== {df.name} ======{Style.RESET_ALL}")
    print(df.head())

In [None]:
# Per-column count of missing entries in each split.
print(f"{Fore.RED}Missing Values in Each Column:{Style.RESET_ALL}")
for df in (dm.train_df, dm.test_df):
    print(f"\n{Fore.CYAN}====== {df.name} ======{Style.RESET_ALL}")
    print(df.isna().sum())

## 🧹 Feature Engineering

---

In [None]:
# Re-check the per-column missing-value counts at the end of this section.
print(f"{Fore.RED}Missing Values in Each Column:{Style.RESET_ALL}")
for df in (dm.train_df, dm.test_df):
    print(f"\n{Fore.CYAN}====== {df.name} ======{Style.RESET_ALL}")
    print(df.isna().sum())

## 📊 Exploratory Data Visualizations

## 🧪 Model Training

In [None]:
# TODO: list the feature columns the model should be trained on.
features = []

# "Survived" is the label column, the same name the submission file uses.
# (The previous empty-string key matched no column and raised KeyError.)
X, y = dm.train_df[features], dm.train_df["Survived"]

# Hold out 20% of the training rows for validation; the fixed seed keeps the
# split reproducible between runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X, y, test_size=0.2, random_state=42
)

### 📊 Model Evaluation

### 🧠 Final Model Training

In [None]:
# TODO: assign the chosen estimator here (any object exposing fit/predict).
final_model = None

# Fail with an actionable message instead of an AttributeError on NoneType
# while the placeholder above is still in place.
if final_model is None:
    raise ValueError(
        "final_model is not set; assign an estimator before running this cell."
    )

# Fit on the full training data (X, y), then predict the test set.
final_model.fit(X, y)

submission_predictions = final_model.predict(dm.test_df[features])

if should_save_submission:
    # Two-column file: passenger id and predicted label.
    submission_df = pd.DataFrame(
        {"PassengerId": dm.test_df["PassengerId"], "Survived": submission_predictions}
    )
    submission_df.to_csv("submission.csv", index=False)
    print(f"{Fore.GREEN}Submission was successfully saved!{Style.RESET_ALL}")
else:
    print(f"{Fore.YELLOW}Submission not saved.{Style.RESET_ALL}")