<h1 style="text-align: center">Kaggle Titanic Dataset</h1>
<h3 style="text-align: center">Predicting passenger survival</h3>


In [11]:
import numpy as np
import matplotlib as plt
import csv as csv

# Presumably the gradient-descent learning rate -- not used in the visible cells; TODO confirm.
ALPHA = 0.1
# Convergence threshold: the training loop keeps running only while previous_dj is above this value.
# NOTE(review): the name looks like a misspelling of EPSILON; renaming it means updating every reference.
ESPILON = 0.001
# CSV file read by TrainData().
TRAIN_DATA_FILE = "train.csv"
# Presumably the test-set CSV and the predictions output file; neither is used in the visible cells.
TEST_DATA_FILE = "test.csv"
DATA_OUTPUT_NAME = "answers.csv"

UNKNOWN_AGE_INSERT = 30 # Age substituted by ParseAge when a passenger's age cannot be parsed.

## Fetching and Organizing Data

#### Raw Passenger Data
Passenger Data is returned as a list of strings, in the order of the following:
*Passenger Id, Survived, Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked*

#### Parsed Passenger Data
Passenger Data needs to be parsed and converted into numbers for the machine learning algorithms to take over. The following is the parsing scheme; the parsed fields are returned in this same order.

1. **Passenger Id**: This will stay the same; it is not converted into an int.
2. **Survived**: 0 for died, 1 for survived
3. **Pclass**: 1 for upper class, 2 for middle class, 3 for lower class
4. **Sex**: 0 for Male, 1 for Female
5. **Age**: Float parsed from the string; if the string is empty, it defaults to an age of 30 (an arbitrary choice).
6. **SibSp**: Number of siblings/spouses on board, converted to an int.
7. **Parch**: Number of parents/children on board, converted to an int.
8. **Fare**: Convert to a float

In [12]:
def FetchPassengerData(fileName: str) -> list:
    """Read every row of a comma-separated file and return the rows as lists of strings.

    Args:
        fileName (str): Path of the csv file to open.

    Returns:
        list: One list of strings per CSV row, in file order (the header row, if the
            file has one, is included as the first element). For the passenger files
            the columns are PassengerId, Survived, Pclass, Name, Sex, Age, SibSp,
            Parch, Ticket, Fare, Cabin, Embarked.
    """
    # newline='' lets the csv module handle line endings inside quoted fields itself.
    with open(fileName, newline='') as source:
        return list(csv.reader(source, delimiter=','))

def ParseSurvived(survived: str) -> int:
    """Convert the raw Survived field into an int flag.

    Args:
        survived (str): Raw Survived value, e.g. '0', '1' or '1.0'.

    Returns:
        int: The value parsed with float() and then truncated with int(). If that
            conversion raises any exception, the exception text is printed and 0
            is returned instead.
    """
    try:
        flag = int(float(survived))
    except Exception as error:
        # Best-effort parse: report the problem and fall back to 0.
        print(error)
        return 0
    return flag


def ParseAge(age: str) -> float:
    """Convert the raw Age field into a float.

    Args:
        age (str): Raw Age value; an empty string when the age is unknown.

    Returns:
        float: The parsed age, or UNKNOWN_AGE_INSERT converted to a float when the
            string cannot be parsed as a number.
    """
    try:
        return float(age)
    except ValueError:
        # Cast the fallback so both paths return a float, matching the annotation.
        return float(UNKNOWN_AGE_INSERT)

def ParsePassengerData(passenger: list) -> list:
    """Convert one raw passenger row (strings) into the numeric row used for the machine learning.

    Args:
        passenger (list): Raw string fields in the order PassengerId, Survived (optional),
            Pclass, Name, Sex, Age, SibSp, Parch, Ticket, Fare, Cabin, Embarked.
            When the row has fewer than 12 fields, Survived is assumed to be missing
            and '0' is used in its place. The caller's list is never modified.

    Returns:
        list: [PassengerId, Survived, Pclass, Sex, Age, SibSp, Parch, Fare].
            PassengerId is kept as the original string, Sex is 1 for 'female' and 0
            for any other value, Age and Fare are floats, the rest are ints.

    Raises:
        ValueError: If the row still has fewer than 12 fields after the Survived
            placeholder is added, or if Pclass, SibSp, Parch or Fare is not numeric.
    """
    # Work on a copy so adding the placeholder never alters the caller's row.
    fields = list(passenger)

    # Survived wasn't included (as in the test data): add a '0' placeholder for it.
    if len(fields) < 12:
        fields.insert(1, '0')

    if len(fields) < 12:
        raise ValueError(
            f"List is less than 12 elements still. Passenger: {fields}"
        )

    passenger_id = fields[0]
    survived = ParseSurvived(fields[1])
    pclass = int(fields[2])
    # Name is irrelevant (fields[3])
    sex = int(fields[4] == 'female')
    age = ParseAge(fields[5])
    sib_sp = int(fields[6])
    parch = int(fields[7])
    # Ticket is irrelevant (fields[8])
    # NOTE(review): a blank Fare raises ValueError here; confirm the input files never leave it empty.
    fare = float(fields[9])
    # Cabin (fields[10]) is not used yet -- a numbering scheme based on general cabin location is planned.
    # Embarked (fields[11]) is not used; might be worth adding later.

    return [passenger_id, survived, pclass, sex, age, sib_sp, parch, fare]


### Global Variables for Results (NOT CONST)

In [13]:
W_HAT = [] # Weight vector to be multiplied across the passenger features (presumably one weight per feature -- TODO confirm).
B = 0 # Correction (bias) term.
HIST_J = [] # History of the derivative, kept to check whether ALPHA is a good choice.
COST = 0 # Cost of using the current W_HAT and B.
ITERATION = 0 # Current iteration count.

### Functions for Calculating One Iteration

In [14]:
def Functions():
    """Placeholder for the per-iteration calculation functions; currently always returns 0."""
    return 0

### Training Function (and running)

In [15]:
def TrainData():
    TrainRawData = FetchPassengerData(TRAIN_DATA_FILE)
    TrainData = []

    for passenger in TrainRawData[1:]:
        TrainData.append(ParsePassengerData(passenger))
    previous_dj = 1

    while ITERATION < 100000 and previous_dj > ESPILON:
        ITERATION += 1
        