# A More Complex Heuristic


## Working with the Titanic Dataset

In [2]:
import numpy
import pandas
import statsmodels.api as sm

In [3]:
# Load the passenger data and display it as this cell's output.
# NOTE(review): the "Unnamed: 0" column in the output is presumably a row index
# that was saved into the CSV; read_csv is called here without index_col.
pandas.read_csv('titanic-data.csv')

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.0750,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [5]:


def complex_heuristic(file_path):
    '''
    Predict Titanic survival from each passenger's sex, class and age.

    A passenger is predicted to have survived when:

        female or (high socioeconomic status and under 18)

    Every other passenger is predicted to have perished. The exercise
    target for this rule is an accuracy of 79% or higher.

    Columns read from the CSV:
    PassengerId -- used as the key of the returned dictionary
    Sex         -- the string "male" or "female"
    Pclass      -- socioeconomic status: 1 is high, 2 is medium, 3 is low
    Age         -- age in years; may be missing

    A missing Age never satisfies "under 18", so a passenger of unknown
    age is predicted to have survived only if female.

    More information about the data can be seen at the links below:
    http://www.kaggle.com/c/titanic-gettingStarted/data
    https://s3.amazonaws.com/content.udacity-data.com/courses/ud359/titanic_data.csv

    Parameters:
    file_path -- path (or anything else pandas.read_csv accepts) of the
                 passenger CSV file

    Returns:
    A dictionary mapping each PassengerId to 1 if the passenger is
    predicted to have survived, or 0 otherwise.
    '''

    df = pandas.read_csv(file_path)

    # Evaluate the rule for all rows at once instead of looping with iterrows().
    is_female = df['Sex'] == 'female'
    # Comparing a missing (NaN) Age with 18 yields False, so unknown ages
    # are treated as "not under 18".
    is_high_class_minor = (df['Pclass'] == 1) & (df['Age'] < 18)
    survived = (is_female | is_high_class_minor).astype(int)

    # tolist() converts to plain Python scalars so the keys and values are
    # ordinary ints, as they were with the row-by-row implementation.
    return dict(zip(df['PassengerId'].tolist(), survived.tolist()))

In [7]:
# Run the heuristic on the dataset file. As the cell's last bare expression,
# the returned {PassengerId: prediction} dict (1 = survived, 0 = perished)
# is displayed in full as the cell output.
complex_heuristic('titanic-data.csv')

{1: 0,
 2: 1,
 3: 1,
 4: 1,
 5: 0,
 6: 0,
 7: 0,
 8: 0,
 9: 1,
 10: 1,
 11: 1,
 12: 1,
 13: 0,
 14: 0,
 15: 1,
 16: 1,
 17: 0,
 18: 0,
 19: 1,
 20: 1,
 21: 0,
 22: 0,
 23: 1,
 24: 0,
 25: 1,
 26: 1,
 27: 0,
 28: 0,
 29: 1,
 30: 0,
 31: 0,
 32: 1,
 33: 1,
 34: 0,
 35: 0,
 36: 0,
 37: 0,
 38: 0,
 39: 1,
 40: 1,
 41: 1,
 42: 1,
 43: 0,
 44: 1,
 45: 1,
 46: 0,
 47: 0,
 48: 1,
 49: 0,
 50: 1,
 51: 0,
 52: 0,
 53: 1,
 54: 1,
 55: 0,
 56: 0,
 57: 1,
 58: 0,
 59: 1,
 60: 0,
 61: 0,
 62: 1,
 63: 0,
 64: 0,
 65: 0,
 66: 0,
 67: 1,
 68: 0,
 69: 1,
 70: 0,
 71: 0,
 72: 1,
 73: 0,
 74: 0,
 75: 0,
 76: 0,
 77: 0,
 78: 0,
 79: 0,
 80: 1,
 81: 0,
 82: 0,
 83: 1,
 84: 0,
 85: 1,
 86: 1,
 87: 0,
 88: 0,
 89: 1,
 90: 0,
 91: 0,
 92: 0,
 93: 0,
 94: 0,
 95: 0,
 96: 0,
 97: 0,
 98: 0,
 99: 1,
 100: 0,
 101: 1,
 102: 0,
 103: 0,
 104: 0,
 105: 0,
 106: 0,
 107: 1,
 108: 0,
 109: 0,
 110: 1,
 111: 0,
 112: 1,
 113: 0,
 114: 1,
 115: 1,
 116: 0,
 117: 0,
 118: 0,
 119: 0,
 120: 1,
 121: 0,
 122: 0,
 123: 0,
 

## Your heuristic was 79.12% accurate. Good job!