# Reading data
Read the train.csv file as a pandas dataframe.

In [31]:
import pandas as pd

datos = pd.read_csv("Data/train.csv")
datos.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


# Indexing
1. Create a function that returns the name of a passenger given their PassengerId.
2. Create a function that returns the PassengerId of a passenger given their Name.
3. Print a message with the ID of passenger **Montvila, Rev. Juozas** with the following format: 'The ID pf passenger Montvila, Rev. Juozas is ##'
4. Print a message with the name of the passenger with ID **42** with the following format: 'The passenger with ID 42 is X'

In [None]:
def get_name(int: id) -> str:
    """
    Recibe un id y devuelve el nombre asociado
    Args:
        id (int): id del pasagero

    Returns:
        str: nombre del pasajero
    """

    nombre = datos.loc[datos["PassengerId"] == id, "Name"].values    
    return nombre



def get_id(nombre: str)-> int:
    """
    Recibe un nombre y devuelve el id asociado
    Args:
        nombre (str): nombre del pasajero

    Returns:
        int: id asociado al nombre del pasajero
    """
    id = datos.loc[datos["Name"] == nombre,"PassengerId"].values
    return id

id = get_id("Montvila, Rev. Juozas")
print("The ID of passenger Montvila, Rev.Juozas is", id)
name = get_name(42)
print("The passenger with ID 42 is ",name)

The ID of passenger Montvila, Rev.Juozas is [887]
The passenger with ID 42 is  ['Turpin, Mrs. William John Robert (Dorothy Ann Wonnacott)']


5. Print all information about the oldest passenger.

In [47]:
o_p = datos[datos["Age"] == datos["Age"].max()]
print(o_p)

     PassengerId  Survived  Pclass                                  Name  \
630          631         1       1  Barkworth, Mr. Algernon Henry Wilson   

      Sex   Age  SibSp  Parch Ticket  Fare Cabin Embarked  
630  male  80.0      0      0  27042  30.0   A23        S  


# Subseting
We are asked to share data for analysis by a third party. Since our dataset contains personal details, we only want to share with them the following information: ticket classes, fares and port of embarkation. We are asked to deliver a sample of the first 100 rows of this dataset.

6. Create and save the new dataset in **data/port_fares.csv**.

In [52]:
new = datos.loc[:100,["Pclass","Fare","Embarked"]]
print(new)
new.to_csv("Data/port_fares.csv",index=False)

     Pclass     Fare Embarked
0         3   7.2500        S
1         1  71.2833        C
2         3   7.9250        S
3         1  53.1000        S
4         3   8.0500        S
..      ...      ...      ...
96        1  34.6542        C
97        1  63.3583        C
98        2  23.0000        S
99        2  26.0000        S
100       3   7.8958        S

[101 rows x 3 columns]


# Counting
7. We want to know if there were any survivors over the age of 60, print all of their information.
8. How many people over 60 survived?
9. What percentage of people over 60 survived?

In [63]:
p_60 = datos[datos["Age"] > 60]
p_60_S = datos[(datos["Age"] > 60) & (datos["Survived"] == 1)]
print(p_60_S)
survivors = p_60_S.shape[0]
print("Survivors over 60", survivors)
percentage = survivors/p_60.shape[0] * 100
print("percentage survived",percentage) 

     PassengerId  Survived  Pclass                                       Name  \
275          276         1       1          Andrews, Miss. Kornelia Theodosia   
483          484         1       3                     Turkula, Mrs. (Hedwig)   
570          571         1       2                         Harris, Mr. George   
630          631         1       1       Barkworth, Mr. Algernon Henry Wilson   
829          830         1       1  Stone, Mrs. George Nelson (Martha Evelyn)   

        Sex   Age  SibSp  Parch       Ticket     Fare Cabin Embarked  
275  female  63.0      1      0        13502  77.9583    D7        S  
483  female  63.0      0      0         4134   9.5875   NaN        S  
570    male  62.0      0      0  S.W./PP 752  10.5000   NaN        S  
630    male  80.0      0      0        27042  30.0000   A23        S  
829  female  62.0      0      0       113572  80.0000   B28      NaN  
Survivors over 60 5
percentage survived 22.727272727272727


# Women and children first?
10. Find out if women and children were more likely to survive.

In [68]:
women_s = datos[(datos["Sex"] == "female") & (datos["Survived"] == 1) & (datos["Age"] >=18)]
Children_s = datos[(datos["Age"] < 18) & (datos["Survived"] == 1)]
men_s = datos[(datos["Sex"] == "male") & (datos["Survived"] == 1) & (datos["Age"] >=18)]

women_p = women_s.shape[0]/datos[(datos["Sex"] == "female") &(datos["Age"] >= 18)].shape[0] * 100
children_p = Children_s.shape[0]/datos[datos["Age"] < 18].shape[0] *100
men_p = men_s.shape[0]/ datos[(datos["Sex"] == "male") &(datos["Age"] >= 18)].shape[0] * 100

print(f'Percentage of men who survived {men_p}, percentage of women who survived {women_p}, percentage of children who survived {children_p}')


Percentage of men who survived 17.72151898734177, percentage of women who survived 77.18446601941747, percentage of children who survived 53.98230088495575


11. Write a function that returns the percentage of people that survived from a subset given as a boolean Pandas series.

In [None]:
def percentage(series: pd.Series) -> float:
    """Recibe un pandas series con el estado de supervivencia de los pasajeros y devuelve el porcentaje de supervivientes

    Args:
        series (pd.Series): Serie c
        on el estatus de supervivencia de los pasajeros

    Returns:
        float: porcentaje de supervivientes
    """
    total = len(series)
    sobrevivientes = series.sum()
    porcentaje = sobrevivientes/total *100
    return porcentaje

206

# Summarizing

12. What is the median age of the passengers?
13. How many passengers embarked from each port?

In [77]:
median_age = datos[["Age"]].median() 
print("Median Age: ",median_age)

port_embarked = datos["Embarked"].value_counts()
print(port_embarked)

Median Age:  Age    28.0
dtype: float64
Embarked
S    644
C    168
Q     77
Name: count, dtype: int64


14. Generate two hypotheses about how does the survival rate differ among groups of passengers. Write your code to explore both hypotheses.

In [None]:
print("Hypothesis 1: passengers over 60 years old were less likely to survive")

porcentaje_2 = datos[(datos["Age"] < 60) & (datos["Survived"] == 1)].shape[0]/ datos[(datos["Age"] < 60)].shape[0] *100
print("Percentage of passengers over 60 who survived ",percentage)
print("Percentage of passenger under 60 who survived",porcentaje_2)

print("Hypothesis 2: children were the age group most likely to survive")
porcentaje_c = datos[(datos["Age"] < 18) & (datos["Survived"] == 1)].shape[0]/ datos[(datos["Age"] < 18)].shape[0] *100
porcentaje_t = datos[(datos["Age"] >= 18) & (datos["Survived"] == 1)].shape[0]/ datos[(datos["Age"] >= 18)].shape[0] *100
print("Percentage of passengers under 18 who survived ",porcentaje_c)
print("Percentage of passengers over 18 who survived",porcentaje_t)




Hypothesis 1: passengers over 60 years old were less likely to survive
Percentage of passengers over 60 who survived  22.727272727272727
Percentage of passenger under 60 who survived 41.133720930232556
Hypothesis 2: children were the age group most likely to survive
Percentage of passengers under 18 who survived  53.98230088495575
Percentage of passengers over 18who survived 38.10316139767055


In [None]:
import numpy as np

np.arange(10)
import time 

start = time.time()

total = 0
for i in np.arange(10000000):
    total = i + total
print(total)
end = time.time()
print(end - start)
start = time.time()

print(np.sum(np.arange(10000000)))

end = time.time()   
print(end - start)