# Clase 5 - Numpy / Pandas

In [3]:
import numpy as np

# 4x5 matrix of random integers drawn from 1..99. No seed is set, so the
# values (and every statistic computed from them below) change on each run.
ventas = np.random.randint(1, 100, size=(4, 5))

In [5]:
ventas

array([[68, 43, 64, 90, 38],
       [ 2, 40, 73, 98, 91],
       [39, 82, 29, 43, 67],
       [16, 84, 61, 33, 20]])

In [7]:
# Row totals: collapse the columns (axis=1) so each row yields one sum.
ventas.sum(axis=1)

array([303, 304, 260, 214])

In [9]:
# No axis is given, so this is the mean over all entries of the matrix.
ventas.mean()

54.05

In [11]:
# Standard deviation over all entries of the matrix (no axis given).
ventas.std()

26.87838350794184

## Missing values

In [14]:
# 3x3 matrix with two missing entries marked as NaN; the NaNs force a
# floating-point array.
missing = np.array(
    [
        [1, 2, 3],
        [4, np.nan, 6],
        [np.nan, 8, 9],
    ]
)

In [16]:
missing

array([[ 1.,  2.,  3.],
       [ 4., nan,  6.],
       [nan,  8.,  9.]])

In [18]:
# The plain sum propagates NaN: with missing entries present the result is NaN.
missing.sum()

nan

In [20]:
# Same problem for the plain mean: the NaN entries make the result NaN.
missing.mean()

nan

In [26]:
# NaN-aware sum: the NaN entries are skipped, leaving 1+2+3+4+6+8+9.
np.nansum(missing)

33.0

In [28]:
# NaN-aware mean: averages only the 7 entries that are not NaN.
np.nanmean(missing)

4.714285714285714

In [30]:
# Manual check of np.nanmean: the sum of the non-NaN entries divided by how
# many there are (33 / 7 for this array). Both numbers are computed from
# `missing` instead of being typed in by hand, so the check stays valid if
# the array is edited.
np.nansum(missing) / np.count_nonzero(~np.isnan(missing))

4.714285714285714

In [32]:
# NaN-aware maximum: the largest entry once the NaNs are ignored.
np.nanmax(missing)

9.0

In [34]:
import pandas as pd

In [36]:
# Series holding 1..6 with the default 0-based integer index.
ser = pd.Series(list(range(1, 7)))

In [38]:
ser

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [40]:
# Same six values, now with letter labels, an explicit 8-bit integer dtype
# and a name for the Series.
ser0 = pd.Series(
    range(1, 7),
    index=list("abcdef"),
    name="numeros",
    dtype="int8",
)

In [42]:
ser0

a    1
b    2
c    3
d    4
e    5
f    6
Name: numeros, dtype: int8

In [44]:
# Balance per person: the names are the index labels, the amounts the values.
ser1 = pd.Series(
    [100000, 125000, 80000, 200000],
    index=["Jose", "Maria", "Mariano", "Analia"],
    name="saldo",
)

In [46]:
ser1

Jose       100000
Maria      125000
Mariano     80000
Analia     200000
Name: saldo, dtype: int64

In [48]:
# Month names held in a NumPy string array. It is called `meses` rather than
# `data` because `data` is the name given to the Titanic DataFrame further
# down; reusing one name for two unrelated objects invites hidden-state bugs
# when cells are re-run out of order.
meses = np.array(['enero', 'febrero', 'marzo', 'abril', 'mayo'])
# Explicit integer labels 1-5 replace the default 0-based index.
ser2 = pd.Series(meses, index=[1, 2, 3, 4, 5])

In [50]:
ser2

1      enero
2    febrero
3      marzo
4      abril
5       mayo
dtype: object

### Dataframes

In [53]:
# 3x3 frame holding 0..8 row by row, with default integer labels on both axes.
df = pd.DataFrame([list(range(start, start + 3)) for start in range(0, 9, 3)])

# 4x5 frame holding 0..19 row by row, with explicit row and column labels.
df1 = pd.DataFrame(
    [list(range(start, start + 5)) for start in range(0, 20, 5)],
    columns=["Lunes", "Martes", "Miercoles", "Jueves", "Viernes"],
    index=["Semana 1", "Semana 2", "Semana 3", "Semana 4"],
)

In [55]:
df

Unnamed: 0,0,1,2
0,0,1,2
1,3,4,5
2,6,7,8


In [57]:
df1

Unnamed: 0,Lunes,Martes,Miercoles,Jueves,Viernes
Semana 1,0,1,2,3,4
Semana 2,5,6,7,8,9
Semana 3,10,11,12,13,14
Semana 4,15,16,17,18,19


In [59]:
# Random 4x3 integer matrix with values from 0 to 49. No seed is set, so the
# numbers differ on every run.
array = np.random.randint(0, 50, size=(4, 3))
# Wrap the matrix in a DataFrame with labelled rows and month-named columns.
pd.DataFrame(
    array,
    columns=["Enero", "Febrero", "Marzo"],
    index=["CABA", "Buenos Aires", "Cordoba", "Santa Fe"],
)

Unnamed: 0,Enero,Febrero,Marzo
CABA,1,10,27
Buenos Aires,31,10,39
Cordoba,29,21,33
Santa Fe,24,11,32


In [61]:
# Building a frame from a dict: each key becomes a column, each list its
# values. All four columns hold the same three numbers, over rows A, B, C.
pd.DataFrame(
    {f"Columna {n}": [100, 125, 75] for n in range(1, 5)},
    index=["A", "B", "C"],
)

Unnamed: 0,Columna 1,Columna 2,Columna 3,Columna 4
A,100,100,100,100
B,125,125,125,125
C,75,75,75,75


## Titanic

In [64]:
# Load the Titanic passenger table. The path is relative, so titanic.csv must
# sit in the directory the notebook is run from.
data = pd.read_csv("titanic.csv")

In [66]:
data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


## Atributos & Métodos

In [72]:
data.head(20)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


In [74]:
data.tail(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
881,882,0,3,"Markun, Mr. Johann",male,33.0,0,0,349257,7.8958,,S
882,883,0,3,"Dahlberg, Miss. Gerda Ulrika",female,22.0,0,0,7552,10.5167,,S
883,884,0,2,"Banfield, Mr. Frederick James",male,28.0,0,0,C.A./SOTON 34068,10.5,,S
884,885,0,3,"Sutehall, Mr. Henry Jr",male,25.0,0,0,SOTON/OQ 392076,7.05,,S
885,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.125,,Q
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


In [76]:
data.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

In [78]:
data.shape

(891, 12)

In [80]:
# Total number of cells: rows x columns (891 * 12 for this table).
data.size

10692

In [82]:
data.index

RangeIndex(start=0, stop=891, step=1)

In [84]:
data.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [86]:
data.ndim

2

# Per-column summary (non-null count and dtype). The non-null counts for Age
# and Cabin are what the ratios in the next cells are about.
data.info()

In [90]:
# Share of passengers whose Age is known. Computed from the column itself
# rather than retyping the counts (714 of 891) read off data.info(), so the
# figure cannot drift away from the data.
data["Age"].notna().mean()

0.8013468013468014

In [92]:
# Share of passengers whose Cabin is known. Computed from the column itself
# rather than retyping the counts (204 of 891) read off data.info().
data["Cabin"].notna().mean()

0.22895622895622897

In [94]:
# Number of passengers with no recorded Age. Counted directly from the column
# instead of subtracting the hand-copied totals (891 - 714).
data["Age"].isna().sum()

177

## Indexing

In [99]:
# One column per person, each holding the same three amounts; the rows get
# the default 0-based index.
miDF = pd.DataFrame(
    {
        nombre: [100, 500, 400]
        for nombre in ("Andrea", "Maria", "Luis", "Analia", "Juan")
    }
)

In [101]:
miDF

Unnamed: 0,Andrea,Maria,Luis,Analia,Juan
0,100,100,100,100,100
1,500,500,500,500,500
2,400,400,400,400,400


In [103]:
# Single brackets with one column name return that column as a Series.
miDF["Maria"]

0    100
1    500
2    400
Name: Maria, dtype: int64

In [105]:
# Passing a list of names (double brackets) returns a DataFrame restricted to
# those columns, in the order given.
miDF[["Maria", "Analia"]]

Unnamed: 0,Maria,Analia
0,100,100
1,500,500
2,400,400


In [111]:
# Pick three columns (in the order listed, not the table's order), then keep
# only the leading rows of that subset.
data[["Age","Name", "Survived"]].head(20)

Unnamed: 0,Age,Name,Survived
0,22.0,"Braund, Mr. Owen Harris",0
1,38.0,"Cumings, Mrs. John Bradley (Florence Briggs Th...",1
2,26.0,"Heikkinen, Miss. Laina",1
3,35.0,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",1
4,35.0,"Allen, Mr. William Henry",0
5,,"Moran, Mr. James",0
6,54.0,"McCarthy, Mr. Timothy J",0
7,2.0,"Palsson, Master. Gosta Leonard",0
8,27.0,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",1
9,14.0,"Nasser, Mrs. Nicholas (Adele Achem)",1


In [113]:
data

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


## Location

- .loc
- .iloc

In [118]:
# .loc selects by label: row label 0 and column label "Name" give one value.
data.loc[0,"Name"]

'Braund, Mr. Owen Harris'

In [120]:
# One row label plus a list of column labels returns a Series whose index is
# the chosen columns and whose name is the row label.
data.loc[0, ["Name", "Age", "Sex", "Survived"]]

Name        Braund, Mr. Owen Harris
Age                            22.0
Sex                            male
Survived                          0
Name: 0, dtype: object

In [122]:
# Lists on both axes return a DataFrame with exactly those rows and columns.
data.loc[[0,10,30], ["Name", "Age", "Sex", "Survived"]]

Unnamed: 0,Name,Age,Sex,Survived
0,"Braund, Mr. Owen Harris",22.0,male,0
10,"Sandstrom, Miss. Marguerite Rut",4.0,female,1
30,"Uruchurtu, Don. Manuel E",40.0,male,0


In [130]:
# Row selection by a slice of labels, starting at label 10.
# NOTE(review): .loc label slices are expected to include the end label (30);
# confirm against the full, untruncated output.
data.loc[10:30, ["Name", "Age", "Sex", "Survived"]]

Unnamed: 0,Name,Age,Sex,Survived
10,"Sandstrom, Miss. Marguerite Rut",4.0,female,1
11,"Bonnell, Miss. Elizabeth",58.0,female,1
12,"Saundercock, Mr. William Henry",20.0,male,0
13,"Andersson, Mr. Anders Johan",39.0,male,0
14,"Vestrom, Miss. Hulda Amanda Adolfina",14.0,female,0
15,"Hewlett, Mrs. (Mary D Kingcome)",55.0,female,1
16,"Rice, Master. Eugene",2.0,male,0
17,"Williams, Mr. Charles Eugene",,male,1
18,"Vander Planke, Mrs. Julius (Emelia Maria Vande...",31.0,female,0
19,"Masselmani, Mrs. Fatima",,female,1


In [134]:
# Columns can be sliced by label too: "Pclass":"Ticket" keeps every column
# from Pclass through Ticket, with Ticket itself included.
data.loc[15:25, "Pclass":"Ticket"]

Unnamed: 0,Pclass,Name,Sex,Age,SibSp,Parch,Ticket
15,2,"Hewlett, Mrs. (Mary D Kingcome)",female,55.0,0,0,248706
16,3,"Rice, Master. Eugene",male,2.0,4,1,382652
17,2,"Williams, Mr. Charles Eugene",male,,0,0,244373
18,3,"Vander Planke, Mrs. Julius (Emelia Maria Vande...",female,31.0,1,0,345763
19,3,"Masselmani, Mrs. Fatima",female,,0,0,2649
20,2,"Fynney, Mr. Joseph J",male,35.0,0,0,239865
21,2,"Beesley, Mr. Lawrence",male,34.0,0,0,248698
22,3,"McGowan, Miss. Anna ""Annie""",female,15.0,0,0,330923
23,1,"Sloper, Mr. William Thompson",male,28.0,0,0,113788
24,3,"Palsson, Miss. Torborg Danira",female,8.0,3,1,349909


In [140]:
# Ten rows picked at random. No random_state is passed, so a different set of
# passengers comes back on each run.
data.sample(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
335,336,0,3,"Denkoff, Mr. Mitto",male,,0,0,349225,7.8958,,S
852,853,0,3,"Boulos, Miss. Nourelain",female,9.0,1,1,2678,15.2458,,C
369,370,1,1,"Aubart, Mme. Leontine Pauline",female,24.0,0,0,PC 17477,69.3,B35,C
36,37,1,3,"Mamee, Mr. Hanna",male,,0,0,2677,7.2292,,C
528,529,0,3,"Salonen, Mr. Johan Werner",male,39.0,0,0,3101296,7.925,,S
699,700,0,3,"Humblen, Mr. Adolf Mathias Nicolai Olsen",male,42.0,0,0,348121,7.65,F G63,S
95,96,0,3,"Shorney, Mr. Charles Joseph",male,,0,0,374910,8.05,,S
284,285,0,1,"Smith, Mr. Richard William",male,,0,0,113056,26.0,A19,S
257,258,1,1,"Cherry, Miss. Gladys",female,30.0,0,0,110152,86.5,B77,S
307,308,1,1,"Penasco y Castellana, Mrs. Victor de Satode (M...",female,17.0,1,0,PC 17758,108.9,C65,C


In [142]:
# Per-column summary: non-null count and dtype for each column, plus the
# frame's memory usage. Columns with fewer non-null entries than rows have
# missing values.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [144]:
# .iloc selects by integer position: row 0, column 4 (the Sex column).
data.iloc[0,4]

'male'

In [146]:
# Rows at positions 40 and 50; columns at positions 1, 3, 4 and 5
# (Survived, Name, Sex, Age).
data.iloc[[40,50], [1,3,4,5]]

Unnamed: 0,Survived,Name,Sex,Age
40,0,"Ahlin, Mrs. Johan (Johanna Persdotter Larsson)",female,40.0
50,0,"Panula, Master. Juha Niilo",male,7.0


In [148]:
# Positional slices exclude the stop: rows 150-154, and -4: takes the last
# four columns.
data.iloc[150:155, -4:]

Unnamed: 0,Ticket,Fare,Cabin,Embarked
150,S.O.P. 1166,12.525,,S
151,113776,66.6,C2,S
152,A.5. 11206,8.05,,S
153,A/5. 851,14.5,,S
154,Fa 265302,7.3125,,S


In [158]:
# Passengers strictly older than 30 and strictly younger than 35, all columns.
data.query("30 < Age < 35")

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
18,19,0,3,"Vander Planke, Mrs. Julius (Emelia Maria Vande...",female,31.0,1,0,345763,18.0000,,S
21,22,1,2,"Beesley, Mr. Lawrence",male,34.0,0,0,248698,13.0000,D56,S
70,71,0,2,"Jenkin, Mr. Stephen Curnow",male,32.0,0,0,C.A. 33111,10.5000,,S
74,75,1,3,"Bing, Mr. Lee",male,32.0,0,0,1601,56.4958,,S
85,86,1,3,"Backstrom, Mrs. Karl Alfred (Maria Mathilda Gu...",female,33.0,3,0,3101278,15.8500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
843,844,0,3,"Lemberopolous, Mr. Peter L",male,34.5,0,0,2683,6.4375,,C
867,868,0,1,"Roebling, Mr. Washington Augustus II",male,31.0,0,0,PC 17590,50.4958,A24,S
872,873,0,1,"Carlsson, Mr. Frans Olof",male,33.0,0,0,695,5.0000,B51 B53 B55,S
881,882,0,3,"Markun, Mr. Johann",male,33.0,0,0,349257,7.8958,,S


In [162]:
# Same age filter (30 < Age < 35), keeping only four columns of interest.
data1 = data.loc[
    data["Age"].gt(30) & data["Age"].lt(35),
    ["Name", "Pclass", "Age", "Survived"],
]

In [164]:
# Save the filtered passengers to data1.csv; the path is relative, so the
# file lands in the directory the notebook is run from.
data1.to_csv("data1.csv")

# Ejercicios 

- Confirmar que la proporción de pasajeros que se salvaron fue mayor en la Clase 1 que en las demás clases
- Confirmar que mujeres y niños (< 10 años) se salvaron más que los varones (en proporción)