# **PROGRAMMING WITH PANDAS**

### DataFrames

In [1]:
import pandas as pd

In [3]:
# Build a small DataFrame row by row: every Series is one record and its keys
# become the column labels.
records = [
    {'Name': 'Franss', 'Age': 27},
    {'Name': 'Fernando', 'Age': 28},
]
df = pd.DataFrame([pd.Series(record) for record in records])
df

Unnamed: 0,Name,Age
0,Franss,27
1,Fernando,28


In [4]:
# Sample dataset used in the sections below: one list per column, with the six
# entries of every list aligned by position.
people = {
    'Name': ['Franss', 'Fernando', 'Davis', 'Eduardo', 'Julio', 'Scott'],
    'Age': [27, 28, 27, 26, 30, 34],
    'Degree': [
        'Data Engineer', 'Mathematics', 'Data Scientist',
        'Computer Science', 'Statistics', 'Systems Engineer',
    ],
    'Mail': [
        'franss@gmail.com', 'fernando@yahoo.es', 'davis@gmail.com',
        'eduardo@gmail.com', 'julio@gmail.com', 'scott@gmail.com',
    ],
    # Dates are plain 'YYYY-MM-DD' strings here, not datetime values.
    'Date': [
        '2016-09-16', '2017-01-20', '2015-04-20',
        '2018-02-27', '2020-03-11', '2021-01-28',
    ],
    'Weight': [78, 65, 98, 69, 80, 63],
    'Height': [1.56, 1.86, 1.58, 1.67, 1.73, 1.8],
}
df = pd.DataFrame(people)
df

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height
0,Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56
1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86
2,Davis,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58
3,Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67
4,Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73
5,Scott,34,Systems Engineer,scott@gmail.com,2021-01-28,63,1.8


## **Basic Methods**

In [5]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
df.shape

(6, 7)

In [6]:
# Last two rows of the DataFrame.
df.tail(2)

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height
4,Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73
5,Scott,34,Systems Engineer,scott@gmail.com,2021-01-28,63,1.8


In [7]:
# Print the index range, each column's non-null count and dtype, and the memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6 entries, 0 to 5
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Name    6 non-null      object 
 1   Age     6 non-null      int64  
 2   Degree  6 non-null      object 
 3   Mail    6 non-null      object 
 4   Date    6 non-null      object 
 5   Weight  6 non-null      int64  
 6   Height  6 non-null      float64
dtypes: float64(1), int64(2), object(4)
memory usage: 464.0+ bytes


In [8]:
# Dimensions again: still (rows, columns); nothing has modified df since it was built.
df.shape

(6, 7)

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max); the result here
# covers the numeric columns Age, Weight and Height.
df.describe()

Unnamed: 0,Age,Weight,Height
count,6.0,6.0,6.0
mean,28.666667,75.5,1.7
std,2.94392,12.973049,0.119499
min,26.0,63.0,1.56
25%,27.0,66.0,1.6025
50%,27.5,73.5,1.7
75%,29.5,79.5,1.7825
max,34.0,98.0,1.86


In [10]:
# The data as a NumPy array, one inner array per row; with these mixed column
# types the array comes back with dtype=object.
df.values

array([['Franss', 27, 'Data Engineer', 'franss@gmail.com', '2016-09-16',
        78, 1.56],
       ['Fernando', 28, 'Mathematics', 'fernando@yahoo.es', '2017-01-20',
        65, 1.86],
       ['Davis', 27, 'Data Scientist', 'davis@gmail.com', '2015-04-20',
        98, 1.58],
       ['Eduardo', 26, 'Computer Science', 'eduardo@gmail.com',
        '2018-02-27', 69, 1.67],
       ['Julio', 30, 'Statistics', 'julio@gmail.com', '2020-03-11', 80,
        1.73],
       ['Scott', 34, 'Systems Engineer', 'scott@gmail.com', '2021-01-28',
        63, 1.8]], dtype=object)

In [11]:
# Column labels as a plain Python list.
df.columns.tolist()

['Name', 'Age', 'Degree', 'Mail', 'Date', 'Weight', 'Height']

In [12]:
# Row labels; this DataFrame uses the default RangeIndex.
df.index

RangeIndex(start=0, stop=6, step=1)

In [14]:
# Rows ordered from heaviest to lightest; original row labels are preserved.
df.sort_values(by="Weight", ascending=False)

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height
2,Davis,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58
4,Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73
0,Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56
3,Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67
1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86
5,Scott,34,Systems Engineer,scott@gmail.com,2021-01-28,63,1.8


## **Subsetting a DataFrame**

In [15]:
# Column labels as a pandas Index.
df.columns

Index(['Name', 'Age', 'Degree', 'Mail', 'Date', 'Weight', 'Height'], dtype='object')

In [16]:
# Select a subset of columns by label (all rows).
df.loc[:, ['Name', 'Age']]

Unnamed: 0,Name,Age
0,Franss,27
1,Fernando,28
2,Davis,27
3,Eduardo,26
4,Julio,30
5,Scott,34


In [17]:
# The labels to extract can be held in a list; the result follows the list's order.
cols_a_extraer = ['Degree', 'Name']
df.loc[:, cols_a_extraer]

Unnamed: 0,Degree,Name
0,Data Engineer,Franss
1,Mathematics,Fernando
2,Data Scientist,Davis
3,Computer Science,Eduardo
4,Statistics,Julio
5,Systems Engineer,Scott


In [18]:
# Same column selection expressed through DataFrame.filter.
df.filter(items=cols_a_extraer)

Unnamed: 0,Degree,Name
0,Data Engineer,Franss
1,Mathematics,Fernando
2,Data Scientist,Davis
3,Computer Science,Eduardo
4,Statistics,Julio
5,Systems Engineer,Scott


In [19]:
# Element-wise comparison: a boolean Series with one True/False per row.
df['Age'].gt(18)

0    True
1    True
2    True
3    True
4    True
5    True
Name: Age, dtype: bool

In [20]:
# Keep only the rows where the boolean mask is True.
df.loc[df['Age'] > 18]

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height
0,Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56
1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86
2,Davis,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58
3,Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67
4,Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73
5,Scott,34,Systems Engineer,scott@gmail.com,2021-01-28,63,1.8


In [23]:
# Rows whose Name equals 'Davis' exactly.
df.loc[df['Name'].eq('Davis')]

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height
2,Davis,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58


In [24]:
# Parse the 'Date' strings into datetimes before comparing, so the filter is a
# real chronological comparison. Comparing the raw strings is lexicographic and
# only gives the right answer while every value is zero-padded 'YYYY-MM-DD'.
df[pd.to_datetime(df['Date']) > pd.Timestamp("2017-05-01")]

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height
3,Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67
4,Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73
5,Scott,34,Systems Engineer,scott@gmail.com,2021-01-28,63,1.8


In [25]:
# Show the full DataFrame; the filters above returned new objects and left df unchanged.
df

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height
0,Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56
1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86
2,Davis,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58
3,Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67
4,Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73
5,Scott,34,Systems Engineer,scott@gmail.com,2021-01-28,63,1.8


In [28]:
# Give each condition its own named boolean mask.
es_mathematics = df['Degree'].eq('Mathematics')
no_es_milenial_x = df['Age'].lt(30)

In [29]:
# Combine the two masks with element-wise AND and keep the matching rows.
df.loc[es_mathematics & no_es_milenial_x]

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height
1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86


In [30]:
# Both conditions in a single expression instead of pre-built masks.
df.query("Degree == 'Mathematics' and Age < 35")

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height
1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86


In [31]:
# Membership test per row; no Age in df equals 18 or 21, so every entry is False.
df['Age'].isin([18,21])

0    False
1    False
2    False
3    False
4    False
5    False
Name: Age, dtype: bool

In [33]:
# Rows whose Age is one of the listed values.
df.loc[df['Age'].isin([26, 35])]

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height
3,Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67


## **Add Columns**

In [34]:
# New column: Weight divided by Height squared (presumably a body-mass index;
# "imc" looks like the Spanish abbreviation - confirm).
df['imc'] = df['Weight'].div(df['Height'].pow(2))

In [35]:
# df now carries the extra 'imc' column added by direct assignment.
df

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height,imc
0,Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56,32.051282
1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86,18.788299
2,Davis,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58,39.256529
3,Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67,24.740937
4,Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73,26.729927
5,Scott,34,Systems Engineer,scott@gmail.com,2021-01-28,63,1.8,19.444444


In [36]:
# assign returns a copy with the extra column; df itself is not modified.
df.assign(imc_1=df['Weight'] / df['Height'] ** 2)

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height,imc,imc_1
0,Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56,32.051282,32.051282
1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86,18.788299,18.788299
2,Davis,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58,39.256529,39.256529
3,Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67,24.740937,24.740937
4,Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73,26.729927,26.729927
5,Scott,34,Systems Engineer,scott@gmail.com,2021-01-28,63,1.8,19.444444,19.444444


In [37]:
# Same computation with a callable: assign passes the DataFrame being built to
# the lambda, so no outer variable name is needed.
df.assign(imc_otro=lambda frame: frame['Weight'] / frame['Height'] ** 2)

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height,imc,imc_otro
0,Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56,32.051282,32.051282
1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86,18.788299,18.788299
2,Davis,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58,39.256529,39.256529
3,Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67,24.740937,24.740937
4,Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73,26.729927,26.729927
5,Scott,34,Systems Engineer,scott@gmail.com,2021-01-28,63,1.8,19.444444,19.444444


In [38]:
# Persist the squared height as its own column on df.
df['Height_2'] = df['Height'].pow(2)

In [39]:
# Same ratio as 'imc', computed from the stored Height_2 column.
df['imc_2'] = df['Weight'].div(df['Height_2'])

In [40]:
# Derived columns in a single assign call: keyword arguments are evaluated in
# order, so imc_2 can read the height_2 column created just before it.
df.assign(
    height_2=lambda frame: frame['Height'] ** 2,
    weight_2=lambda frame: frame['Weight'] ** 2,
    imc_2=lambda frame: frame['Weight'] / frame['height_2'],
)

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height,imc,Height_2,imc_2,height_2,weight_2
0,Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56,32.051282,2.4336,32.051282,2.4336,6084
1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86,18.788299,3.4596,18.788299,3.4596,4225
2,Davis,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58,39.256529,2.4964,39.256529,2.4964,9604
3,Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67,24.740937,2.7889,24.740937,2.7889,4761
4,Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73,26.729927,2.9929,26.729927,2.9929,6400
5,Scott,34,Systems Engineer,scott@gmail.com,2021-01-28,63,1.8,19.444444,3.24,19.444444,3.24,3969


## **Summary Data**

In [41]:
# Arithmetic mean of a single column.
df['Age'].mean()

28.666666666666668

In [42]:
# Smallest value in the Age column.
df['Age'].min()

26

In [43]:
# Mean of every numeric column; numeric_only=True leaves the text columns out.
df.mean(numeric_only=True)

Age         28.666667
Weight      75.500000
Height       1.700000
imc         26.835237
Height_2     2.901900
imc_2       26.835237
dtype: float64

In [44]:
def percentil_50(column):
    """Return the 50th percentile of ``column``.

    ``column`` must provide a ``quantile`` method (e.g. a pandas Series).
    """
    half = 0.5
    return column.quantile(half)

In [45]:
# Built-in median, for comparison with the custom percentil_50 aggregation.
df['Age'].median()

27.5

In [46]:
# agg accepts a user-defined function that reduces the column to one value.
df['Age'].agg(percentil_50)

27.5

In [47]:
# The aggregation can also be given inline as a lambda.
df['Age'].agg(lambda x: x.median())

27.5

In [48]:
def elev_mean(column):
    """Return the mean of the squared values of ``column``."""
    squared = column ** 2
    return squared.mean()

In [49]:
# A list of functions yields a Series with one entry per function, labelled by
# the function's name.
df['Age'].agg([elev_mean,percentil_50])

elev_mean       829.0
percentil_50     27.5
Name: Age, dtype: float64

In [50]:
# Running total of Weight shown next to the original values.
df[['Weight']].assign(weight_acum=lambda frame: frame['Weight'].cumsum())

Unnamed: 0,Weight,weight_acum
0,78,78
1,65,143
2,98,241
3,69,310
4,80,390
5,63,453


In [51]:
# DataFrame.eval with a multi-line expression: each line assigns one new column
# computed from existing ones, and a new DataFrame is returned.
# NOTE(review): the recorded output shows Weight_3 as float (79.0) although
# Weight is int64 - confirm whether that is intended before relying on the dtype.
(
    df[['Weight']]
    .eval("""
    Weight_2 = Weight/20**2
    Weight_3 = Weight +1
    """)
)

Unnamed: 0,Weight,Weight_2,Weight_3
0,78,0.195,79.0
1,65,0.1625,66.0
2,98,0.245,99.0
3,69,0.1725,70.0
4,80,0.2,81.0
5,63,0.1575,64.0


## **Count**

In [54]:
import pandas as pd

# (hobby, city) pairs, one per row, expanded below into per-row dicts.
hobby_city_pairs = [
    ('Hiking', 'Nueva York'),
    ('Painting', 'Los Ángeles'),
    ('Music', 'Chicago'),
    ('Photography', 'San Francisco'),
    ('Sports', 'Miami'),
    ('Reading', 'Seattle'),
    ('Gardening', 'Dallas'),
    ('Cooking', 'Houston'),
    ('Travel', 'Boston'),
    ('Diving', 'Denver'),
    ('Hiking', 'Nueva York'),
    ('Painting', 'Los Ángeles'),
]

# Raw dataset with deliberate gaps (None in Name, Age and Salary) and a mix of
# column types: text, numbers, dates, a categorical and nested dicts.
data = {
    'Name': [
        'Alice', None, 'Charlie', 'David', 'Eve', 'Frank',
        'Grace', 'Hank', 'Ivy', 'Jack', 'Alice', 'Bob',
    ],
    'Age': [25, 30, 22, 35, 28, 40, 27, 32, 26, 29, None, 30],
    'Salary': [
        None, 60000, 55000, 70000, 65000, 75000,
        60000, 72000, 59000, 68000, 50000, 60000,
    ],
    'Date of entry': pd.date_range(start='2023-01-01', periods=12, freq='D'),
    'Departament': pd.Categorical([
        'Sales', 'IT', 'RRHH', 'Sales', 'IT', 'RRHH',
        'Sales', 'IT', 'RRHH', 'Sales', 'Sales', 'IT',
    ]),
    'Additional Information': [
        {'Hobbies': hobby, 'City': city} for hobby, city in hobby_city_pairs
    ],
}

df_raw = pd.DataFrame(data)
df_raw


Unnamed: 0,Name,Age,Salary,Date of entry,Departament,Additional Information
0,Alice,25.0,,2023-01-01,Sales,"{'Hobbies': 'Hiking', 'City': 'Nueva York'}"
1,,30.0,60000.0,2023-01-02,IT,"{'Hobbies': 'Painting', 'City': 'Los Ángeles'}"
2,Charlie,22.0,55000.0,2023-01-03,RRHH,"{'Hobbies': 'Music', 'City': 'Chicago'}"
3,David,35.0,70000.0,2023-01-04,Sales,"{'Hobbies': 'Photography', 'City': 'San Franci..."
4,Eve,28.0,65000.0,2023-01-05,IT,"{'Hobbies': 'Sports', 'City': 'Miami'}"
5,Frank,40.0,75000.0,2023-01-06,RRHH,"{'Hobbies': 'Reading', 'City': 'Seattle'}"
6,Grace,27.0,60000.0,2023-01-07,Sales,"{'Hobbies': 'Gardening', 'City': 'Dallas'}"
7,Hank,32.0,72000.0,2023-01-08,IT,"{'Hobbies': 'Cooking', 'City': 'Houston'}"
8,Ivy,26.0,59000.0,2023-01-09,RRHH,"{'Hobbies': 'Travel', 'City': 'Boston'}"
9,Jack,29.0,68000.0,2023-01-10,Sales,"{'Hobbies': 'Diving', 'City': 'Denver'}"




In [55]:
# Keep the original row labels as an 'index' column, discard every row that is
# missing a Name, Age or Salary, then renumber the surviving rows from 0.
(
    df_raw
    .reset_index(drop=False)
    .dropna(axis=0, how='any', subset=['Name', 'Age', 'Salary'])
    .reset_index(drop=True)
)


Unnamed: 0,index,Name,Age,Salary,Date of entry,Departament,Additional Information
0,2,Charlie,22.0,55000.0,2023-01-03,RRHH,"{'Hobbies': 'Music', 'City': 'Chicago'}"
1,3,David,35.0,70000.0,2023-01-04,Sales,"{'Hobbies': 'Photography', 'City': 'San Franci..."
2,4,Eve,28.0,65000.0,2023-01-05,IT,"{'Hobbies': 'Sports', 'City': 'Miami'}"
3,5,Frank,40.0,75000.0,2023-01-06,RRHH,"{'Hobbies': 'Reading', 'City': 'Seattle'}"
4,6,Grace,27.0,60000.0,2023-01-07,Sales,"{'Hobbies': 'Gardening', 'City': 'Dallas'}"
5,7,Hank,32.0,72000.0,2023-01-08,IT,"{'Hobbies': 'Cooking', 'City': 'Houston'}"
6,8,Ivy,26.0,59000.0,2023-01-09,RRHH,"{'Hobbies': 'Travel', 'City': 'Boston'}"
7,9,Jack,29.0,68000.0,2023-01-10,Sales,"{'Hobbies': 'Diving', 'City': 'Denver'}"
8,11,Bob,30.0,60000.0,2023-01-12,IT,"{'Hobbies': 'Painting', 'City': 'Los Ángeles'}"


In [56]:
# Convert the categorical 'Departament' column to plain strings.
# Bracket assignment is used instead of attribute assignment: setting
# `df_raw.<name> = ...` only updates a column when that column already exists,
# and otherwise silently creates a Python attribute rather than a column.
df_raw['Departament'] = df_raw['Departament'].astype(str)

In [57]:
# Python type of the first value stored in the 'Additional Information' column.
type(df_raw['Additional Information'].iloc[0])

dict

In [58]:
# Show df_raw after the 'Departament' dtype conversion; the displayed values are unchanged.
df_raw

Unnamed: 0,Name,Age,Salary,Date of entry,Departament,Additional Information
0,Alice,25.0,,2023-01-01,Sales,"{'Hobbies': 'Hiking', 'City': 'Nueva York'}"
1,,30.0,60000.0,2023-01-02,IT,"{'Hobbies': 'Painting', 'City': 'Los Ángeles'}"
2,Charlie,22.0,55000.0,2023-01-03,RRHH,"{'Hobbies': 'Music', 'City': 'Chicago'}"
3,David,35.0,70000.0,2023-01-04,Sales,"{'Hobbies': 'Photography', 'City': 'San Franci..."
4,Eve,28.0,65000.0,2023-01-05,IT,"{'Hobbies': 'Sports', 'City': 'Miami'}"
5,Frank,40.0,75000.0,2023-01-06,RRHH,"{'Hobbies': 'Reading', 'City': 'Seattle'}"
6,Grace,27.0,60000.0,2023-01-07,Sales,"{'Hobbies': 'Gardening', 'City': 'Dallas'}"
7,Hank,32.0,72000.0,2023-01-08,IT,"{'Hobbies': 'Cooking', 'City': 'Houston'}"
8,Ivy,26.0,59000.0,2023-01-09,RRHH,"{'Hobbies': 'Travel', 'City': 'Boston'}"
9,Jack,29.0,68000.0,2023-01-10,Sales,"{'Hobbies': 'Diving', 'City': 'Denver'}"




In [60]:
# Remove the entry-date column by rebinding df_raw instead of mutating it in
# place. errors='ignore' keeps this cell re-runnable: once the column is gone,
# a second execution is a no-op rather than a KeyError.
df_raw = df_raw.drop(columns=['Date of entry'], errors='ignore')

In [62]:
# Overwrite a single cell in place: the 'Departament' value of the row labelled 0.
df_raw.loc[0,"Departament"] = "IT"

In [64]:
# Back to the first dataset: df still holds the imc, Height_2 and imc_2 columns
# that were added by direct assignment.
df

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height,imc,Height_2,imc_2
0,Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56,32.051282,2.4336,32.051282
1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86,18.788299,3.4596,18.788299
2,Davis,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58,39.256529,2.4964,39.256529
3,Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67,24.740937,2.7889,24.740937
4,Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73,26.729927,2.9929,26.729927
5,Scott,34,Systems Engineer,scott@gmail.com,2021-01-28,63,1.8,19.444444,3.24,19.444444


## **Slicing and Indexing Data**

In [65]:
# Column labels of df, including the derived columns.
df.columns

Index(['Name', 'Age', 'Degree', 'Mail', 'Date', 'Weight', 'Height', 'imc',
       'Height_2', 'imc_2'],
      dtype='object')

In [66]:
# Row labels of df: still the default RangeIndex.
df.index

RangeIndex(start=0, stop=6, step=1)

In [67]:
# New DataFrame that uses the Name column as its row index; df is left as is.
df_ind = df.set_index("Name")

In [68]:
# First five rows; Name now appears as the index rather than as a column.
df_ind.head()

Unnamed: 0_level_0,Age,Degree,Mail,Date,Weight,Height,imc,Height_2,imc_2
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56,32.051282,2.4336,32.051282
Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86,18.788299,3.4596,18.788299
Davis,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58,39.256529,2.4964,39.256529
Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67,24.740937,2.7889,24.740937
Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73,26.729927,2.9929,26.729927


In [69]:
# reset_index moves the current index into a regular column named 'index'.
df.reset_index().head()

Unnamed: 0,index,Name,Age,Degree,Mail,Date,Weight,Height,imc,Height_2,imc_2
0,0,Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56,32.051282,2.4336,32.051282
1,1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86,18.788299,3.4596,18.788299
2,2,Davis,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58,39.256529,2.4964,39.256529
3,3,Eduardo,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67,24.740937,2.7889,24.740937
4,4,Julio,30,Statistics,julio@gmail.com,2020-03-11,80,1.73,26.729927,2.9929,26.729927


In [70]:
# drop=True discards the Name index instead of restoring it as a column.
df_ind.reset_index(drop=True).head()

Unnamed: 0,Age,Degree,Mail,Date,Weight,Height,imc,Height_2,imc_2
0,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56,32.051282,2.4336,32.051282
1,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86,18.788299,3.4596,18.788299
2,27,Data Scientist,davis@gmail.com,2015-04-20,98,1.58,39.256529,2.4964,39.256529
3,26,Computer Science,eduardo@gmail.com,2018-02-27,69,1.67,24.740937,2.7889,24.740937
4,30,Statistics,julio@gmail.com,2020-03-11,80,1.73,26.729927,2.9929,26.729927


In [76]:
# Filter by value: rows whose Name is one of the listed names.
df.loc[df['Name'].isin(['Franss', 'Fernando'])]

Unnamed: 0,Name,Age,Degree,Mail,Date,Weight,Height,imc,Height_2,imc_2
0,Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56,32.051282,2.4336,32.051282
1,Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86,18.788299,3.4596,18.788299


In [77]:
# Same selection by index label: with Name as the index, .loc looks the rows up directly.
df_ind.loc[['Franss','Fernando']]

Unnamed: 0_level_0,Age,Degree,Mail,Date,Weight,Height,imc,Height_2,imc_2
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Franss,27,Data Engineer,franss@gmail.com,2016-09-16,78,1.56,32.051282,2.4336,32.051282
Fernando,28,Mathematics,fernando@yahoo.es,2017-01-20,65,1.86,18.788299,3.4596,18.788299


In [79]:
# Single-label lookup after indexing by Name; one matching row comes back as a Series.
df_raw.set_index("Name").loc['Bob']

Age                                                                 30.0
Salary                                                           60000.0
Departament                                                           IT
Additional Information    {'Hobbies': 'Painting', 'City': 'Los Ángeles'}
Name: Bob, dtype: object

In [80]:
# Rows in the IT department with Age below 30 (rows with a missing Age do not match).
df_raw.loc[df_raw['Departament'].eq('IT') & df_raw['Age'].lt(30)]

Unnamed: 0,Name,Age,Salary,Departament,Additional Information
0,Alice,25.0,,IT,"{'Hobbies': 'Hiking', 'City': 'Nueva York'}"
4,Eve,28.0,65000.0,IT,"{'Hobbies': 'Sports', 'City': 'Miami'}"




In [82]:
# The same filter written as a query string.
df_raw.query("(Departament == 'IT') & (Age < 30)")

Unnamed: 0,Name,Age,Salary,Departament,Additional Information
0,Alice,25.0,,IT,"{'Hobbies': 'Hiking', 'City': 'Nueva York'}"
4,Eve,28.0,65000.0,IT,"{'Hobbies': 'Sports', 'City': 'Miami'}"




In [83]:
# With a two-level index, .loc accepts a list of (Departament, Name) tuples and
# returns the matching rows in the order requested.
wanted_pairs = [('IT', 'Bob'), ('IT', 'Eve')]
df_raw.set_index(['Departament', 'Name']).loc[wanted_pairs]

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Salary,Additional Information
Departament,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
IT,Bob,30.0,60000.0,"{'Hobbies': 'Painting', 'City': 'Los Ángeles'}"
IT,Eve,28.0,65000.0,"{'Hobbies': 'Sports', 'City': 'Miami'}"




In [84]:
# Sort the rows by the index levels: Departament first, then Name within each department.
df_raw.set_index(['Departament','Name']).sort_index()

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Salary,Additional Information
Departament,Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
IT,Alice,25.0,,"{'Hobbies': 'Hiking', 'City': 'Nueva York'}"
IT,Bob,30.0,60000.0,"{'Hobbies': 'Painting', 'City': 'Los Ángeles'}"
IT,Eve,28.0,65000.0,"{'Hobbies': 'Sports', 'City': 'Miami'}"
IT,Hank,32.0,72000.0,"{'Hobbies': 'Cooking', 'City': 'Houston'}"
IT,,30.0,60000.0,"{'Hobbies': 'Painting', 'City': 'Los Ángeles'}"
RRHH,Charlie,22.0,55000.0,"{'Hobbies': 'Music', 'City': 'Chicago'}"
RRHH,Frank,40.0,75000.0,"{'Hobbies': 'Reading', 'City': 'Seattle'}"
RRHH,Ivy,26.0,59000.0,"{'Hobbies': 'Travel', 'City': 'Boston'}"
Sales,Alice,,50000.0,"{'Hobbies': 'Hiking', 'City': 'Nueva York'}"
Sales,David,35.0,70000.0,"{'Hobbies': 'Photography', 'City': 'San Franci..."




### Reading Data

In [88]:
# Disabled example (needs network access): pd.read_html returns a list of
# DataFrames, one per HTML table found at the URL; [0] would pick the first.
#pd.read_html("https://github.com/DavisGarciaFernandez")[0]