
# Pandas DataFrame exercises


In [None]:
# Import the numpy package under the name np
import numpy as np

# Import the pandas package under the name pd
import pandas as pd


![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

## DataFrame creation

### Create an empty pandas DataFrame


In [None]:
# A DataFrame built from an empty list. This is not the cell's last
# expression, so its value is not displayed.
pd.DataFrame(data=[])

# Same empty data, but with explicit row labels and column names; every cell
# is empty in the saved output.
pd.DataFrame(data=[], index=[1, 2,3], columns=['a', 'b', 'c'])

Unnamed: 0,a,b,c
1,,,
2,,,
3,,,


<img width="400" src="https://cdn.dribbble.com/users/4678/screenshots/1986600/avengers.png" alt="Avengers illustration">

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Create a `marvel_df` pandas DataFrame with the given marvel data


In [None]:
# One inner list per character, laid out as [name, sex, year]; the year is the
# field later labelled 'first_appearance'.
marvel_data = [
    ['Spider-Man', 'male', 1962],
    ['Captain America', 'male', 1941],
    ['Wolverine', 'male', 1974],
    ['Iron Man', 'male', 1963],
    ['Thor', 'male', 1963],
    ['Thing', 'male', 1961],
    ['Mister Fantastic', 'male', 1961],
    ['Hulk', 'male', 1962],
    ['Beast', 'male', 1963],
    ['Invisible Woman', 'female', 1961],
    ['Storm', 'female', 1975],
    ['Namor', 'male', 1939],
    ['Hawkeye', 'male', 1964],
    ['Daredevil', 'male', 1964],
    ['Doctor Strange', 'male', 1963],
    ['Hank Pym', 'male', 1962],
    ['Scarlet Witch', 'female', 1964],
    ['Wasp', 'female', 1963],
    ['Black Widow', 'female', 1964],
    ['Vision', 'male', 1968]
]

In [None]:
# Build the frame straight from the list of rows; no labels are supplied, so
# the columns show up as 0, 1, 2 in the output.
marvel_df = pd.DataFrame(marvel_data)
marvel_df

Unnamed: 0,0,1,2
0,Spider-Man,male,1962
1,Captain America,male,1941
2,Wolverine,male,1974
3,Iron Man,male,1963
4,Thor,male,1963
5,Thing,male,1961
6,Mister Fantastic,male,1961
7,Hulk,male,1962
8,Beast,male,1963
9,Invisible Woman,female,1961


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Add column names to `marvel_df`
 

In [None]:
# Labels for the three fields of each marvel_data row, listed in row order.
col_names = ['name', 'sex', 'first_appearance']


In [None]:
# Replace the frame's column labels with the names held in col_names.
marvel_df.columns = col_names
marvel_df

Unnamed: 0,name,sex,first_appearance
0,Spider-Man,male,1962
1,Captain America,male,1941
2,Wolverine,male,1974
3,Iron Man,male,1963
4,Thor,male,1963
5,Thing,male,1961
6,Mister Fantastic,male,1961
7,Hulk,male,1962
8,Beast,male,1963
9,Invisible Woman,female,1961


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Add index names to `marvel_df` (use the character name as the index)


In [None]:
# Promote the character names to row labels. drop=False keeps the 'name'
# column in the frame as well, so it is still shown next to the new index.
marvel_df = marvel_df.set_index('name', drop=False)
marvel_df

Unnamed: 0_level_0,name,sex,first_appearance
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Spider-Man,Spider-Man,male,1962
Captain America,Captain America,male,1941
Wolverine,Wolverine,male,1974
Iron Man,Iron Man,male,1963
Thor,Thor,male,1963
Thing,Thing,male,1961
Mister Fantastic,Mister Fantastic,male,1961
Hulk,Hulk,male,1962
Beast,Beast,male,1963
Invisible Woman,Invisible Woman,female,1961


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Drop the name column as it's now the index

In [None]:
# Remove the 'name' column, which duplicates the values used as the index.
# The result is rebound instead of using inplace=True, and errors='ignore'
# makes the cell safe to re-run: a second execution is a no-op rather than a
# KeyError for the column that is already gone.
marvel_df = marvel_df.drop(columns=['name'], errors='ignore')

marvel_df

Unnamed: 0_level_0,sex,first_appearance
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Spider-Man,male,1962
Captain America,male,1941
Wolverine,male,1974
Iron Man,male,1963
Thor,male,1963
Thing,male,1961
Mister Fantastic,male,1961
Hulk,male,1962
Beast,male,1963
Invisible Woman,female,1961


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Drop 'Namor' and 'Hank Pym' rows


In [None]:
# Remove the 'Namor' and 'Hank Pym' rows, addressed by their index labels.
# The result is rebound instead of using inplace=True, and errors='ignore'
# makes the cell safe to re-run: once the rows are gone, running it again
# leaves the frame unchanged instead of raising KeyError.
marvel_df = marvel_df.drop(index=['Namor', 'Hank Pym'], errors='ignore')
marvel_df

Unnamed: 0_level_0,sex,first_appearance
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Spider-Man,male,1962
Captain America,male,1941
Wolverine,male,1974
Iron Man,male,1963
Thor,male,1963
Thing,male,1961
Mister Fantastic,male,1961
Hulk,male,1962
Beast,male,1963
Invisible Woman,female,1961


![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

## DataFrame selection, slicing and indexation

### Show the first 5 elements of `marvel_df`
 

In [None]:
# Positional slice of the first five rows. Not displayed: only the cell's
# last expression is rendered.
marvel_df.iloc[:5]

# The same request via head(); this is the value the cell displays.
marvel_df.head(5)


Unnamed: 0_level_0,sex,first_appearance
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Spider-Man,male,1962
Captain America,male,1941
Wolverine,male,1974
Iron Man,male,1963
Thor,male,1963


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Show the last 5 elements of `marvel_df`


In [None]:
# Last five rows of the frame.
marvel_df.tail(5)


Unnamed: 0_level_0,sex,first_appearance
name,Unnamed: 1_level_1,Unnamed: 2_level_1
Doctor Strange,male,1963
Scarlet Witch,female,1964
Wasp,female,1963
Black Widow,female,1964
Vision,male,1968


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Show just the sex of the first 5 elements of `marvel_df`

In [None]:
# your code goes here


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Show the first and last elements of `marvel_df`


In [None]:
# your code goes here


![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

## DataFrame manipulation and operations

### Modify the `first_appearance` of 'Vision' to year 1964

In [None]:
# Row for 'Vision' before the correction.
print(marvel_df.loc['Vision'])

# Scalar write addressed by (row label, column label).
marvel_df.at['Vision', 'first_appearance'] = 1964

# Same row again, now carrying the updated year.
print(marvel_df.loc['Vision'])


sex                 male
first_appearance    1968
Name: Vision, dtype: object
sex                 male
first_appearance    1964
Name: Vision, dtype: object


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Add a new column to `marvel_df` called 'years_since' with the years since `first_appearance`


In [None]:
# Year the "years since" figures are measured from. It is fixed at 2022, the
# value the saved output was computed with.
REFERENCE_YEAR = 2022

# Element-wise subtraction: one result per character.
marvel_df['years_since'] = REFERENCE_YEAR - marvel_df['first_appearance']
marvel_df

Unnamed: 0_level_0,sex,first_appearance,years_since
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Spider-Man,male,1962,60
Captain America,male,1941,81
Wolverine,male,1974,48
Iron Man,male,1963,59
Thor,male,1963,59
Thing,male,1961,61
Mister Fantastic,male,1961,61
Hulk,male,1962,60
Beast,male,1963,59
Invisible Woman,female,1961,61


![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

## DataFrame boolean arrays (also called masks)

### Given the `marvel_df` pandas DataFrame, make a mask showing the female characters


In [None]:
# Boolean Series: True on the rows whose 'sex' value is 'female'.
female_mask = marvel_df['sex'] == 'female'

# Indexing the frame with the mask keeps only the rows marked True.
marvel_df[female_mask]

Unnamed: 0_level_0,sex,first_appearance,years_since
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Invisible Woman,female,1961,61
Storm,female,1975,47
Scarlet Witch,female,1964,58
Wasp,female,1963,59
Black Widow,female,1964,58


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Given the `marvel_df` pandas DataFrame, get the male characters


In [None]:
# your code goes here


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Given the `marvel_df` pandas DataFrame, get the characters with `first_appearance` after 1970


In [None]:
# your code goes here


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Given the `marvel_df` pandas DataFrame, get the female characters with `first_appearance` after 1970

In [None]:
# your code goes here


![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)

## DataFrame summary statistics

### Show basic statistics of `marvel_df`

In [None]:
# Summary statistics for the frame; the saved output covers the
# first_appearance and years_since columns.

marvel_df.describe()


Unnamed: 0,first_appearance,years_since
count,18.0,18.0
mean,1962.888889,59.111111
std,6.720372,6.720372
min,1941.0,47.0
25%,1962.0,58.0
50%,1963.0,59.0
75%,1964.0,60.0
max,1975.0,81.0


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Given the `marvel_df` pandas DataFrame, show the mean value of `first_appearance`

In [None]:
# your code goes here


![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Given the `marvel_df` pandas DataFrame, show the min value of `first_appearance`


In [None]:
# Smallest (earliest) value in the first_appearance column.

marvel_df['first_appearance'].min()


1941

![green-divider](https://user-images.githubusercontent.com/7065401/52071924-c003ad80-2562-11e9-8297-1c6595f8a7ff.png)

### Given the `marvel_df` pandas DataFrame, get the characters with the min value of `first_appearance`

![purple-divider](https://user-images.githubusercontent.com/7065401/52071927-c1cd7100-2562-11e9-908a-dde91ba14e59.png)
