# Part 1: Pandas - from Zero to Hero

## DataFrame Basics II

### Filtering DataFrames with one Condition

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head(10)

In [None]:
titanic.sex.head(10)

In [None]:
titanic.sex == "male"

In [None]:
titanic[titanic.sex == "male"]["fare"]

In [None]:
titanic.loc[titanic.sex == "male", "fare"]

In [None]:
mask1 = titanic.sex == "male"
mask1

In [None]:
titanic_male = titanic.loc[mask1]

In [None]:
titanic_male.head()

In [None]:
titanic.dtypes  # dtype of each column; the next cell compares these against object

In [None]:
mask2 = titanic.dtypes == object
mask2

In [None]:
titanic.loc[:, ~mask2]

In [None]:
titanic.loc[mask1, ~mask2]

### Filtering DataFrames with many Conditions (AND)

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head(10)

In [None]:
mask1 = titanic.sex == "male"
mask1.head()

In [None]:
mask2 = titanic.age > 14
mask2.head()

In [None]:
(mask1 & mask2).head()

In [None]:
male_adult = titanic.loc[mask1 & mask2, ["survived", "pclass", "sex", "age"]]
male_adult.head(20)

In [None]:
male_adult.info()

In [None]:
male_adult.describe()

In [None]:
titanic.describe()

### Filtering DataFrames with many Conditions (OR)

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
mask1 = titanic.sex == "female"
mask1.head(20)

In [None]:
mask2 = titanic.age < 14
mask2.head(20)

In [None]:
(mask1 | mask2).head(11)

In [None]:
titanic.loc[mask1 | mask2]

In [None]:
wom_or_chi = titanic.loc[mask1 | mask2, ["survived", "pclass", "sex", "age"]]

In [None]:
wom_or_chi.head()

In [None]:
wom_or_chi.info()

In [None]:
wom_or_chi.describe()

In [None]:
titanic.describe()

### Advanced Filtering with between(), isin() and ~

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv("summer.csv")

In [None]:
summer.head()

In [None]:
og_1988 = summer.loc[summer.Year == 1988]

In [None]:
og_1988.head()

In [None]:
og_1988.tail()

In [None]:
og_1988.info()

In [None]:
og_since1992 = summer.loc[summer.Year >= 1992]

In [None]:
og_since1992.head()

In [None]:
og_since1992.tail()

In [None]:
summer.Year.between(1960, 1969).head()

In [None]:
# Keep the Games held from 1960 to 1969, both endpoints included.
# `inclusive` expects a string ("both", "neither", "left", "right"); the boolean
# form was deprecated in pandas 1.3 and is rejected by pandas 2.x.
og_60s = summer.loc[summer.Year.between(1960, 1969, inclusive="both")]

In [None]:
og_60s.head()

In [None]:
og_60s.tail()

In [None]:
my_favourite_games = [1972, 1996]

In [None]:
summer.Year.isin(my_favourite_games).head()

In [None]:
og_72_96 = summer.loc[summer.Year.isin(my_favourite_games)]

In [None]:
og_72_96.head()

In [None]:
og_72_96.tail()

In [None]:
og_not_72_96 = summer.loc[~summer.Year.isin(my_favourite_games)]

In [None]:
og_not_72_96.head()

In [None]:
og_not_72_96.Year.unique()

### any() and all()

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
titanic.sex == "male"

In [None]:
(titanic.sex == "male").any()

In [None]:
(titanic.sex == "male").all()

In [None]:
(titanic.age == 80.0).any()

In [None]:
pd.Series([-1, 0.5 , 1, -0.1, 0]).any()

In [None]:
titanic.fare.all()

### Removing Columns

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv("summer.csv")

In [None]:
summer.head()

In [None]:
summer.drop(columns = "Sport")

In [None]:
summer.head()

In [None]:
summer.drop(columns = ["Sport", "Discipline"], inplace=True)

In [None]:
summer.drop(labels = "Event", axis = "columns", inplace= True)

In [None]:
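# Alternative to the drop() call above: del summer["Event"]
# (left commented out because "Event" has already been dropped in place)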

In [None]:
summer.head()

In [None]:
summer = pd.read_csv("summer.csv")

In [None]:
summer.head()

In [None]:
summer = summer.loc[:,["Year", "City", "Athlete", "Country", "Gender", "Medal"]]

In [None]:
summer.head()

### Removing Rows

In [None]:
import pandas as pd

In [None]:
summer = pd.read_csv("summer.csv", index_col = "Athlete")

In [None]:
summer.head(10)

In [None]:
summer.drop(index = "HAJOS, Alfred")

In [None]:
summer.drop(index = ["HAJOS, Alfred","HERSCHMANN, Otto"], inplace = True)

In [None]:
summer.head()

In [None]:
summer.drop(labels = "DRIVAS, Dimitrios", axis = 0,  inplace = True)

In [None]:
summer.head()

In [None]:
summer = summer.loc[summer.Year == 1996]

In [None]:
summer.head()

In [None]:
summer = pd.read_csv("summer.csv", index_col = "Athlete")

In [None]:
summer.head()

In [None]:
mask1 = summer.Year == 1996
mask2 = summer.Sport == "Aquatics"

In [None]:
# Keep only rows that are neither from 1996 nor in Aquatics
# (De Morgan: not (A or B) == (not A) and (not B)).
summer = summer.loc[~mask1 & ~mask2]

In [None]:
summer.head()

In [None]:
(summer.Year == 1996).value_counts()

In [None]:
1996 in summer.Year.values

In [None]:
summer.Sport.isin(["Aquatics"]).any()

In [None]:
(summer.Sport == "Aquatics").any()

### Adding new Columns to a DataFrame

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
titanic["Zeros"] = "Zero"

In [None]:
titanic.head()

In [None]:
# NOTE(review): attribute assignment does NOT create a new column; pandas only
# sets a plain Python attribute on the DataFrame object (and warns about it).
# The next two cells show the effect. Use titanic["Ones"] = 1 to add a column.
titanic.Ones = 1

In [None]:
titanic.head()

In [None]:
titanic.Ones

### Creating Columns based on other Columns

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
1912 - titanic.age

In [None]:
titanic["YoB"] = 1912 - titanic.age

In [None]:
titanic.head()

In [None]:
titanic.sibsp + titanic.parch

In [None]:
titanic["relatives"] = titanic.sibsp + titanic.parch

In [None]:
titanic.head()

In [None]:
titanic.drop(columns = ["sibsp", "parch"], inplace = True)

In [None]:
titanic.head()

In [None]:
inflation_factor = 10

In [None]:
titanic.fare*10

In [None]:
titanic.fare = titanic.fare*10

In [None]:
titanic.head()

### Adding Columns with insert()

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
titanic["Test"] = "Test"

In [None]:
titanic.head()

In [None]:
relatives = titanic.sibsp + titanic.parch
relatives.head()


In [None]:
titanic.insert(loc = 6, column = "relatives", value = relatives)

In [None]:
titanic.head()

### Creating DataFrames from Scratch with pd.DataFrame()

In [None]:
import pandas as pd

#### Having Columns in place

In [None]:
player = ["Lionel Messi", "Cristiano Ronaldo", "Neymar Junior", "Kylian Mbappe", "Manuel Neuer"]

In [None]:
nationality = ["Argentina", "Portugal", "Brasil", "France", "Germany"]

In [None]:
club = ["FC Barcelona", "Juventus FC", "Paris SG", "Paris SG", "FC Bayern" ]

In [None]:
world_champion = [False, False, False, True, True]

In [None]:
height = [1.70, 1.87, 1.75, 1.78, 1.93]

In [None]:
goals = [45, 44, 28, 21, 0]

In [None]:
# Column-oriented input for pd.DataFrame: each key becomes a column label and
# each list supplies that column's values.
dic = {
    "Player": player,
    "Nationality": nationality,
    "Club": club,
    "World_Champion": world_champion,
    "Height": height,
    "Goals_2018": goals,
}

In [None]:
dic

In [None]:
df = pd.DataFrame(data = dic)

In [None]:
df

In [None]:
players = df.set_index("Player")

In [None]:
players

#### Having Rows in place

In [None]:
list(zip(nationality, club, world_champion, height, goals))

In [None]:
zipped = list(zip(nationality, club, world_champion, height, goals))

In [None]:
messi, ronaldo, neymar, mbappe, neuer = zipped

In [None]:
messi

In [None]:
ronaldo

In [None]:
# Row-oriented construction: every tuple is one row, `columns` names the tuple
# positions and `index` labels the rows.
df = pd.DataFrame(
    data=[messi, ronaldo, neymar, mbappe, neuer],
    columns=["Nationality", "Club", "World_Champion", "Height", "Goals_2018"],
    index=["Lionel Messi", "Cristiano Ronaldo", "Neymar Junior", "Kylian Mbappe", "Manuel Neuer"],
)

In [None]:
df

In [None]:
df2 = pd.Series(index = player, data = nationality, name = "Nationality").to_frame()

In [None]:
df2

In [None]:
df2["Club"] = club

In [None]:
df2

### Adding new Rows (hands-on approach)

#### Adding one Row

In [None]:
players

In [None]:
players.reset_index(inplace= True)

In [None]:
players

In [None]:
# Setting with enlargement: assuming the cells above ran in order, the index is
# 0..4 after reset_index(), so label 5 does not exist yet and .loc adds a new row.
# The list supplies one value per column, in column order.
players.loc[5, :] = ["Sergio Ramos", "Spain", "Real Madrid", True, 1.84 ,5]

In [None]:
players

#### Adding many Rows

In [None]:
new = pd.DataFrame(
    data = [["Mohamed Salah", "Egypt", "FC Liverpool", False, 1.75, 44],
            ["Luis Suarez", "Uruguay", "FC Barcelona", False, 1.82, 31]],
    columns = players.columns
)

In [None]:
new

In [None]:
# Stack the new rows underneath the existing ones and renumber the index.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported replacement.
players = pd.concat([players, new], ignore_index=True)

In [None]:
players