- Pandas?
- Series?
- DataFrames?
- Create df
- Create/update columns
- Vectorized operations
- Change values
- .loc => filtering

In [266]:
from typing import List
# %conda install pandas
import pandas as pd

example_list: List[int] = [1,2,3,4,5]

In [267]:
s1 = pd.Series(example_list)
s1.index = ["A", "B", "C", "D", "E"]
s1["A"] = 23
s1

A    23
B     2
C     3
D     4
E     5
dtype: int64

In [268]:
s1.index = range(5)
s1

0    23
1     2
2     3
3     4
4     5
dtype: int64

In [269]:
# Create df from dictionary
names = ["Nate", "Rebecca", "Edwin", "Preston"]
ages = [22, 19, 26, 18]
year = ["senior", "sophomore", "senior", "freshman"]

students = {
    "Name": names,
    "Age": ages,
    "Year": year
}
students_df = pd.DataFrame(data=students)
students_df.index = [1,2,3,4]
students_df

Unnamed: 0,Name,Age,Year
1,Nate,22,senior
2,Rebecca,19,sophomore
3,Edwin,26,senior
4,Preston,18,freshman


In [270]:
import random
# Create df from list of rows

players = ["Messi", "Mbappe", "Ronaldo", "Sala"]

cols = ["Name", "Goals/Game", "Games"]
data = [
    [player, random.random(), round(random.random() *1000)]
    for player in players
    ]
players_df = pd.DataFrame(data=data, columns=cols)
players_df

Unnamed: 0,Name,Goals/Game,Games
0,Messi,0.181264,201
1,Mbappe,0.54893,652
2,Ronaldo,0.36141,857
3,Sala,0.143373,389


In [271]:
# Create/update columns
players_df["Name"]

0      Messi
1     Mbappe
2    Ronaldo
3       Sala
Name: Name, dtype: str

In [272]:
players_df["Name"][0]

'Messi'

In [273]:
players_df["new_col"] = [i+1 for i in range (len(players_df))]
players_df

Unnamed: 0,Name,Goals/Game,Games,new_col
0,Messi,0.181264,201,1
1,Mbappe,0.54893,652,2
2,Ronaldo,0.36141,857,3
3,Sala,0.143373,389,4


In [274]:
[i+1 for i in range (len(players_df))]

[1, 2, 3, 4]

In [275]:
len(players_df)

4

In [276]:
players_df["new_col"] = round(players_df["Goals/Game"]*players_df["Games"])
players_df

Unnamed: 0,Name,Goals/Game,Games,new_col
0,Messi,0.181264,201,36.0
1,Mbappe,0.54893,652,358.0
2,Ronaldo,0.36141,857,310.0
3,Sala,0.143373,389,56.0


In [277]:
# incline vs declarative
# players_df.rename(columns={"new_col": "Goals"}, inplace=True)
players_df = players_df.rename(columns={"new_col": "Goals"})
players_df

Unnamed: 0,Name,Goals/Game,Games,Goals
0,Messi,0.181264,201,36.0
1,Mbappe,0.54893,652,358.0
2,Ronaldo,0.36141,857,310.0
3,Sala,0.143373,389,56.0


In [278]:
players_df["Name"] = players_df["Name"].str.lower()
players_df

Unnamed: 0,Name,Goals/Game,Games,Goals
0,messi,0.181264,201,36.0
1,mbappe,0.54893,652,358.0
2,ronaldo,0.36141,857,310.0
3,sala,0.143373,389,56.0


In [279]:
players_df["Name"] = players_df["Name"].str.upper()
players_df

Unnamed: 0,Name,Goals/Game,Games,Goals
0,MESSI,0.181264,201,36.0
1,MBAPPE,0.54893,652,358.0
2,RONALDO,0.36141,857,310.0
3,SALA,0.143373,389,56.0


In [280]:
players_df[["Name", "Goals"]]

Unnamed: 0,Name,Goals
0,MESSI,36.0
1,MBAPPE,358.0
2,RONALDO,310.0
3,SALA,56.0


In [281]:
# .loc[]

players_df = players_df.set_index("Name")
players_df

Unnamed: 0_level_0,Goals/Game,Games,Goals
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
MESSI,0.181264,201,36.0
MBAPPE,0.54893,652,358.0
RONALDO,0.36141,857,310.0
SALA,0.143373,389,56.0


In [282]:
players_df.loc["MBAPPE", "Games"]

np.int64(652)

In [283]:
players_df = players_df.reset_index()
players_df

Unnamed: 0,Name,Goals/Game,Games,Goals
0,MESSI,0.181264,201,36.0
1,MBAPPE,0.54893,652,358.0
2,RONALDO,0.36141,857,310.0
3,SALA,0.143373,389,56.0


In [284]:
players_df.loc[0]

Name             MESSI
Goals/Game    0.181264
Games              201
Goals             36.0
Name: 0, dtype: object

In [285]:
players_df.loc[1:2,"Games":"Goals"]

Unnamed: 0,Games,Goals
1,652,358.0
2,857,310.0


In [286]:
players_df.loc[[0,2,3]]

Unnamed: 0,Name,Goals/Game,Games,Goals
0,MESSI,0.181264,201,36.0
2,RONALDO,0.36141,857,310.0
3,SALA,0.143373,389,56.0


In [287]:
players_df.loc[players_df["Goals/Game"]>0.5]

Unnamed: 0,Name,Goals/Game,Games,Goals
1,MBAPPE,0.54893,652,358.0


In [288]:
players_df.loc[players_df["Goals/Game"]>0.5, "Name"]

1    MBAPPE
Name: Name, dtype: str

In [289]:
players_df['Team'] = ["FC Barca", "PSG", "PSG", "Liverpool"]
players_df

Unnamed: 0,Name,Goals/Game,Games,Goals,Team
0,MESSI,0.181264,201,36.0,FC Barca
1,MBAPPE,0.54893,652,358.0,PSG
2,RONALDO,0.36141,857,310.0,PSG
3,SALA,0.143373,389,56.0,Liverpool


In [291]:
players_df.loc[players_df["Team"] == "PSG", "Team"] = "Paris St. Germain"
players_df

Unnamed: 0,Name,Goals/Game,Games,Goals,Team
0,MESSI,0.181264,201,36.0,FC Barca
1,MBAPPE,0.54893,652,358.0,Paris St. Germain
2,RONALDO,0.36141,857,310.0,Paris St. Germain
3,SALA,0.143373,389,56.0,Liverpool
