In [3]:
import pandas as pd

In [2]:
# Float Series built from a plain list; no index is passed, so pandas numbers the entries itself.
s = pd.Series(data=[0.2, 0.3, 0.8, 0.99])

In [3]:
s

0    0.20
1    0.30
2    0.80
3    0.99
dtype: float64

In [4]:
s[0]

0.2

In [5]:
s.values

array([0.2 , 0.3 , 0.8 , 0.99])

In [6]:
s.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
s > 0.5  # boolean mask: True where the value is greater than 0.5

0    False
1    False
2     True
3     True
dtype: bool

In [9]:
# Combine two boolean masks element-wise with `&`; each comparison needs its own
# parentheses because `&` binds more tightly than `>` and `<`.
s[(s > 0.5) & (s < 0.9)]

2    0.8
dtype: float64

In [10]:
s * 2

0    0.40
1    0.60
2    1.60
3    1.98
dtype: float64

In [11]:
s.mean()

0.5725

In [16]:
# `in` on a Series tests membership among the index labels, not among the values.
7 in s

False

In [18]:
# True because 1 is an index label of s (it is not one of the stored values).
1 in s

True

In [20]:
# Slice of the first two entries; the stop bound 2 is excluded.
s[0:2]

0    0.2
1    0.3
dtype: float64

In [42]:
# Two ways of building a labelled Series.
# 1) From a dict: the keys become the index labels.
panier = pd.Series({"pomme": 5, "kiwi": 10, "mure": 3, "mangue": 1})
# 2) From separate value and label lists (note "litchi" where panier has "mangue").
panier_bis = pd.Series(
    [5, 10, 3, 1],
    index=["pomme", "kiwi", "mure", "litchi"],
)

In [40]:
panier_bis

pomme      5
kiwi      10
mure       3
litchi     1
dtype: int64

In [24]:
panier.sum()

19

In [25]:
panier[["mure", "mangue"]]

mure      3
mangue    1
dtype: int64

In [31]:
# No label is shared by the two indexes, so every aligned sum is NaN (Not a Number).
s + panier

0        NaN
1        NaN
2        NaN
3        NaN
kiwi     NaN
mangue   NaN
mure     NaN
pomme    NaN
dtype: float64

In [32]:
s

0    0.20
1    0.30
2    0.80
3    0.99
dtype: float64

In [33]:
sbis = pd.Series([1, 2, 6, 7, 9])

In [34]:
sbis

0    1
1    2
2    6
3    7
4    9
dtype: int64

In [35]:
# Addition aligns on index labels: label 4 exists only in sbis, so its sum is NaN.
s + sbis

0    1.20
1    2.30
2    6.80
3    7.99
4     NaN
dtype: float64

In [44]:
# Labels found in only one basket ("mangue", "litchi") have no partner and become NaN.
big_panier = panier + panier_bis

In [46]:
# Keep only the entries whose sum is missing.
big_panier.loc[big_panier.isna()]

litchi   NaN
mangue   NaN
dtype: float64

In [47]:
# Keep only the entries whose sum could be computed.
big_panier.loc[big_panier.notna()]

kiwi     20.0
mure      6.0
pomme    10.0
dtype: float64

## DataFrame

In [41]:
# Column-oriented input: each key becomes a column, each list holds that column's values.
data = {
    "city": ["Paris", "London", "Berlin"],
    "density": [3550, 5100, 3750],
    "area": [2723, 1623, 984],
    "population": [9_645_000, 8_278_000, 3_675_000],
}

# No index is passed, so the rows are numbered automatically.
df = pd.DataFrame(data=data)

In [49]:
df

Unnamed: 0,city,density,area,population
0,Paris,3550,2723,9645000
1,London,5100,1623,8278000
2,Berlin,3750,984,3675000


In [50]:
df.columns

Index(['city', 'density', 'area', 'population'], dtype='object')

In [51]:
df["area"]

0    2723
1    1623
2     984
Name: area, dtype: int64

In [52]:
df.dtypes

city          object
density        int64
area           int64
population     int64
dtype: object

In [53]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   city        3 non-null      object
 1   density     3 non-null      int64 
 2   area        3 non-null      int64 
 3   population  3 non-null      int64 
dtypes: int64(3), object(1)
memory usage: 224.0+ bytes


In [54]:
df.describe()

Unnamed: 0,density,area,population
count,3.0,3.0,3.0
mean,4133.333333,1776.666667,7199333.0
std,843.109325,879.62511,3127757.0
min,3550.0,984.0,3675000.0
25%,3650.0,1303.5,5976500.0
50%,3750.0,1623.0,8278000.0
75%,4425.0,2173.0,8961500.0
max,5100.0,2723.0,9645000.0


In [55]:
# The columns mix strings and integers, so the resulting array has dtype=object.
df.values

array([['Paris', 3550, 2723, 9645000],
       ['London', 5100, 1623, 8278000],
       ['Berlin', 3750, 984, 3675000]], dtype=object)

In [56]:
df.index

RangeIndex(start=0, stop=3, step=1)

In [57]:
# Returns a new DataFrame indexed by "city"; df itself is not modified.
df.set_index("city")

Unnamed: 0_level_0,density,area,population
city,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Paris,3550,2723,9645000
London,5100,1623,8278000
Berlin,3750,984,3675000


In [58]:
df

Unnamed: 0,city,density,area,population
0,Paris,3550,2723,9645000
1,London,5100,1623,8278000
2,Berlin,3750,984,3675000


In [59]:
# df = df.set_index("city")
# or
# df.set_index("city", inplace=True)

# set_index returns a new DataFrame by default: df stays unchanged unless it is
# reassigned or inplace=True is passed

In [6]:
df["population"] / df["area"]

0    3542.049210
1    5100.431300
2    3734.756098
dtype: float64

In [7]:
# Add a derived column to df: population divided by area, computed element-wise.
df["real_density"] = df["population"] / df["area"]

In [8]:
df

Unnamed: 0,city,density,area,population,real_density
0,Paris,3550,2723,9645000,3542.04921
1,London,5100,1623,8278000,5100.4313
2,Berlin,3750,984,3675000,3734.756098


In [14]:
df[df["density"] > 4000]

Unnamed: 0,city,density,area,population,real_density
1,London,5100,1623,8278000,5100.4313


In [17]:
# Filter the rows with a boolean mask, then keep two columns.
df[df["density"] > 4000][["city", "population"]]

# SQL equivalent:
#   SELECT city, population
#   FROM df
#   WHERE density > 4000

Unnamed: 0,city,population
1,London,8278000


In [18]:
df

Unnamed: 0,city,density,area,population,real_density
0,Paris,3550,2723,9645000,3542.04921
1,London,5100,1623,8278000,5100.4313
2,Berlin,3750,984,3675000,3734.756098


In [20]:
df.sort_index(ascending=False)  # sort the rows by index label, in descending order

Unnamed: 0,city,density,area,population,real_density
2,Berlin,3750,984,3675000,3734.756098
1,London,5100,1623,8278000,5100.4313
0,Paris,3550,2723,9645000,3542.04921


In [25]:
df.sort_values("density", ascending=False)  # sort the DataFrame by the values of one column (descending)

Unnamed: 0,city,density,area,population,real_density
1,London,5100,1623,8278000,5100.4313
2,Berlin,3750,984,3675000,3734.756098
0,Paris,3550,2723,9645000,3542.04921


In [24]:
df.sort_values("density")

Unnamed: 0,city,density,area,population,real_density
0,Paris,3550,2723,9645000,3542.04921
2,Berlin,3750,984,3675000,3734.756098
1,London,5100,1623,8278000,5100.4313


In [26]:
df

Unnamed: 0,city,density,area,population,real_density
0,Paris,3550,2723,9645000,3542.04921
1,London,5100,1623,8278000,5100.4313
2,Berlin,3750,984,3675000,3734.756098


In [28]:
# Positional (.iloc) and label-based (.loc) lookups of the same cell:
# row 1, column "population" (the column at position 3).
# Only the last expression of a cell is displayed, so the positional lookup is
# checked against the label-based one instead of being silently discarded.
assert df.iloc[1, 3] == df.loc[1, "population"]
df.loc[1, "population"]

8278000

In [32]:
# Use the city names as the index so that a row can be looked up by name.
df.set_index("city").loc["London", "population"]

8278000

In [34]:
# One .loc call: a boolean mask selects the rows, a list of labels selects the columns.
df.loc[df["density"] > 4000, ["city", "population"]]

Unnamed: 0,city,population
1,London,8278000


In [35]:
df

Unnamed: 0,city,density,area,population,real_density
0,Paris,3550,2723,9645000,3542.04921
1,London,5100,1623,8278000,5100.4313
2,Berlin,3750,984,3675000,3734.756098


In [39]:
# .loc label slices include both endpoints: rows 0 and 1, columns "density" through "area".
# The assignment overwrites those cells of df in place.
df.loc[0:1, "density":"area"] = 2

In [42]:
df

Unnamed: 0,city,density,area,population,real_density
0,Paris,2,2,9645000,3542.04921
1,London,2,2,8278000,5100.4313
2,Berlin,3750,984,3675000,3734.756098
