In [1]:
import numpy as np
import pandas as pd
from django.utils import timezone

In [2]:
# Build a Series from a plain Python list; the NaN entry makes the dtype float64.
raw_values = [1, 3, 5, np.nan, 6, 8]
s = pd.Series(raw_values)
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [3]:
# Build a DatetimeIndex of 12 consecutive days starting on 2021-01-01.
# date_range defaults to a daily frequency, so periods=12 yields 12 days
# (not 12 months).
# The start date is pinned rather than derived from the current year because
# the cells below select rows with hard-coded 2021 labels (e.g. "20210110");
# a start computed from "now" would make those lookups fail in any other year.
start_year = "20210101"
print(start_year)
dates = pd.date_range(start_year, periods=12)
dates

20210101


DatetimeIndex(['2021-01-01', '2021-01-02', '2021-01-03', '2021-01-04',
               '2021-01-05', '2021-01-06', '2021-01-07', '2021-01-08',
               '2021-01-09', '2021-01-10', '2021-01-11', '2021-01-12'],
              dtype='datetime64[ns]', freq='D')

In [4]:
# Draw a 12x4 array of samples from the standard normal distribution.
# No seed is set in this cell, so the values change on every run.
np.random.randn(12, 4)

array([[-0.34191308, -0.30829712, -0.29570654, -1.26841517],
       [ 0.4279046 ,  0.51225968, -0.75785567, -0.23867725],
       [-0.97270039, -0.43567303,  0.34658577,  1.63203427],
       [-0.01842612, -2.39296483, -0.56625456, -1.70149816],
       [-0.74022988,  0.35208757, -0.11293073, -0.21815413],
       [-0.34531203,  1.21218653,  0.58767859, -1.3272716 ],
       [ 0.64570453, -0.20524112, -1.73504673, -2.20786269],
       [-0.13148144, -0.32989992, -0.42529021,  1.56811417],
       [-1.98999069,  1.00746115,  0.50233718,  1.6453609 ],
       [-1.23042538,  0.36806073, -1.94798288,  0.17561352],
       [-0.45454444, -2.12849426, -0.5683983 ,  2.28616859],
       [ 2.0666228 , -0.80202818, -0.32704787, -0.4626088 ]])

In [5]:
# Create a DataFrame of standard-normal values.
# `index` supplies the row labels (here the dates), `columns` the column labels.
# The data shape is derived from the labels so it always matches them,
# instead of repeating the row/column counts as magic numbers.
column_labels = list("ABCDEFGH")
df = pd.DataFrame(
    np.random.randn(len(dates), len(column_labels)),
    index=dates,
    columns=column_labels,
)
df

Unnamed: 0,A,B,C,D,E,F,G,H
2021-01-01,0.393758,-2.028369,-0.686793,0.258483,0.645953,1.693384,-1.409146,0.564608
2021-01-02,0.390225,1.395525,0.254739,1.733062,0.626094,-1.235898,1.418648,-0.220241
2021-01-03,-0.347647,-0.349562,-0.374687,1.281035,-0.065442,-0.245769,-1.924409,0.195447
2021-01-04,0.297152,0.005301,1.151483,0.017314,-0.377726,-0.655517,0.015547,0.469576
2021-01-05,-2.444972,-1.046968,-0.317195,-0.715588,-0.675184,0.749245,-0.429739,0.477641
2021-01-06,0.303985,0.752355,0.31488,-1.943628,0.697532,0.184185,-0.46049,0.520644
2021-01-07,1.547681,1.319711,0.529188,2.892594,-0.813441,-0.734006,0.264105,-0.279056
2021-01-08,0.273539,-0.460832,-0.100266,2.173651,0.483372,-1.707574,-0.395359,-1.383488
2021-01-09,3.270455,1.026298,0.125927,-0.358152,0.621163,-0.014298,1.4001,-0.6575
2021-01-10,-1.046841,-0.535687,0.572368,1.439004,-0.234027,1.600794,-0.045217,0.085861


In [6]:
# Select a single row by its label.
# Note: the string is read as YYYYMMDD, so "20210110" is 10 January 2021
# (the index holds days, not months).
df.loc["20210110"]

A   -1.046841
B   -0.535687
C    0.572368
D    1.439004
E   -0.234027
F    1.600794
G   -0.045217
H    0.085861
Name: 2021-01-10 00:00:00, dtype: float64

In [7]:
# Select rows by integer position with iloc (positions 8 and 9; the end of the slice is excluded).
df.iloc[8:10]

Unnamed: 0,A,B,C,D,E,F,G,H
2021-01-09,3.270455,1.026298,0.125927,-0.358152,0.621163,-0.014298,1.4001,-0.6575
2021-01-10,-1.046841,-0.535687,0.572368,1.439004,-0.234027,1.600794,-0.045217,0.085861


In [8]:
# A slice inside plain df[...] applies to rows only, so df["B":"E"] raises an
# error: columns cannot be sliced that way.
# Label slice on the row index; both end labels are included.
df.loc["20210102":"20210104"]

Unnamed: 0,A,B,C,D,E,F,G,H
2021-01-02,0.390225,1.395525,0.254739,1.733062,0.626094,-1.235898,1.418648,-0.220241
2021-01-03,-0.347647,-0.349562,-0.374687,1.281035,-0.065442,-0.245769,-1.924409,0.195447
2021-01-04,0.297152,0.005301,1.151483,0.017314,-0.377726,-0.655517,0.015547,0.469576


In [9]:
# Subset: rows from 7 to 10 January (inclusive), keeping only columns B, A and E in that order.
df.loc["20210107":"20210110", ['B', 'A', 'E']]

Unnamed: 0,B,A,E
2021-01-07,1.319711,1.547681,-0.813441
2021-01-08,-0.460832,0.273539,0.483372
2021-01-09,1.026298,3.270455,0.621163
2021-01-10,-0.535687,-1.046841,-0.234027


In [10]:
# Only the two rows 7 January and 10 January (a list of labels, not a range),
# with every column from B through E.
df.loc[["20210107","20210110"], "B":"E"]

Unnamed: 0,B,C,D,E
2021-01-07,1.319711,0.529188,2.892594,-0.813441
2021-01-10,-0.535687,0.572368,1.439004,-0.234027


In [11]:
# Access a column through attribute syntax.
df.D

2021-01-01    0.258483
2021-01-02    1.733062
2021-01-03    1.281035
2021-01-04    0.017314
2021-01-05   -0.715588
2021-01-06   -1.943628
2021-01-07    2.892594
2021-01-08    2.173651
2021-01-09   -0.358152
2021-01-10    1.439004
2021-01-11    1.317318
2021-01-12   -0.220819
Freq: D, Name: D, dtype: float64

In [12]:
# Access one specific element: column D, row labelled 8 January.
df.D["20210108"]

2.1736511355207733

In [13]:
# Equivalent to df.D["20210108"], written with bracket column access.
df["D"]["20210108"]

2.1736511355207733

In [14]:
# Overwrite a single cell with a string.
# Column D holds floats, so it is first converted to object dtype to allow
# mixed values; recent pandas versions refuse to silently upcast on assignment.
df["D"] = df["D"].astype(object)
# Use one .loc call instead of chained indexing (df["D"][...] = ...): chained
# assignment may write to a temporary copy and leave df unchanged.
df.loc["20210108", "D"] = "Hello"
df.loc["20210108", "D"]

'Hello'

In [15]:
# Show the whole frame; the cell in column D for 8 January now holds "Hello".
df

Unnamed: 0,A,B,C,D,E,F,G,H
2021-01-01,0.393758,-2.028369,-0.686793,0.258483,0.645953,1.693384,-1.409146,0.564608
2021-01-02,0.390225,1.395525,0.254739,1.733062,0.626094,-1.235898,1.418648,-0.220241
2021-01-03,-0.347647,-0.349562,-0.374687,1.281035,-0.065442,-0.245769,-1.924409,0.195447
2021-01-04,0.297152,0.005301,1.151483,0.017314,-0.377726,-0.655517,0.015547,0.469576
2021-01-05,-2.444972,-1.046968,-0.317195,-0.715588,-0.675184,0.749245,-0.429739,0.477641
2021-01-06,0.303985,0.752355,0.31488,-1.943628,0.697532,0.184185,-0.46049,0.520644
2021-01-07,1.547681,1.319711,0.529188,2.892594,-0.813441,-0.734006,0.264105,-0.279056
2021-01-08,0.273539,-0.460832,-0.100266,Hello,0.483372,-1.707574,-0.395359,-1.383488
2021-01-09,3.270455,1.026298,0.125927,-0.358152,0.621163,-0.014298,1.4001,-0.6575
2021-01-10,-1.046841,-0.535687,0.572368,1.439004,-0.234027,1.600794,-0.045217,0.085861


In [16]:
# Add a new column holding a running row counter (0, 1, 2, ...).
row_counter = list(range(len(df)))
df["NEW"] = row_counter
df

Unnamed: 0,A,B,C,D,E,F,G,H,NEW
2021-01-01,0.393758,-2.028369,-0.686793,0.258483,0.645953,1.693384,-1.409146,0.564608,0
2021-01-02,0.390225,1.395525,0.254739,1.733062,0.626094,-1.235898,1.418648,-0.220241,1
2021-01-03,-0.347647,-0.349562,-0.374687,1.281035,-0.065442,-0.245769,-1.924409,0.195447,2
2021-01-04,0.297152,0.005301,1.151483,0.017314,-0.377726,-0.655517,0.015547,0.469576,3
2021-01-05,-2.444972,-1.046968,-0.317195,-0.715588,-0.675184,0.749245,-0.429739,0.477641,4
2021-01-06,0.303985,0.752355,0.31488,-1.943628,0.697532,0.184185,-0.46049,0.520644,5
2021-01-07,1.547681,1.319711,0.529188,2.892594,-0.813441,-0.734006,0.264105,-0.279056,6
2021-01-08,0.273539,-0.460832,-0.100266,Hello,0.483372,-1.707574,-0.395359,-1.383488,7
2021-01-09,3.270455,1.026298,0.125927,-0.358152,0.621163,-0.014298,1.4001,-0.6575,8
2021-01-10,-1.046841,-0.535687,0.572368,1.439004,-0.234027,1.600794,-0.045217,0.085861,9


In [17]:
# Work on an independent copy so the original df is left untouched.
dfa = df.copy()
# Swap the values of columns B and NEW.
# .to_numpy() strips the column labels; assigning the DataFrame directly would
# re-align on column names and undo the swap.
dfa.loc[:, ['B', 'NEW']] = dfa[['NEW', 'B']].to_numpy()
dfa

Unnamed: 0,A,B,C,D,E,F,G,H,NEW
2021-01-01,0.393758,0.0,-0.686793,0.258483,0.645953,1.693384,-1.409146,0.564608,-2.028369
2021-01-02,0.390225,1.0,0.254739,1.733062,0.626094,-1.235898,1.418648,-0.220241,1.395525
2021-01-03,-0.347647,2.0,-0.374687,1.281035,-0.065442,-0.245769,-1.924409,0.195447,-0.349562
2021-01-04,0.297152,3.0,1.151483,0.017314,-0.377726,-0.655517,0.015547,0.469576,0.005301
2021-01-05,-2.444972,4.0,-0.317195,-0.715588,-0.675184,0.749245,-0.429739,0.477641,-1.046968
2021-01-06,0.303985,5.0,0.31488,-1.943628,0.697532,0.184185,-0.46049,0.520644,0.752355
2021-01-07,1.547681,6.0,0.529188,2.892594,-0.813441,-0.734006,0.264105,-0.279056,1.319711
2021-01-08,0.273539,7.0,-0.100266,Hello,0.483372,-1.707574,-0.395359,-1.383488,-0.460832
2021-01-09,3.270455,8.0,0.125927,-0.358152,0.621163,-0.014298,1.4001,-0.6575,1.026298
2021-01-10,-1.046841,9.0,0.572368,1.439004,-0.234027,1.600794,-0.045217,0.085861,-0.535687


In [18]:
# Display df again to confirm the swap on the copy did not modify it.
df

Unnamed: 0,A,B,C,D,E,F,G,H,NEW
2021-01-01,0.393758,-2.028369,-0.686793,0.258483,0.645953,1.693384,-1.409146,0.564608,0
2021-01-02,0.390225,1.395525,0.254739,1.733062,0.626094,-1.235898,1.418648,-0.220241,1
2021-01-03,-0.347647,-0.349562,-0.374687,1.281035,-0.065442,-0.245769,-1.924409,0.195447,2
2021-01-04,0.297152,0.005301,1.151483,0.017314,-0.377726,-0.655517,0.015547,0.469576,3
2021-01-05,-2.444972,-1.046968,-0.317195,-0.715588,-0.675184,0.749245,-0.429739,0.477641,4
2021-01-06,0.303985,0.752355,0.31488,-1.943628,0.697532,0.184185,-0.46049,0.520644,5
2021-01-07,1.547681,1.319711,0.529188,2.892594,-0.813441,-0.734006,0.264105,-0.279056,6
2021-01-08,0.273539,-0.460832,-0.100266,Hello,0.483372,-1.707574,-0.395359,-1.383488,7
2021-01-09,3.270455,1.026298,0.125927,-0.358152,0.621163,-0.014298,1.4001,-0.6575,8
2021-01-10,-1.046841,-0.535687,0.572368,1.439004,-0.234027,1.600794,-0.045217,0.085861,9


In [20]:
# Keep only the rows whose value in column A is greater than 1.
above_one = df["A"].gt(1)
df.loc[above_one]

Unnamed: 0,A,B,C,D,E,F,G,H,NEW
2021-01-07,1.547681,1.319711,0.529188,2.892594,-0.813441,-0.734006,0.264105,-0.279056,6
2021-01-09,3.270455,1.026298,0.125927,-0.358152,0.621163,-0.014298,1.4001,-0.6575,8
2021-01-12,1.158957,-0.312945,-1.077172,-0.220819,-1.024764,0.970638,0.037499,0.49348,11


In [37]:
# Replace the "Hello" string with a number, using the scalar accessor .at.
df.at["20210108", "D"] = 1000
# Keep only the values greater than 0; every other cell is shown as NaN.
df.where(df > 0)

Unnamed: 0,A,B,C,D,E,F,G,H,NEW,I
2021-01-01,0.393758,,,0.258483,0.645953,1.693384,,0.564608,,
2021-01-02,0.390225,1.395525,0.254739,1.733062,0.626094,,1.418648,,1.0,1.0
2021-01-03,,,,1.281035,,,,0.195447,2.0,2.0
2021-01-04,0.297152,0.005301,1.151483,0.017314,,,0.015547,0.469576,3.0,3.0
2021-01-05,,,,,,0.749245,,0.477641,4.0,4.0
2021-01-06,0.303985,0.752355,0.31488,,0.697532,0.184185,,0.520644,5.0,5.0
2021-01-07,1.547681,1.319711,0.529188,2.892594,,,0.264105,,6.0,6.0
2021-01-08,0.273539,,,1000.0,0.483372,,,,7.0,
2021-01-09,3.270455,1.026298,0.125927,,0.621163,,1.4001,,8.0,
2021-01-10,,,0.572368,1.439004,,1.600794,,0.085861,9.0,


In [24]:
# Transpose the frame: dates become columns and the column labels become rows.
df.transpose()

Unnamed: 0,2021-01-01,2021-01-02,2021-01-03,2021-01-04,2021-01-05,2021-01-06,2021-01-07,2021-01-08,2021-01-09,2021-01-10,2021-01-11,2021-01-12
A,0.393758,0.390225,-0.347647,0.297152,-2.444972,0.303985,1.547681,0.273539,3.270455,-1.046841,-0.807206,1.158957
B,-2.028369,1.395525,-0.349562,0.005301,-1.046968,0.752355,1.319711,-0.460832,1.026298,-0.535687,0.189085,-0.312945
C,-0.686793,0.254739,-0.374687,1.151483,-0.317195,0.31488,0.529188,-0.100266,0.125927,0.572368,-0.396596,-1.077172
D,0.258483,1.733062,1.281035,0.017314,-0.715588,-1.943628,2.892594,100.0,-0.358152,1.439004,1.317318,-0.220819
E,0.645953,0.626094,-0.065442,-0.377726,-0.675184,0.697532,-0.813441,0.483372,0.621163,-0.234027,0.668276,-1.024764
F,1.693384,-1.235898,-0.245769,-0.655517,0.749245,0.184185,-0.734006,-1.707574,-0.014298,1.600794,-1.215798,0.970638
G,-1.409146,1.418648,-1.924409,0.015547,-0.429739,-0.46049,0.264105,-0.395359,1.4001,-0.045217,0.670182,0.037499
H,0.564608,-0.220241,0.195447,0.469576,0.477641,0.520644,-0.279056,-1.383488,-0.6575,0.085861,-1.397497,0.49348
NEW,0.0,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,11.0


In [26]:
# Copy only a subset of the frame (the first 6 rows).
df2 = df.head(6).copy()
# Overwrite column E with string labels.
df2["E"] = ["one", "one", "two", "three", "four", "three"]
# Keep only the rows whose E value is "two" or "four".
wanted_labels = ["two", "four"]
df2.loc[df2["E"].isin(wanted_labels)]

Unnamed: 0,A,B,C,D,E,F,G,H,NEW
2021-01-03,-0.347647,-0.349562,-0.374687,1.281035,two,-0.245769,-1.924409,0.195447,2
2021-01-05,-2.444972,-1.046968,-0.317195,-0.715588,four,0.749245,-0.429739,0.477641,4


In [27]:
# The boolean Series used for the selection: True where E is "two" or "four".
df2["E"].isin(["two", "four"])

2021-01-01    False
2021-01-02    False
2021-01-03     True
2021-01-04    False
2021-01-05     True
2021-01-06    False
Freq: D, Name: E, dtype: bool

In [32]:
# Select every other row starting with the first one (1st, 3rd, 5th, ...),
# i.e. the rows at even 0-based positions, using a boolean list as the mask.
# The mask length follows len(df2): a mask of the wrong length raises an error.
df2[[i % 2 == 0 for i in range(len(df2))]]

Unnamed: 0,A,B,C,D,E,F,G,H,NEW
2021-01-01,0.393758,-2.028369,-0.686793,0.258483,one,1.693384,-1.409146,0.564608,0
2021-01-03,-0.347647,-0.349562,-0.374687,1.281035,two,-0.245769,-1.924409,0.195447,2
2021-01-05,-2.444972,-1.046968,-0.317195,-0.715588,four,0.749245,-0.429739,0.477641,4


In [35]:
# Create a Series whose index covers only part of df's index (2 to 7 January).
partial_index = pd.date_range("20210102", periods=6)
s1 = pd.Series(data=[1, 2, 3, 4, 5, 6], index=partial_index)
s1

2021-01-02    1
2021-01-03    2
2021-01-04    3
2021-01-05    4
2021-01-06    5
2021-01-07    6
Freq: D, dtype: int64

In [36]:
# Add the Series as a new column of df. Values are aligned on the index:
# each one lands on its matching date and the remaining rows are left empty (NaN).
df["I"] = s1
df

Unnamed: 0,A,B,C,D,E,F,G,H,NEW,I
2021-01-01,0.393758,-2.028369,-0.686793,0.258483,0.645953,1.693384,-1.409146,0.564608,0,
2021-01-02,0.390225,1.395525,0.254739,1.733062,0.626094,-1.235898,1.418648,-0.220241,1,1.0
2021-01-03,-0.347647,-0.349562,-0.374687,1.281035,-0.065442,-0.245769,-1.924409,0.195447,2,2.0
2021-01-04,0.297152,0.005301,1.151483,0.017314,-0.377726,-0.655517,0.015547,0.469576,3,3.0
2021-01-05,-2.444972,-1.046968,-0.317195,-0.715588,-0.675184,0.749245,-0.429739,0.477641,4,4.0
2021-01-06,0.303985,0.752355,0.31488,-1.943628,0.697532,0.184185,-0.46049,0.520644,5,5.0
2021-01-07,1.547681,1.319711,0.529188,2.892594,-0.813441,-0.734006,0.264105,-0.279056,6,6.0
2021-01-08,0.273539,-0.460832,-0.100266,100.0,0.483372,-1.707574,-0.395359,-1.383488,7,
2021-01-09,3.270455,1.026298,0.125927,-0.358152,0.621163,-0.014298,1.4001,-0.6575,8,
2021-01-10,-1.046841,-0.535687,0.572368,1.439004,-0.234027,1.600794,-0.045217,0.085861,9,


In [43]:
# In-place modification of the data using a boolean ("where"-style) selection.
# Take the first 4 rows and first 4 columns as an independent copy; fill any NaN with 0.
df2 = df.iloc[0:4, 0:4].copy().fillna(0)
# Wherever a value is positive, replace it with its negation,
# so every non-zero value ends up negative.
df2[df2 > 0] = -df2
df2

Unnamed: 0,A,B,C,D
2021-01-01,-0.393758,-2.028369,-0.686793,-0.258483
2021-01-02,-0.390225,-1.395525,-0.254739,-1.733062
2021-01-03,-0.347647,-0.349562,-0.374687,-1.281035
2021-01-04,-0.297152,-0.005301,-1.151483,-0.017314


Unnamed: 0,A,B,C,D
2021-01-01,0.393758,2.028369,0.686793,0.258483
2021-01-02,0.390225,1.395525,0.254739,1.733062
2021-01-03,0.347647,0.349562,0.374687,1.281035
2021-01-04,0.297152,0.005301,1.151483,0.017314
