# La bibliothèque Pandas

In [1]:
import pandas as pd 
import numpy as np

In [2]:
pd.__version__

'1.4.2'

In [3]:
data = pd.Series([5,10,15,20])

In [4]:
data

0     5
1    10
2    15
3    20
dtype: int64

In [5]:
data.values

array([ 5, 10, 15, 20], dtype=int64)

In [6]:
data.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
data[0]

5

In [8]:
data[2]

15

In [9]:
data[1:3]

1    10
2    15
dtype: int64

In [10]:
data[:2]

0     5
1    10
dtype: int64

In [11]:
liste = [1,2,3,4,5]
indices = ['a','b',4,'c','ab']
serie = pd.Series(data = liste, index = indices)

In [12]:
serie

a     1
b     2
4     3
c     4
ab    5
dtype: int64

In [13]:
liste = [1,2,3,4,5]
indices = ['a','b',4,'c','ab']
serie1 = pd.Series(liste, indices)

In [14]:
serie1

a     1
b     2
4     3
c     4
ab    5
dtype: int64

In [15]:
serie2 = pd.Series([5,10,15,20,25],['a','b','ab',3,4])

In [16]:
serie2

a      5
b     10
ab    15
3     20
4     25
dtype: int64

In [17]:
serie2['a']

5

In [18]:
serie2['ab']

15

In [19]:
notes_maths_dic = {'Christophe': 17, 'Clara': 10, 'Amine': 13, 'Amandine': 10.5, 'Lina': 13.5}

In [20]:
notes_maths = pd.Series(notes_maths_dic)

In [21]:
notes_maths

Christophe    17.0
Clara         10.0
Amine         13.0
Amandine      10.5
Lina          13.5
dtype: float64

In [22]:
notes_maths['Christophe']

17.0

In [23]:
notes_maths['Lina']

13.5

In [24]:
notes_maths1 = pd.Series(notes_maths_dic, index = ['Clara', 'Amine'])

In [25]:
notes_maths1

Clara    10.0
Amine    13.0
dtype: float64

In [26]:
data = pd.Series([5,10,20,30,40], index = ['a','b','c','d','e'])

In [27]:
data

a     5
b    10
c    20
d    30
e    40
dtype: int64

# Modifier Les données d'une série pandas

In [28]:
data['a'] = 100

In [29]:
data

a    100
b     10
c     20
d     30
e     40
dtype: int64

In [30]:
data['x'] = 1000

In [31]:
data

a     100
b      10
c      20
d      30
e      40
x    1000
dtype: int64

In [32]:
data['y'] = 2000

In [33]:
data

a     100
b      10
c      20
d      30
e      40
x    1000
y    2000
dtype: int64

In [34]:
notes_maths

Christophe    17.0
Clara         10.0
Amine         13.0
Amandine      10.5
Lina          13.5
dtype: float64

# Filtrer les données d'une série Pandas

In [35]:
notes_12 = notes_maths[notes_maths >= 12]

In [36]:
notes_12

Christophe    17.0
Amine         13.0
Lina          13.5
dtype: float64

In [37]:
notes_10_15 = notes_maths[(notes_maths <= 15) & (notes_maths >= 10)]

In [38]:
notes_10_15

Clara       10.0
Amine       13.0
Amandine    10.5
Lina        13.5
dtype: float64

# Création d'un dataFrame à partir d'une série Pandas

In [39]:
notes_maths

Christophe    17.0
Clara         10.0
Amine         13.0
Amandine      10.5
Lina          13.5
dtype: float64

In [40]:
dataFrame_maths = pd.DataFrame(notes_maths, columns = ['Notes'])

In [41]:
dataFrame_maths

Unnamed: 0,Notes
Christophe,17.0
Clara,10.0
Amine,13.0
Amandine,10.5
Lina,13.5


# Création d'un dataframe à partir d'une liste de dictionnaires

In [42]:
d1 = {'a':5, 'b':10, 'c':15, 'd':20}
d2 = {'a':25, 'b':30, 'c':35, 'd':40}
d3 = {'a':45, 'b':50, 'c':55, 'd':60}

In [43]:
d1

{'a': 5, 'b': 10, 'c': 15, 'd': 20}

In [44]:
d2

{'a': 25, 'b': 30, 'c': 35, 'd': 40}

In [45]:
d3

{'a': 45, 'b': 50, 'c': 55, 'd': 60}

In [46]:
dataframe_dic = pd.DataFrame([d1,d2,d3])

In [47]:
dataframe_dic

Unnamed: 0,a,b,c,d
0,5,10,15,20
1,25,30,35,40
2,45,50,55,60


In [48]:
d4 = {'a':5, 'b':10, 'c':15, 'd':20}
d5 = {'a':25, 'd':40}
d6 = {'a':45, 'b':50, 'd':60}

In [49]:
dataframe1 = pd.DataFrame([d4,d5,d6])

In [50]:
dataframe1

Unnamed: 0,a,b,c,d
0,5,10.0,15.0,20
1,25,,,40
2,45,50.0,,60


In [51]:
data_liste_dic = [{'a':i, 'b':2*i, 'c': i*i} for i in range(10)]

In [52]:
data_liste_dic

[{'a': 0, 'b': 0, 'c': 0},
 {'a': 1, 'b': 2, 'c': 1},
 {'a': 2, 'b': 4, 'c': 4},
 {'a': 3, 'b': 6, 'c': 9},
 {'a': 4, 'b': 8, 'c': 16},
 {'a': 5, 'b': 10, 'c': 25},
 {'a': 6, 'b': 12, 'c': 36},
 {'a': 7, 'b': 14, 'c': 49},
 {'a': 8, 'b': 16, 'c': 64},
 {'a': 9, 'b': 18, 'c': 81}]

In [53]:
dataframe_dic = pd.DataFrame(data_liste_dic)

In [54]:
dataframe_dic

Unnamed: 0,a,b,c
0,0,0,0
1,1,2,1
2,2,4,4
3,3,6,9
4,4,8,16
5,5,10,25
6,6,12,36
7,7,14,49
8,8,16,64
9,9,18,81


# Construction d’un Dataframe à partir de plusieurs series Pandas

In [55]:
notes_maths_dic = {'Christophe': 17, 'Clara': 10, 'Amine': 13, 'Amandine': 10.5, 'Lina': 13.5}
notes_maths = pd.Series(notes_maths_dic)

In [56]:
notes_physique_dic = {'Christophe': 15, 'Clara': 8, 'Amine':10.5, 'Amandine': 17, 'Lina': 13.5}
notes_physique = pd.Series(notes_physique_dic)

In [57]:
notes_maths

Christophe    17.0
Clara         10.0
Amine         13.0
Amandine      10.5
Lina          13.5
dtype: float64

In [58]:
notes_physique

Christophe    15.0
Clara          8.0
Amine         10.5
Amandine      17.0
Lina          13.5
dtype: float64

In [59]:
notes = pd.DataFrame({'Maths': notes_maths, 'Physique': notes_physique})

In [60]:
notes

Unnamed: 0,Maths,Physique
Christophe,17.0,15.0
Clara,10.0,8.0
Amine,13.0,10.5
Amandine,10.5,17.0
Lina,13.5,13.5


In [61]:
notes.index

Index(['Christophe', 'Clara', 'Amine', 'Amandine', 'Lina'], dtype='object')

In [62]:
notes.columns

Index(['Maths', 'Physique'], dtype='object')

In [63]:
notes['Maths']

Christophe    17.0
Clara         10.0
Amine         13.0
Amandine      10.5
Lina          13.5
Name: Maths, dtype: float64

In [64]:
notes['Physique']

Christophe    15.0
Clara          8.0
Amine         10.5
Amandine      17.0
Lina          13.5
Name: Physique, dtype: float64

# Construction d’un dataframe à partir d’une matrice Numpy

In [65]:
matrice = np.random.rand(3,4)

In [66]:
matrice

array([[0.42408865, 0.31343267, 0.64679699, 0.07254364],
       [0.60706752, 0.24716959, 0.4013532 , 0.69066675],
       [0.1253936 , 0.81288525, 0.12587371, 0.39576586]])

In [67]:
dataframe1 = pd.DataFrame(matrice, columns = ['col1', 'col2','col3','col4'], index = ['linge1','linge2', 'ligne3'])

In [68]:
dataframe1

Unnamed: 0,col1,col2,col3,col4
linge1,0.424089,0.313433,0.646797,0.072544
linge2,0.607068,0.24717,0.401353,0.690667
ligne3,0.125394,0.812885,0.125874,0.395766


In [69]:
data = np.arange(10,22).reshape(3,4)

In [70]:
data

array([[10, 11, 12, 13],
       [14, 15, 16, 17],
       [18, 19, 20, 21]])

In [71]:
dataframe2 = pd.DataFrame(data, columns = ['c1','c2','c3', 'c4'], index = ['Maths','Physique','Informatique'])

In [72]:
dataframe2

Unnamed: 0,c1,c2,c3,c4
Maths,10,11,12,13
Physique,14,15,16,17
Informatique,18,19,20,21


In [73]:
notes

Unnamed: 0,Maths,Physique
Christophe,17.0,15.0
Clara,10.0,8.0
Amine,13.0,10.5
Amandine,10.5,17.0
Lina,13.5,13.5


In [74]:
notes['Moyenne'] = (notes['Maths'] + notes['Physique'])/2

In [75]:
notes

Unnamed: 0,Maths,Physique,Moyenne
Christophe,17.0,15.0,16.0
Clara,10.0,8.0,9.0
Amine,13.0,10.5,11.75
Amandine,10.5,17.0,13.75
Lina,13.5,13.5,13.5


In [76]:
notes[['Maths','Physique']]

Unnamed: 0,Maths,Physique
Christophe,17.0,15.0
Clara,10.0,8.0
Amine,13.0,10.5
Amandine,10.5,17.0
Lina,13.5,13.5


In [77]:
notes

Unnamed: 0,Maths,Physique,Moyenne
Christophe,17.0,15.0,16.0
Clara,10.0,8.0,9.0
Amine,13.0,10.5,11.75
Amandine,10.5,17.0,13.75
Lina,13.5,13.5,13.5


In [78]:
notes.drop('Moyenne', axis = 1)

Unnamed: 0,Maths,Physique
Christophe,17.0,15.0
Clara,10.0,8.0
Amine,13.0,10.5
Amandine,10.5,17.0
Lina,13.5,13.5


In [79]:
notes

Unnamed: 0,Maths,Physique,Moyenne
Christophe,17.0,15.0,16.0
Clara,10.0,8.0,9.0
Amine,13.0,10.5,11.75
Amandine,10.5,17.0,13.75
Lina,13.5,13.5,13.5


In [80]:
notes_modif = notes.drop('Moyenne', axis=1)

In [81]:
notes_modif

Unnamed: 0,Maths,Physique
Christophe,17.0,15.0
Clara,10.0,8.0
Amine,13.0,10.5
Amandine,10.5,17.0
Lina,13.5,13.5


In [82]:
notes

Unnamed: 0,Maths,Physique,Moyenne
Christophe,17.0,15.0,16.0
Clara,10.0,8.0,9.0
Amine,13.0,10.5,11.75
Amandine,10.5,17.0,13.75
Lina,13.5,13.5,13.5


In [83]:
notes.drop('Moyenne', axis = 1, inplace = True)

In [84]:
notes

Unnamed: 0,Maths,Physique
Christophe,17.0,15.0
Clara,10.0,8.0
Amine,13.0,10.5
Amandine,10.5,17.0
Lina,13.5,13.5


In [85]:
notes = notes.drop('Physique', axis = 1)

In [86]:
notes

Unnamed: 0,Maths
Christophe,17.0
Clara,10.0
Amine,13.0
Amandine,10.5
Lina,13.5


In [87]:
notes.drop('Clara')

Unnamed: 0,Maths
Christophe,17.0
Amine,13.0
Amandine,10.5
Lina,13.5


In [88]:
notes

Unnamed: 0,Maths
Christophe,17.0
Clara,10.0
Amine,13.0
Amandine,10.5
Lina,13.5


In [89]:
notes.drop('Clara', axis = 0)

Unnamed: 0,Maths
Christophe,17.0
Amine,13.0
Amandine,10.5
Lina,13.5


In [90]:
notes

Unnamed: 0,Maths
Christophe,17.0
Clara,10.0
Amine,13.0
Amandine,10.5
Lina,13.5


In [91]:
notes.drop('Clara', axis = 0, inplace = True)

In [92]:
notes

Unnamed: 0,Maths
Christophe,17.0
Amine,13.0
Amandine,10.5
Lina,13.5


In [93]:
notes = notes.drop('Amine')

In [94]:
notes

Unnamed: 0,Maths
Christophe,17.0
Amandine,10.5
Lina,13.5


# Sélection des données d’un Dataframe

In [95]:
notes_maths_dic = {'Christophe': 17, 'Clara': 10, 'Amine': 13, 'Amandine': 10.5, 'Lina': 13.5}
notes_maths = pd.Series(notes_maths_dic)

notes_physique_dic = {'Christophe': 15, 'Clara': 8, 'Amine':10.5, 'Amandine': 17, 'Lina': 13.5}
notes_physique = pd.Series(notes_physique_dic)

notes = pd.DataFrame({'Maths': notes_maths, 'Physique': notes_physique})

notes['Moyenne'] = (notes['Maths'] + notes['Physique'])/2

In [96]:
notes

Unnamed: 0,Maths,Physique,Moyenne
Christophe,17.0,15.0,16.0
Clara,10.0,8.0,9.0
Amine,13.0,10.5,11.75
Amandine,10.5,17.0,13.75
Lina,13.5,13.5,13.5


In [97]:
notes['Maths']

Christophe    17.0
Clara         10.0
Amine         13.0
Amandine      10.5
Lina          13.5
Name: Maths, dtype: float64

In [98]:
notes.Maths

Christophe    17.0
Clara         10.0
Amine         13.0
Amandine      10.5
Lina          13.5
Name: Maths, dtype: float64

In [99]:
notes.Maths is notes['Maths']

True

In [100]:
notes[['Maths', 'Physique']]

Unnamed: 0,Maths,Physique
Christophe,17.0,15.0
Clara,10.0,8.0
Amine,13.0,10.5
Amandine,10.5,17.0
Lina,13.5,13.5


In [101]:
notes

Unnamed: 0,Maths,Physique,Moyenne
Christophe,17.0,15.0,16.0
Clara,10.0,8.0,9.0
Amine,13.0,10.5,11.75
Amandine,10.5,17.0,13.75
Lina,13.5,13.5,13.5


In [102]:
notes['Christphe']

KeyError: 'Christphe'

In [None]:
notes.loc['Christophe']

In [None]:
notes.iloc[1]

In [None]:
notes.loc['Amine', 'Moyenne']

In [None]:
notes.loc['Lina', 'Maths']

In [None]:
notes.loc[['Amine', 'Amandine'],['Maths', 'Physique']]

# Sélection par tranche

In [None]:
notes_maths_dic = {'Christophe': 17, 'Clara': 10, 'Amine': 13, 'Amandine': 10.5, 'Lina': 13.5}
notes_maths = pd.Series(notes_maths_dic)

notes_physique_dic = {'Christophe': 15, 'Clara': 8, 'Amine':10.5, 'Amandine': 17, 'Lina': 13.5}
notes_physique = pd.Series(notes_physique_dic)

notes_informatique_dic = {'Christophe': 12, 'Clara': 11, 'Amine': 8, 'Amandine': 14, 'Lina': 17}
notes_informatique = pd.Series(notes_informatique_dic)

notes_anglais_dic = {'Christophe': 18, 'Clara':15, 'Amine':10, 'Amandine': 13, 'Lina': 19}
notes_anglais = pd.Series(notes_anglais_dic)

notes = pd.DataFrame({'Maths': notes_maths, 'Physique': notes_physique, 'Informatique': notes_informatique, 
                      'Anglais':notes_anglais})

notes['Moyenne'] = (notes['Maths'] + notes['Physique'] + notes['Informatique'] + notes['Anglais'])/4

In [None]:
notes

In [None]:
notes.loc['Clara':'Amandine', :]

In [None]:
notes.loc['Clara':'Amandine', 'Physique':'Anglais']

In [None]:
notes.loc['Amine':, 'Informatique':]

In [None]:
notes

In [None]:
notes.iloc[1:, 0:3]

In [None]:
notes.iloc[1:3, 3:]

In [None]:
notes.iloc[1:4,:]

In [None]:
notes.iloc[1:4]

In [None]:
notes.loc['Clara':'Amandine']

In [None]:
notes

In [None]:
notes > 10

In [None]:
notes_bool = notes > 10

In [None]:
notes_bool

In [None]:
notes[notes_bool]

In [None]:
notes[notes > 10]

In [None]:
notes['Physique'] > 10

In [None]:
notes[notes['Physique'] > 10]

In [None]:
notes

In [None]:
notes_physique_10 = notes[notes['Physique'] > 10]

In [None]:
notes_physique_10

In [None]:
notes_physique_10['Informatique']

In [None]:
notes[notes['Physique'] > 10]['Informatique']

In [None]:
notes[notes['Physique'] > 10][['Informatique']]

In [None]:
notes[notes['Physique'] > 10][['Maths','Physique','Informatique']]

# Sélection selon plusieurs conditions

In [None]:
notes

In [None]:
notes[(notes['Maths']>=10) & (notes['Informatique'] >=12)]

In [None]:
notes[(notes['Maths'] > 10) | (notes['Informatique'] > 14)]

In [None]:
notes

In [None]:
notes.reset_index()

In [None]:
notes

In [None]:
etudiants = ['etudiant1', 'etudiant2', 'etudiant3', 'etudiant4', 'etudiant5']

In [None]:
notes['Etudiants'] = etudiants

In [None]:
notes

In [None]:
notes.set_index('Etudiants', inplace = True)

In [None]:
notes

In [None]:
notes.loc['etudiant1']

In [None]:
notes.loc[['etudiant1', 'etudiant3']]