# **Pandas**
## Indicizzazione

In [9]:
# import librerie
import pandas as pd
import numpy as np

In [10]:
# Sample dataset, written one row per person:
# name (nome), area (zona), takings (incassi) and expenses (spese).
df = pd.DataFrame(
    [
        ("Mark", "Sud", 50000, 42000),
        ("Andrea", "Nord", 52000, 43000),
        ("Luca", "Sud", 90000, 50000),
        ("Alex", "Nord", 34000, 44000),
        ("Jack", "Sud", 42000, 38000),
        ("Max", "Sud", 72000, 39000),
        ("Lou", "Centro", 49000, 42000),
        ("Kim", "Centro", 55000, 60000),
        ("Frank", "Nord", 67000, 39000),
        ("Sam", "Sud", 65000, 44000),
        ("Paul", "Centro", 67000, 45000),
    ],
    columns=["nome", "zona", "incassi", "spese"],
)

df

Unnamed: 0,nome,zona,incassi,spese
0,Mark,Sud,50000,42000
1,Andrea,Nord,52000,43000
2,Luca,Sud,90000,50000
3,Alex,Nord,34000,44000
4,Jack,Sud,42000,38000
5,Max,Sud,72000,39000
6,Lou,Centro,49000,42000
7,Kim,Centro,55000,60000
8,Frank,Nord,67000,39000
9,Sam,Sud,65000,44000


In [11]:
# inspect the row index of the DataFrame (here the default integer RangeIndex)
df.index

RangeIndex(start=0, stop=11, step=1)

In [12]:
# inspect the index labels as a NumPy array
# (`to_numpy()` is the accessor pandas recommends over the older `.values` attribute)
df.index.to_numpy()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [13]:
# use the values of the "nome" column as the row labels
# (rebinding `df` instead of `inplace=True` keeps the call chainable and
# makes the state change explicit)
df = df.set_index("nome")

df

Unnamed: 0_level_0,zona,incassi,spese
nome,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Mark,Sud,50000,42000
Andrea,Nord,52000,43000
Luca,Sud,90000,50000
Alex,Nord,34000,44000
Jack,Sud,42000,38000
Max,Sud,72000,39000
Lou,Centro,49000,42000
Kim,Centro,55000,60000
Frank,Nord,67000,39000
Sam,Sud,65000,44000


In [14]:
# move the "nome" index back into a regular column and restore the default
# integer index
df = df.reset_index()

df

Unnamed: 0,nome,zona,incassi,spese
0,Mark,Sud,50000,42000
1,Andrea,Nord,52000,43000
2,Luca,Sud,90000,50000
3,Alex,Nord,34000,44000
4,Jack,Sud,42000,38000
5,Max,Sud,72000,39000
6,Lou,Centro,49000,42000
7,Kim,Centro,55000,60000
8,Frank,Nord,67000,39000
9,Sam,Sud,65000,44000


## Indici basati su date

In [15]:
# hourly timestamps from 2021-07-20 00:00 to 2021-07-23 00:00 (both endpoints included)
# ISO 8601 (YYYY-MM-DD) strings are used on purpose: a day-first string such as
# '05/07/2021' would be parsed month-first by default
range_date = pd.date_range(start="2021-07-20", end="2021-07-23", freq="h")

range_date

DatetimeIndex(['2021-07-20 00:00:00', '2021-07-20 01:00:00',
               '2021-07-20 02:00:00', '2021-07-20 03:00:00',
               '2021-07-20 04:00:00', '2021-07-20 05:00:00',
               '2021-07-20 06:00:00', '2021-07-20 07:00:00',
               '2021-07-20 08:00:00', '2021-07-20 09:00:00',
               '2021-07-20 10:00:00', '2021-07-20 11:00:00',
               '2021-07-20 12:00:00', '2021-07-20 13:00:00',
               '2021-07-20 14:00:00', '2021-07-20 15:00:00',
               '2021-07-20 16:00:00', '2021-07-20 17:00:00',
               '2021-07-20 18:00:00', '2021-07-20 19:00:00',
               '2021-07-20 20:00:00', '2021-07-20 21:00:00',
               '2021-07-20 22:00:00', '2021-07-20 23:00:00',
               '2021-07-21 00:00:00', '2021-07-21 01:00:00',
               '2021-07-21 02:00:00', '2021-07-21 03:00:00',
               '2021-07-21 04:00:00', '2021-07-21 05:00:00',
               '2021-07-21 06:00:00', '2021-07-21 07:00:00',
               '2021-07-

In [16]:
# DataFrame with the timestamps and one simulated temperature value per timestamp.
# A seeded generator is used so that re-running the notebook reproduces the same values.
rng = np.random.default_rng(seed=42)  # arbitrary fixed seed

df = pd.DataFrame({
    "tempo": range_date,
    # random integers in [15, 30): the upper bound is exclusive, as with np.random.randint
    "temperatura": rng.integers(15, 30, size=len(range_date)),
})

df

Unnamed: 0,tempo,temperatura
0,2021-07-20 00:00:00,29
1,2021-07-20 01:00:00,25
2,2021-07-20 02:00:00,23
3,2021-07-20 03:00:00,18
4,2021-07-20 04:00:00,17
...,...,...
68,2021-07-22 20:00:00,17
69,2021-07-22 21:00:00,24
70,2021-07-22 22:00:00,28
71,2021-07-22 23:00:00,16


In [11]:
# use the "tempo" timestamps as the row index
# (rebinding `df` instead of `inplace=True` keeps the call chainable and
# makes the state change explicit)
df = df.set_index("tempo")

df.head()

Unnamed: 0_level_0,temperatura
tempo,Unnamed: 1_level_1
2021-07-20 00:00:00,21
2021-07-20 01:00:00,28
2021-07-20 02:00:00,26
2021-07-20 03:00:00,26
2021-07-20 04:00:00,28


In [12]:
# operations on the index

# select every row whose timestamp falls on the given day
df.loc['2021-07-20']

Unnamed: 0_level_0,temperatura
tempo,Unnamed: 1_level_1
2021-07-20 00:00:00,21
2021-07-20 01:00:00,28
2021-07-20 02:00:00,26
2021-07-20 03:00:00,26
2021-07-20 04:00:00,28
2021-07-20 05:00:00,25
2021-07-20 06:00:00,29
2021-07-20 07:00:00,26
2021-07-20 08:00:00,15
2021-07-20 09:00:00,20


In [13]:
# select rows by time interval; with label slicing both endpoints are included
df.loc['2021-07-20 02:00:00' : '2021-07-20 06:00:00']

Unnamed: 0_level_0,temperatura
tempo,Unnamed: 1_level_1
2021-07-20 02:00:00,26
2021-07-20 03:00:00,26
2021-07-20 04:00:00,28
2021-07-20 05:00:00,25
2021-07-20 06:00:00,29


In [14]:
# select rows by a range of whole days: the bounds are given at day resolution, so
# every timestamp from 2021-07-20 00:00 through the end of 2021-07-21 is included
# (.loc makes the row selection explicit; bare [] is otherwise used to pick columns)
df.loc['2021-07-20':'2021-07-21']

Unnamed: 0_level_0,temperatura
tempo,Unnamed: 1_level_1
2021-07-20 00:00:00,21
2021-07-20 01:00:00,28
2021-07-20 02:00:00,26
2021-07-20 03:00:00,26
2021-07-20 04:00:00,28
2021-07-20 05:00:00,25
2021-07-20 06:00:00,29
2021-07-20 07:00:00,26
2021-07-20 08:00:00,15
2021-07-20 09:00:00,20


## Ordinamento

In [15]:
# Sample dataset for the sorting examples, written one row per person:
# name (nome), area (zona), takings (incassi) and expenses (spese).
# Several people share the same "incassi" value, which gives the sorts below ties to break.
df = pd.DataFrame(
    [
        ("Mark", "Sud", 49000, 42000),
        ("Andrea", "Nord", 52000, 43000),
        ("Luca", "Sud", 49000, 50000),
        ("Alex", "Nord", 34000, 44000),
        ("Jack", "Sud", 52000, 38000),
        ("Max", "Sud", 72000, 39000),
        ("Lou", "Centro", 49000, 42000),
        ("Kim", "Centro", 55000, 60000),
        ("Frank", "Nord", 67000, 39000),
        ("Sam", "Sud", 65000, 44000),
        ("Paul", "Centro", 67000, 45000),
    ],
    columns=["nome", "zona", "incassi", "spese"],
)

df

Unnamed: 0,nome,zona,incassi,spese
0,Mark,Sud,49000,42000
1,Andrea,Nord,52000,43000
2,Luca,Sud,49000,50000
3,Alex,Nord,34000,44000
4,Jack,Sud,52000,38000
5,Max,Sud,72000,39000
6,Lou,Centro,49000,42000
7,Kim,Centro,55000,60000
8,Frank,Nord,67000,39000
9,Sam,Sud,65000,44000


In [16]:
# sort the rows by the "incassi" column (ascending is the default direction)
df.sort_values(by='incassi')


Unnamed: 0,nome,zona,incassi,spese
3,Alex,Nord,34000,44000
0,Mark,Sud,49000,42000
2,Luca,Sud,49000,50000
6,Lou,Centro,49000,42000
1,Andrea,Nord,52000,43000
4,Jack,Sud,52000,38000
7,Kim,Centro,55000,60000
9,Sam,Sud,65000,44000
8,Frank,Nord,67000,39000
10,Paul,Centro,67000,45000


In [17]:
# sort the rows alphabetically by the "nome" column
df.sort_values(by='nome')

Unnamed: 0,nome,zona,incassi,spese
3,Alex,Nord,34000,44000
1,Andrea,Nord,52000,43000
8,Frank,Nord,67000,39000
4,Jack,Sud,52000,38000
7,Kim,Centro,55000,60000
6,Lou,Centro,49000,42000
2,Luca,Sud,49000,50000
0,Mark,Sud,49000,42000
5,Max,Sud,72000,39000
10,Paul,Centro,67000,45000


In [18]:
# sort the rows by "incassi" in descending order
df.sort_values(by='incassi', ascending=False)

Unnamed: 0,nome,zona,incassi,spese
5,Max,Sud,72000,39000
8,Frank,Nord,67000,39000
10,Paul,Centro,67000,45000
9,Sam,Sud,65000,44000
7,Kim,Centro,55000,60000
1,Andrea,Nord,52000,43000
4,Jack,Sud,52000,38000
0,Mark,Sud,49000,42000
2,Luca,Sud,49000,50000
6,Lou,Centro,49000,42000


In [19]:
# sort by several columns: "zona" first, then "incassi" within each zona;
# a single ascending=False applies to every column in the list
df.sort_values(by=['zona', 'incassi'], ascending=False)

Unnamed: 0,nome,zona,incassi,spese
5,Max,Sud,72000,39000
9,Sam,Sud,65000,44000
4,Jack,Sud,52000,38000
0,Mark,Sud,49000,42000
2,Luca,Sud,49000,50000
8,Frank,Nord,67000,39000
1,Andrea,Nord,52000,43000
3,Alex,Nord,34000,44000
10,Paul,Centro,67000,45000
7,Kim,Centro,55000,60000


In [20]:
# sort by several columns with one direction per column:
# "incassi" descending, ties broken by "spese" ascending
df.sort_values(by=['incassi', 'spese'], ascending=[False, True])

Unnamed: 0,nome,zona,incassi,spese
5,Max,Sud,72000,39000
8,Frank,Nord,67000,39000
10,Paul,Centro,67000,45000
9,Sam,Sud,65000,44000
7,Kim,Centro,55000,60000
4,Jack,Sud,52000,38000
1,Andrea,Nord,52000,43000
0,Mark,Sud,49000,42000
6,Lou,Centro,49000,42000
2,Luca,Sud,49000,50000
