# **Pandas**
## Funzioni

In [1]:
# import librerie
import pandas as pd
import numpy as np

In [2]:
# Sample dataset: name, level (kept as strings), area, takings and expenses.
df = pd.DataFrame(
    dict(
        nome=[
            "Mark", "Andrea", "Luca", "Alex", "Jack", "Max",
            "Lou", "Kim", "Frank", "Sam", "Paul",
        ],
        livello=["5", "6", "4", "4", "5", "6", "3", "5", "4", "6", "6"],
        zona=[
            "Sud", "Nord", "Sud", "Nord", "Sud", "Sud",
            "Centro", "Centro", "Nord", "Sud", "Centro",
        ],
        incassi=[
            50000, 52000, 90000, 34000, 42000, 72000,
            49000, 55000, 67000, 65000, 67000,
        ],
        spese=[
            42000, 43000, 50000, 44000, 38000, 39000,
            42000, 60000, 39000, 44000, 45000,
        ],
    )
)

# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,nome,livello,zona,incassi,spese
0,Mark,5,Sud,50000,42000
1,Andrea,6,Nord,52000,43000
2,Luca,4,Sud,90000,50000
3,Alex,4,Nord,34000,44000
4,Jack,5,Sud,42000,38000
5,Max,6,Sud,72000,39000
6,Lou,3,Centro,49000,42000
7,Kim,5,Centro,55000,60000
8,Frank,4,Nord,67000,39000
9,Sam,6,Sud,65000,44000


In [3]:
# Square the two numeric columns; with the default axis, DataFrame.apply
# passes each selected column to the lambda as a Series.
df[["incassi", "spese"]].apply(lambda column: column.pow(2))

Unnamed: 0,incassi,spese
0,2500000000,1764000000
1,2704000000,1849000000
2,8100000000,2500000000
3,1156000000,1936000000
4,1764000000,1444000000
5,5184000000,1521000000
6,2401000000,1764000000
7,3025000000,3600000000
8,4489000000,1521000000
9,4225000000,1936000000


In [7]:
# Per-column memory footprint in bytes. deep=True inspects the Python string
# objects inside object-dtype columns; index=True adds a row for the index.
df.memory_usage(deep=True, index=True)

Index      128
nome       670
livello    638
zona       672
incassi     88
spese       88
dtype: int64

In [8]:
# Count how many times each distinct value occurs in the 'zona' column.
df.loc[:, "zona"].value_counts()

Sud       5
Nord      3
Centro    3
Name: zona, dtype: int64

In [9]:
# Rebuild the dataset with one repeated row ("Paul") and one row whose numeric
# fields are missing ("Jasmine"), so the cleaning calls have something to remove.
df = pd.DataFrame(
    dict(
        nome=[
            "Mark", "Andrea", "Luca", "Alex", "Jack", "Max", "Lou",
            "Kim", "Frank", "Sam", "Paul", "Paul", "Jasmine",
        ],
        livello=["5", "6", "4", "4", "5", "6", "3", "5", "4", "6", "6", "6", "5"],
        zona=[
            "Sud", "Nord", "Sud", "Nord", "Sud", "Sud", "Centro",
            "Centro", "Nord", "Sud", "Centro", "Centro", "Sud",
        ],
        incassi=[
            50000, 52000, 90000, 34000, 42000, 72000, 49000,
            55000, 67000, 65000, 67000, 67000, np.nan,
        ],
        spese=[
            42000, 43000, 50000, 44000, 38000, 39000, 42000,
            60000, 39000, 44000, 45000, 45000, np.nan,
        ],
    )
)

# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,nome,livello,zona,incassi,spese
0,Mark,5,Sud,50000.0,42000.0
1,Andrea,6,Nord,52000.0,43000.0
2,Luca,4,Sud,90000.0,50000.0
3,Alex,4,Nord,34000.0,44000.0
4,Jack,5,Sud,42000.0,38000.0
5,Max,6,Sud,72000.0,39000.0
6,Lou,3,Centro,49000.0,42000.0
7,Kim,5,Centro,55000.0,60000.0
8,Frank,4,Nord,67000.0,39000.0
9,Sam,6,Sud,65000.0,44000.0


In [10]:
# Remove fully duplicated rows, keeping the first occurrence of each.
# inplace=True mutates the existing frame (the call itself returns None);
# the non-mutating form would be `df.drop_duplicates()`.
df.drop_duplicates(keep="first", inplace=True)

df

Unnamed: 0,nome,livello,zona,incassi,spese
0,Mark,5,Sud,50000.0,42000.0
1,Andrea,6,Nord,52000.0,43000.0
2,Luca,4,Sud,90000.0,50000.0
3,Alex,4,Nord,34000.0,44000.0
4,Jack,5,Sud,42000.0,38000.0
5,Max,6,Sud,72000.0,39000.0
6,Lou,3,Centro,49000.0,42000.0
7,Kim,5,Centro,55000.0,60000.0
8,Frank,4,Nord,67000.0,39000.0
9,Sam,6,Sud,65000.0,44000.0


In [11]:
# Drop every row that contains at least one missing value.
# inplace=True mutates the existing frame (the call itself returns None).
df.dropna(axis=0, how="any", inplace=True)

df

Unnamed: 0,nome,livello,zona,incassi,spese
0,Mark,5,Sud,50000.0,42000.0
1,Andrea,6,Nord,52000.0,43000.0
2,Luca,4,Sud,90000.0,50000.0
3,Alex,4,Nord,34000.0,44000.0
4,Jack,5,Sud,42000.0,38000.0
5,Max,6,Sud,72000.0,39000.0
6,Lou,3,Centro,49000.0,42000.0
7,Kim,5,Centro,55000.0,60000.0
8,Frank,4,Nord,67000.0,39000.0
9,Sam,6,Sud,65000.0,44000.0


In [12]:
# Helper function definition.
def diff(a, b):
    """Return ``a`` minus ``b``.

    Works for any operands that support the ``-`` operator.
    """
    difference = a - b
    return difference

# Row-wise apply: with axis="columns" (same as axis=1) the lambda receives one
# row at a time and diff() is called on that row's two values.
# NOTE: despite its name, the 'add' column stores incassi - spese.
df["add"] = df.apply(
    lambda record: diff(record["incassi"], record["spese"]),
    axis="columns",
)

df

Unnamed: 0,nome,livello,zona,incassi,spese,add
0,Mark,5,Sud,50000.0,42000.0,8000.0
1,Andrea,6,Nord,52000.0,43000.0,9000.0
2,Luca,4,Sud,90000.0,50000.0,40000.0
3,Alex,4,Nord,34000.0,44000.0,-10000.0
4,Jack,5,Sud,42000.0,38000.0,4000.0
5,Max,6,Sud,72000.0,39000.0,33000.0
6,Lou,3,Centro,49000.0,42000.0,7000.0
7,Kim,5,Centro,55000.0,60000.0,-5000.0
8,Frank,4,Nord,67000.0,39000.0,28000.0
9,Sam,6,Sud,65000.0,44000.0,21000.0


In [13]:
# apply with a condition: values above 50000 are scaled by 1.2,
# everything else (including exactly 50000) by 1.3.
df["incassi"].apply(lambda value: value * (1.2 if value > 50000 else 1.3))

0      65000.0
1      62400.0
2     108000.0
3      44200.0
4      54600.0
5      86400.0
6      63700.0
7      66000.0
8      80400.0
9      78000.0
10     80400.0
Name: incassi, dtype: float64