# Pandas — Guida pratica con esempi (Serie, DataFrame, I/O, GroupBy, Pivot, MultiIndex)
**Fonte:** slide '5-Pandas.pdf'.  
**Obiettivo:** mostrare tutti i comandi spiegati con esempi eseguibili e commenti chiari.

In [2]:
import numpy as np
import pandas as pd

np.__version__, pd.__version__

('2.3.1', '2.3.0')

## 1) Pandas Series
Una **Series** è una sequenza 1D di elementi omogenei con un **indice esplicito**.

In [3]:
# Da lista (indice implicito)
s1 = pd.Series([2.0, 3.1, 4.5])
display(s1)
print('values ->', s1.values)
print('index  ->', s1.index)

0    2.0
1    3.1
2    4.5
dtype: float64

values -> [2.  3.1 4.5]
index  -> RangeIndex(start=0, stop=3, step=1)


In [4]:
# Da lista con indice esplicito
s2 = pd.Series([2.0, 3.1, 4.5], index=['mon','tue','wed'])
s2

mon    2.0
tue    3.1
wed    4.5
dtype: float64

In [5]:
# Da dizionario (chiavi -> indice)
s3 = pd.Series({'c':2.0, 'b':3.1, 'a':4.5})
s3

c    2.0
b    3.1
a    4.5
dtype: float64

### Accesso: `loc` (etichetta) vs `iloc` (posizione)

In [6]:
s = pd.Series([2.0, 3.1, 4.5], index=['a','b','c'])
print("loc['a'] ->", s.loc['a'])
print('iloc[0]  ->', s.iloc[0])
s.loc['b'] = 10
print('Serie aggiornata:\n', s)

loc['a'] -> 2.0
iloc[0]  -> 2.0
Serie aggiornata:
 a     2.0
b    10.0
c     4.5
dtype: float64


In [7]:
# Slicing
s = pd.Series([2.0, 3.1, 4.5], index=['a','b','c'])
print("s.loc['b':'c'] (stop incluso)\n", s.loc['b':'c'])
print('s.iloc[1:3] (stop escluso)\n', s.iloc[1:3])

s.loc['b':'c'] (stop incluso)
 b    3.1
c    4.5
dtype: float64
s.iloc[1:3] (stop escluso)
 b    3.1
c    4.5
dtype: float64


In [8]:
# Masking
s = pd.Series([2.0, 3.1, 4.5], index=['a','b','c'])
s[(s>2) & (s<10)]

b    3.1
c    4.5
dtype: float64

In [9]:
# Fancy indexing
s = pd.Series([2.0, 3.1, 4.5], index=['a','b','c'])
s.loc[['a','c']], s.iloc[[0,2]]

(a    2.0
 c    4.5
 dtype: float64,
 a    2.0
 c    4.5
 dtype: float64)

## 2) DataFrame
Un **DataFrame** è una tabella 2D: ogni colonna è una `Series` con lo stesso indice.

In [10]:
# Da Series (indice condiviso)
price = pd.Series([1.0, 1.4, 5.0], index=['a','b','c'])
quantity = pd.Series([5, 10, 8], index=['a','b','c'])
liters = pd.Series([1.5, 0.3, 1.0], index=['a','b','c'])
df = pd.DataFrame({'Price': price, 'Quantity': quantity, 'Liters': liters})
df

Unnamed: 0,Price,Quantity,Liters
a,1.0,5,1.5
b,1.4,10,0.3
c,5.0,8,1.0


In [11]:
# Da dict di liste
dct = {'c1':[0,1,2], 'c2':[0,2,4]}
df2 = pd.DataFrame(dct)
df2

Unnamed: 0,c1,c2
0,0,0
1,1,2
2,2,4


In [12]:
# Da lista di dict
dic_list = [{'c1':i, 'c2':2*i} for i in range(3)]
df3 = pd.DataFrame(dic_list)
df3

Unnamed: 0,c1,c2
0,0,0
1,1,2
2,2,4


In [13]:
# Da NumPy 2D
arr = np.arange(6).reshape(3,2)
df4 = pd.DataFrame(arr, columns=['c1','c2'], index=['a','b','c'])
df4

Unnamed: 0,c1,c2
a,0,1
b,2,3
c,4,5


### I/O: CSV e JSON

In [14]:
# CSV con header da saltare
demo_csv_path = '/mnt/data/mycsv.csv'
with open(demo_csv_path, 'w', encoding='utf-8') as f:
    f.write('MyTitle\n')
    f.write('c1,c2,c3\n')
    f.write('0,1,2\n')
    f.write('3,4,5\n')
    f.write('6,7,8\n')

df_csv = pd.read_csv(demo_csv_path, sep=',', skiprows=1)
display(df_csv)
demo_csv_path

FileNotFoundError: [Errno 2] No such file or directory: '/mnt/data/mycsv.csv'

In [None]:
# CSV con NaN personalizzati
demo_csv2_path = '/mnt/data/mycsv_nan.csv'
with open(demo_csv2_path, 'w', encoding='utf-8') as f:
    f.write('c1,c2,c3\n')
    f.write('0,no info,\n')
    f.write('3,4,5\n')
    f.write('6,x,NaN\n')

df_nan = pd.read_csv(demo_csv2_path, sep=',', na_values=['no info','x'])
display(df_nan)
df_nan.dtypes, demo_csv2_path

In [None]:
# to_csv (index=False per non scrivere l'indice)
saved_path = '/mnt/data/savedcsv.csv'
df_csv.to_csv(saved_path, sep=',', index=False)
saved_path

In [None]:
# JSON: salvataggio e lettura
demo_json_path = '/mnt/data/myjson.json'
df_csv.to_json(demo_json_path)
df_json = pd.read_json(demo_json_path)
display(df_json)
demo_json_path

## 3) Accesso e indicizzazione (`loc`/`iloc`)

In [None]:
df.columns, df.index

In [None]:
df.values

In [None]:
df['Quantity']

In [None]:
df.loc['a'], df.iloc[0]

In [None]:
df.loc['b':'c', 'Quantity':'Liters']

In [None]:
mask = (df['Quantity']<10) & (df['Liters']>1)
df.loc[mask, 'Quantity':]

In [None]:
df.loc[mask, ['Price','Liters']]

In [None]:
df.loc[['a','c'], ['Price','Liters']]

### Differenza con NumPy (indicizzazione 2D)

In [None]:
arr = np.array([[1.0, 5.0, 1.5],[1.4,10.0,0.3],[5.0,8.0,1.0]])
arr_res = arr[[0,2],[0,2]]
df_demo = pd.DataFrame(arr, index=['a','b','c'], columns=['Price','Quantity','Liters'])
pandas_res = df_demo.iloc[[0,2],[0,2]]
arr_res, pandas_res

## 4) Modifiche: assegnazioni, add/drop/rename colonne

In [None]:
df2 = df.copy()
df2.loc[['a','c'], ['Price','Liters']] = 0
df2

In [None]:
df_add = df.copy()
df_add['Available'] = pd.Series([True, False, True], index=['a','b','c'])
df_add

In [None]:
df_add2 = df.copy()
df_add2['Available'] = [True, False, True]
df_add2

In [None]:
df_drop = df.copy().drop(columns=['Quantity','Liters'])
df_drop

In [None]:
df_renamed = df.copy().rename(columns={'Quantity':'nItems', 'Liters':'[L]'})
df_renamed

## 5) Computation: ufunc, allineamento, aggregazioni

In [None]:
s = pd.Series([-2.0, -1.0, 0.0, 1.0])
np.exp(s) + np.sin(s/4)

In [None]:
s1 = pd.Series([1,3,10], index=['a','b','c'])
s2 = pd.Series([1,3,30], index=['a','b','d'])
s1, s2, s1 + s2

In [None]:
df1 = pd.DataFrame({'Total':[1,3,5], 'Quantity':[2,4,6]}, index=['a','b','c'])
df2b = pd.DataFrame({'Total':[1,3,5], 'Price':[2,4,6]}, index=['a','b','c'])
df1, df2b, (df1+df2b)

In [None]:
df3 = pd.DataFrame({'Total':[1,3,5], 'Quantity':[2,4,6]}, index=['a','b','c'])
s_row = pd.Series({'Total':1, 'Quantity':2})
df3 + s_row

In [None]:
df1.mean(), df1.std(), df1.sum()

In [None]:
mean_series = df1.mean()
std_series = df1.std()
((df1 - mean_series) / std_series).round(3)

## 6) Missing values: `isnull`, `notnull`, `dropna`, `fillna`

In [None]:
s = pd.Series([4, None, 5, np.nan])
s, s.isnull(), s.notnull()

In [None]:
s.dropna()

In [None]:
df_nan_demo = pd.DataFrame({'Total':[1,3,5], 'Quantity':[2, np.nan, 6]}, index=['a','b','c'])
df_drop_rows = df_nan_demo.dropna()
df_drop_cols = df_nan_demo.dropna(axis='columns')
df_nan_demo, df_drop_rows, df_drop_cols

In [None]:
s = pd.Series([4, None, 5, np.nan])
s.fillna(0), s.fillna(method='ffill'), s.fillna(method='bfill')

## 7) Concat / Merge

In [None]:
sa = pd.Series(['a','b'], index=[1,2])
sb = pd.Series(['c','d'], index=[1,2])
pd.concat((sa, sb)), pd.concat((sa, sb), ignore_index=True)

In [None]:
dfa = pd.DataFrame({'Total':[1,3]}, index=['a','b'])
dfb = pd.DataFrame({'Total':[5,7]}, index=['c','d'])
pd.concat((dfa, dfb))

In [None]:
dfc = pd.DataFrame({'Total':[1,3]}, index=['a','b'])
dfd = pd.DataFrame({'Total':[5,7], 'Liters':[1,2]}, index=['c','d'])
pd.concat((dfc, dfd))

In [None]:
dfL = pd.DataFrame({'k1':[0,1,0,1], 'c2':['a','b','c','d']})
dfR = pd.DataFrame({'k1':[1,0], 'c3':['b1','a1']})
pd.merge(dfL, dfR, on='k1', validate='m:1')

## 8) GroupBy

In [None]:
dfg = pd.DataFrame({'k':['a','b','a','b'], 'c1':[2,10,3,15], 'c2':[4,20,5,30]})
grp = dfg.groupby('k')
list(grp), grp.mean(), grp['c1'].mean(), grp.filter(lambda x: x['c1'].mean()>5)

## 9) Pivot / Pivot Table

In [None]:
dfp = pd.DataFrame({'type':['a','b','b','a','b','a','b','a'], 'class':[3,2,3,3,2,1,1,2], 'fail':[1,1,1,0,1,0,0,0]})
dfp.pivot_table('fail', index='type', columns='class', aggfunc='sum'), dfp.pivot_table('fail', index='type', columns='class', aggfunc='mean')

## 10) MultiIndex

In [None]:
ix = [['Rome','Rome','Turin','Turin'], ['2018','2019','2018','2019']]
s_mi = pd.Series([10,13,7,9], index=ix).sort_index()
s_mi.index.names = ['city','year']
(s_mi, s_mi.loc['Rome'], s_mi.loc[:, '2018'], s_mi[s_mi>10])

In [None]:
ix = [['Rome','Rome','Turin','Turin'], ['2018','2019','2018','2019']]
cols = [['c1','c1','c2','c2'], ['a','b','a','b']]
data = np.arange(16).reshape((4,4))
df_mi = pd.DataFrame(data, index=ix, columns=cols)
df_mi

In [None]:
df_mi.loc[:, 'c1'], df_mi.loc['Rome', 'c1']

In [None]:
df_mi.loc[:, ('c1','a')], df_mi.loc[('Rome','2018'), ('c1','a')]

In [None]:
ixs = pd.IndexSlice
df_mi.loc[ixs['Rome':'Turin', '2018'], ixs['c1':'c2', 'a']]

In [None]:
df_mi.index.names = ['city','year']
df_reset = df_mi.reset_index()
df_set_back = df_reset.set_index(['city','year'])
(df_reset.head(), df_set_back.equals(df_mi))

In [None]:
unstacked = s_mi.unstack()
restacked = unstacked.stack()
max_by_city = df_mi.max(level='city')
max_by_year = df_mi.max(level='year')
max_by_col_level0 = df_mi.max(axis=1, level=0)
(unstacked, restacked.equals(s_mi), max_by_city, max_by_year, max_by_col_level0)

---

*Notebook creato automaticamente il 2025-10-09 09:48:35.*