# Pandas

In [None]:
! pip install pandas --upgrade

In [None]:
import pandas as pd
pd.__version__ # version in use; after an upgrade, restart the kernel / virtual environment first

'2.1.1'

# Create, read and write data

## Creating data

In [None]:
# A Series built from a label --> value mapping; the name labels the whole Series.
df = pd.Series(
    {
        "2019 sales": 100,
        "2020 sales": 210,
        "2021 sales": 320,
        "2022 sales": 450,
    },
    name="Sales",
)
df

210

In [None]:
# Monthly sales table: one row per month label, one column per year.
df = pd.DataFrame(
    {
        "2019 sales": list(range(100, 1300, 100)),
        "2020 sales": list(range(110, 1310, 100)),
    },
    index=[
        "Jan", "Feb", "Mar", "Apr", "May", "Jun",
        "Jul", "Ago", "Sep", "Oct", "Nov", "Dec",
    ],
)
df

Unnamed: 0,2019 sales,2020 sales
Jan,100,110
Feb,200,210
Mar,300,310
Apr,400,410
May,500,510
Jun,600,610
Jul,700,710
Ago,800,810
Sep,900,910
Oct,1000,1010


In [None]:
# total of one column
df["2020 sales"].sum()

7920

## Reading and writing data

In [None]:
# reading: load the CSV file (path relative to the working directory) into a DataFrame
df = pd.read_csv("listings.csv")
df

In [None]:
# writing: index=False leaves the row index out of the file
df.to_csv("listings_cleaned.csv", index=False)

In [None]:
# number of rows and columns, as a (rows, columns) tuple
df.shape

(48016, 18)

In [None]:
# df.head()  # would show the first 5 rows
df.tail(10) # show the last 10 rows

In [None]:
# get the column names (returned as an Index object)
df.columns

Index(['id', 'name', 'host_id', 'host_name', 'neighbourhood_group',
       'neighbourhood', 'latitude', 'longitude', 'room_type', 'price',
       'minimum_nights', 'number_of_reviews', 'last_review',
       'reviews_per_month', 'calculated_host_listings_count',
       'availability_365', 'number_of_reviews_ltm', 'license'],
      dtype='object')

## Indexing, selecting and assigning

## Accessing columns

In [None]:
# Accessing a column (version 1): attribute access, only possible when the
# column name is a valid Python identifier
df.room_type

0        Entire home/apt
1        Entire home/apt
2           Private room
3        Entire home/apt
4        Entire home/apt
              ...       
48011    Entire home/apt
48012    Entire home/apt
48013    Entire home/apt
48014    Entire home/apt
48015       Private room
Name: room_type, Length: 48016, dtype: object

In [None]:
# Accessing a column (version 2 -- preferred)
# (this expression is not displayed: only the last expression of a cell is echoed)
df["room_type"]
# print the distinct values of each of the two location columns
for c in ["neighbourhood_group", "neighbourhood"]:
    print(c, df[c].unique())

In [None]:
# Accessing two columns: a list of names inside [] returns a DataFrame
df[["neighbourhood_group", "neighbourhood"]]

Unnamed: 0,neighbourhood_group,neighbourhood
0,Lecce,Castrignano del Capo
1,Bari,Bari
2,Lecce,Matino
3,Lecce,Ugento
4,Lecce,Poggiardo
...,...,...
48011,Lecce,Spongano
48012,Lecce,Ugento
48013,Lecce,Spongano
48014,Bari,Polignano a Mare


In [None]:
# the same two columns, looked up by their position in df.columns
df[[df.columns[4], df.columns[5]]]

Unnamed: 0,neighbourhood_group,neighbourhood
0,Lecce,Castrignano del Capo
1,Bari,Bari
2,Lecce,Matino
3,Lecce,Ugento
4,Lecce,Poggiardo
...,...,...
48011,Lecce,Spongano
48012,Lecce,Ugento
48013,Lecce,Spongano
48014,Bari,Polignano a Mare


In [None]:
# access rows: take the column (a Series), then slice rows 10 to 19
df["room_type"][10:20]

10    Entire home/apt
11    Entire home/apt
12    Entire home/apt
13    Entire home/apt
14    Entire home/apt
15    Entire home/apt
16    Entire home/apt
17    Entire home/apt
18    Entire home/apt
19    Entire home/apt
Name: room_type, dtype: object

In [None]:
# same rows, but the double brackets keep the result a one-column DataFrame
df[["room_type"]][10:20]

Unnamed: 0,room_type
10,Entire home/apt
11,Entire home/apt
12,Entire home/apt
13,Entire home/apt
14,Entire home/apt
15,Entire home/apt
16,Entire home/apt
17,Entire home/apt
18,Entire home/apt
19,Entire home/apt


In [None]:
# accessing the row at position 1 (iloc is position-based, not label-based)
df.iloc[1]

id                                                              871797626686228714
name                              Rental unit in Bari · 1 bedroom · 1 bed · 1 bath
host_id                                                                  498102198
host_name                                                                     Easy
neighbourhood_group                                                           Bari
neighbourhood                                                                 Bari
latitude                                                                  41.12367
longitude                                                                 16.85844
room_type                                                          Entire home/apt
price                                                                          107
minimum_nights                                                                   1
number_of_reviews                                                                0
last

In [None]:
# a single value: row position 10, column position 0
df.iloc[10, 0]

18130788

In [None]:
# rows at positions 0 to 9 of the first column (the end of the slice is excluded)
df.iloc[0:10, 0]

0              20009845
1    871797626686228714
2    933977345613784276
3              21558875
4    924521740641122778
5              37529281
6    594069192085321405
7               3228329
8               7789194
9    932507624309365236
Name: id, dtype: int64

In [None]:
# rows at positions 3 to 9, first three columns
df.iloc[3:10, 0:3]

Unnamed: 0,id,name,host_id
3,21558875,Vacation home in Fontanelle · ★4.88 · 3 bedroo...,532530809
4,924521740641122778,Home in Poggiardo · 2 bedrooms · 2 beds · 1 bath,26794110
5,37529281,Bed and breakfast in San Vito dei Normanni · 1...,987289
6,594069192085321405,Home in Porto Cesareo · ★4.33 · 4 bedrooms · 8...,108951352
7,3228329,Rental unit in Baia Verde · ★4.80 · 1 bedroom ...,16338830
8,7789194,Home in Porto Cesareo · ★4.75 · 2 bedrooms · 4...,28497898
9,932507624309365236,Bed and breakfast in Bari · ★4.71 · 1 bedroom ...,263017793


In [None]:
# a list selects specific row positions instead of a contiguous range
df.iloc[[2,4,6], 0:3]

Unnamed: 0,id,name,host_id
2,933977345613784276,Bed and breakfast in Matino · 2 bedrooms · 3 b...,97364266
4,924521740641122778,Home in Poggiardo · 2 bedrooms · 2 beds · 1 bath,26794110
6,594069192085321405,Home in Porto Cesareo · ★4.33 · 4 bedrooms · 8...,108951352


In [None]:
# when selecting by column name, use loc (label-based) rather than iloc (position-based)
df.loc[[2,4,6], "room_type"]

2       Private room
4    Entire home/apt
6    Entire home/apt
Name: room_type, dtype: object

In [None]:
# unlike iloc, a loc slice includes its end label: rows 2 through 5
df.loc[2:5, ["host_name", "room_type"]]

Unnamed: 0,host_name,room_type
2,MariaElena,Private room
3,Lucia,Entire home/apt
4,Buena Onda,Entire home/apt
5,Guglielmo,Private room


In [None]:
# all rows (":"), two columns -- same selection as the bracket form below
#df[["host_name", "room_type"]]
df.loc[:, ["host_name", "room_type"]]

Unnamed: 0,host_name,room_type
0,Julien,Entire home/apt
1,Easy,Entire home/apt
2,MariaElena,Private room
3,Lucia,Entire home/apt
4,Buena Onda,Entire home/apt
...,...,...
48011,Giancarlo,Entire home/apt
48012,Antonio,Entire home/apt
48013,Michela,Entire home/apt
48014,Jacqueline Renate,Entire home/apt


## Manipulating the index

In [None]:
# Use host_name as the row index; the column is moved into the index.
# NOTE: re-running this cell fails, because host_name is no longer a column afterwards.
df = df.set_index("host_name")
df

Unnamed: 0_level_0,name,host_id,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,license
host_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Julien,Home in Provincia di Lecce · ★4.81 · 3 bedroom...,3325119,Lecce,Castrignano del Capo,39.812110,18.328000,Entire home/apt,115,3,16,2023-09-10,0.21,8,25,3,
Easy,Rental unit in Bari · 1 bedroom · 1 bed · 1 bath,498102198,Bari,Bari,41.123670,16.858440,Entire home/apt,107,1,0,,,5,7,0,
MariaElena,Bed and breakfast in Matino · 2 bedrooms · 3 b...,97364266,Lecce,Matino,40.029885,18.136070,Private room,60,2,0,,,24,364,0,
Lucia,Vacation home in Fontanelle · ★4.88 · 3 bedroo...,532530809,Lecce,Ugento,39.869980,18.145130,Entire home/apt,170,5,8,2023-08-19,0.16,1,263,3,
Buena Onda,Home in Poggiardo · 2 bedrooms · 2 beds · 1 bath,26794110,Lecce,Poggiardo,40.046419,18.375289,Entire home/apt,55,1,2,2023-09-15,1.25,124,176,2,LE07506191000035453
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Giancarlo,Villa in Spongano · 4 bedrooms · 9 beds · 3 baths,36358333,Lecce,Spongano,40.014210,18.365380,Entire home/apt,220,1,2,2022-08-09,0.04,1,329,0,
Antonio,Home in Torre San Giovanni · 3 bedrooms · 6 be...,60480293,Lecce,Ugento,39.895480,18.104870,Entire home/apt,64,1,0,,,61,24,0,LE07509091000033340
Michela,Villa in Spongano · ★4.50 · 2 bedrooms · 4 bed...,23288546,Lecce,Spongano,40.007450,18.345540,Entire home/apt,90,2,4,2023-08-06,0.28,1,177,1,LE07507891000033141
Jacqueline Renate,Condo in Polignano a Mare · ★4.83 · 2 bedrooms...,1318568,Bari,Polignano a Mare,40.994730,17.225500,Entire home/apt,73,3,23,2023-09-07,0.90,5,365,9,


In [None]:
# index labels do not have to be unique: every row labelled "Buena Onda" is returned
df.loc["Buena Onda", "name"]

host_name
Buena Onda     Home in Poggiardo · 2 bedrooms · 2 beds · 1 bath
Buena Onda    Villa in Torre Suda · 3 bedrooms · 3 beds · 3 ...
Buena Onda    Home in Torre Suda · 2 bedrooms · 3 beds · 1 bath
Buena Onda    Villa in Castro · ★4.86 · 2 bedrooms · 5 beds ...
Buena Onda    Home in Baia Verde · 2 bedrooms · 4 beds · 1 bath
                                    ...                        
Buena Onda          Home in Nardò · 1 bedroom · 3 beds · 1 bath
Buena Onda          Home in Nardò · 1 bedroom · 3 beds · 1 bath
Buena Onda    Villa in Carovigno · ★5.0 · 3 bedrooms · 6 bed...
Buena Onda    Rental unit in Cutrofiano · 5 bedrooms · 5 bed...
Buena Onda    Home in Torre Suda · 3 bedrooms · 5 beds · 2 b...
Name: name, Length: 326, dtype: object

# Conditional selection

In [None]:
# a comparison yields a boolean Series: True where the listing is in Bari
df["neighbourhood_group"] == "Bari"

host_name
Julien               False
Easy                  True
MariaElena           False
Lucia                False
Buena Onda           False
                     ...  
Giancarlo            False
Antonio              False
Michela              False
Jacqueline Renate     True
Vincenzo              True
Name: neighbourhood_group, Length: 48016, dtype: bool

In [None]:
# passing the boolean Series to loc keeps only the rows where it is True
df.loc[df["neighbourhood_group"] == "Bari"]

Unnamed: 0_level_0,name,host_id,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,license
host_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Easy,Rental unit in Bari · 1 bedroom · 1 bed · 1 bath,498102198,Bari,Bari,41.123670,16.858440,Entire home/apt,107,1,0,,,5,7,0,
Bari,Bed and breakfast in Bari · ★4.71 · 1 bedroom ...,263017793,Bari,Bari,41.122082,16.873199,Private room,78,1,7,2023-08-28,3.13,45,89,7,
Cconforthotels,Vacation home in Bari · ★5.0 · 1 bedroom · 4 b...,864615,Bari,Bari,41.118870,16.873040,Private room,95,1,3,2022-08-05,0.20,42,310,0,
Rosa Anna,Villa in Putignano · ★5.0 · 3 bedrooms · 3 bed...,32677222,Bari,Putignano,40.857040,17.142770,Entire home/apt,110,2,5,2022-09-23,0.08,1,18,0,
Cconforthotels,Vacation home in Bari · 1 bedroom · 1 bed · 1 ...,864615,Bari,Bari,41.121580,16.872220,Private room,119,1,1,2023-06-25,0.32,42,347,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
WePuglia Srl,trullo in Castellana Grotte · 1 bedroom · 1 be...,241678828,Bari,Polignano a Mare,40.910892,17.162709,Entire home/apt,162,3,0,,,26,0,0,
Aldo,Condo in Polignano A Mare · ★4.83 · 1 bedroom ...,32554926,Bari,Polignano a Mare,40.994880,17.226180,Entire home/apt,69,1,24,2023-09-09,0.31,2,77,5,
Antonio,Home in Polignano a Mare · ★4.94 · 1 bedroom ·...,414644885,Bari,Polignano a Mare,40.995670,17.224200,Entire home/apt,90,1,48,2023-09-22,1.83,1,267,16,BA07203591000020554
Jacqueline Renate,Condo in Polignano a Mare · ★4.83 · 2 bedrooms...,1318568,Bari,Polignano a Mare,40.994730,17.225500,Entire home/apt,73,3,23,2023-09-07,0.90,5,365,9,


In [None]:
# filter the rows and pick the columns in a single loc call
df.loc[df["neighbourhood_group"] == "Bari", ["latitude", "longitude"]]

Unnamed: 0_level_0,latitude,longitude
host_name,Unnamed: 1_level_1,Unnamed: 2_level_1
Easy,41.123670,16.858440
Bari,41.122082,16.873199
Cconforthotels,41.118870,16.873040
Rosa Anna,40.857040,17.142770
Cconforthotels,41.121580,16.872220
...,...,...
WePuglia Srl,40.910892,17.162709
Aldo,40.994880,17.226180
Antonio,40.995670,17.224200
Jacqueline Renate,40.994730,17.225500


In [None]:
# Exercise: Get the number of listings in neighbourhood_group "Taranto"
# version 1 (mr Manetta) len(df.loc[df["neighbourhood_group"] == "Taranto"])
# version 2: shape[0] is the number of rows of the filtered frame
df.loc[df["neighbourhood_group"] == "Taranto"].shape[0]

4340

In [None]:
# Exercise: list all the private rooms in Taranto
# Tip: A and B --> (A) & (B)
# The parentheses are required: & binds more tightly than ==
df.loc[(df["neighbourhood_group"] == "Taranto") & (df["room_type"] == "Private room")]

Unnamed: 0_level_0,name,host_id,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,license
host_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Vito,Villa in Castellaneta Marina · 1 bedroom · 2 b...,80971292,Taranto,Castellaneta,40.47726,16.932220,Private room,129,2,0,,,2,0,0,
Agnese,Bed and breakfast in Mottola · 1 bedroom · 2 b...,20148270,Taranto,Mottola,40.64440,17.077450,Private room,100,3,2,2018-08-11,0.03,1,364,0,
Antonio,Bed and breakfast in Grottaglie · ★4.86 · 1 be...,101098515,Taranto,Grottaglie,40.53505,17.430340,Private room,62,1,7,2022-07-16,0.11,1,62,0,(C.I.S.) TA07300861000016011
Domenico,Bed and breakfast in Marina di Ginosa · ★5.0 ·...,16600594,Taranto,Ginosa,40.42298,16.881770,Private room,74,1,10,2023-05-24,0.15,4,31,3,
Andrea,Home in San Pietro In Bevagna · 5 bedrooms · 7...,75224706,Taranto,Manduria,40.30600,17.645510,Private room,200,7,1,2018-04-28,0.02,1,365,0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Raro Realty,Resort in Taranto · ★New · 1 bedroom · 1 bed ·...,93984333,Taranto,Taranto,40.35467,17.426737,Private room,363,1,0,,,76,364,0,
Alinella,Condo in Taranto · ★4.98 · 1 bedroom · 1 bed ·...,264079445,Taranto,Taranto,40.47406,17.233640,Private room,81,2,45,2023-07-22,0.86,2,83,7,
Filomena,Bed and breakfast in Taranto · ★4.86 · 1 bedro...,148637178,Taranto,Taranto,40.41253,17.219400,Private room,43,1,7,2022-09-22,0.13,2,1,0,
Simona,Home in Taranto · 1 bedroom · 1 bed · 1 privat...,55587111,Taranto,Taranto,40.42379,17.232570,Private room,40,2,1,2021-07-21,0.04,5,365,0,


In [None]:
# Exercise: list all the rows in Taranto or Foggia
# Tip: A or B --> (A) | (B)
df.loc[(df["neighbourhood_group"] == "Foggia") | (df["neighbourhood_group"] == "Taranto")]

In [None]:
# Exercise: how many rooms with price lower than 300 euros are in the database?
# (count the rows that pass the filter)
df.loc[df["price"] < 300].shape[0]

44918

In [None]:
# another way: query() takes the condition as a string
# (the first result is not displayed: only the last expression of a cell is echoed)
df.query("neighbourhood_group == 'Taranto' or neighbourhood_group == 'Foggia'")
df.query("price < 300").shape[0]

In [None]:
# NaN values
# isna() / notna() return a boolean Series marking missing / present values
#df["last_review"].isna()
#df["last_review"].notna()

# keep only the rows that have a last_review value
df[df["last_review"].notna()]


# Assign values

In [None]:
# Assign one value to a whole column.
# df["price"] = 1000
# Done on a copy: the real prices are still needed by the summary statistics
# computed in later cells, so the shared df must not be overwritten here.
df_flat_price = df.copy()
df_flat_price.loc[:, "price"] = 1000
df_flat_price

In [None]:
# Assign a value computed from the column itself: add 10 to every review count.
# NOTE: not idempotent -- each re-run of this cell adds another 10.
df.loc[:, "number_of_reviews"] = df.loc[:, "number_of_reviews"] + 10
df.loc[:, "number_of_reviews"]

host_name
Julien               26
Easy                 10
MariaElena           10
Lucia                18
Buena Onda           12
                     ..
Giancarlo            12
Antonio              10
Michela              14
Jacqueline Renate    33
Vincenzo             10
Name: number_of_reviews, Length: 48016, dtype: int64

In [None]:
# neighbourhood_group:Bari --> Lecce
# The column must be named in loc: with only the row mask, EVERY column of the
# matching rows would be overwritten with "Lecce".
# Done on a copy so the Bari rows stay available to the later cells that filter on them.
df_relabelled = df.copy()
df_relabelled.loc[df_relabelled["neighbourhood_group"] == "Bari", "neighbourhood_group"] = "Lecce"
df_relabelled

In [None]:
# Row filter for the Lecce listings, then raise only their price by 10.
f = df["neighbourhood_group"].eq("Lecce")
df.loc[f, "price"] += 10
df

# Summary functions and mapping

## Summary functions

In [None]:
# average price
df["price"].mean()

147.8835388203932

In [None]:
# highest price
df["price"].max()

90000

In [None]:
# lowest price
df["price"].min()

9

In [None]:
# standard deviation of the price
df["price"].std()

904.427767879358

In [None]:
# all the main statistics of a numeric column at once
df["price"].describe()

count    48016.000000
mean       147.883539
std        904.427768
min          9.000000
25%         60.000000
50%         86.000000
75%        130.000000
max      90000.000000
Name: price, dtype: float64

In [None]:
# on a text column describe() reports count, number of distinct values,
# the most frequent value and how often it occurs
df["room_type"].describe()

count               48016
unique                  4
top       Entire home/apt
freq                39592
Name: room_type, dtype: object

In [None]:
# the distinct values of the column
df["room_type"].unique()

array(['Entire home/apt', 'Private room', 'Hotel room', 'Shared room'],
      dtype=object)

In [None]:
# number of rows per distinct value, most frequent first
df["room_type"].value_counts()

room_type
Entire home/apt    39592
Private room        8013
Hotel room           343
Shared room           68
Name: count, dtype: int64

In [None]:
# Count the missing values of every column in one pass, then print one line per column.
for column_name, missing_count in df.isna().sum().items():
    print(column_name, missing_count)

id 0
name 0
host_id 0
host_name 0
neighbourhood_group 0
neighbourhood 0
latitude 0
longitude 0
room_type 0
price 0
minimum_nights 0
number_of_reviews 0
last_review 16318
reviews_per_month 16318
calculated_host_listings_count 0
availability_365 0
number_of_reviews_ltm 0
license 36191


In [None]:
# the mean of several columns at once (one value per column)
df[["price", "minimum_nights"]].mean()

price             147.883539
minimum_nights      3.295818
dtype: float64

In [None]:
# mean price of the Bari listings only: filter the rows, pick the column, aggregate
df.loc[df["neighbourhood_group"] == "Bari", "price"].mean()

134.4257754377933

## Mapping

In [None]:
# the values that need an entry in a mapping
df["room_type"].unique()

array(['Entire home/apt', 'Private room', 'Hotel room', 'Shared room'],
      dtype=object)

In [None]:
# lookup table: room type --> one-letter code
m = {
    'Entire home/apt': "E",
    'Private room': "P",
    'Hotel room': "H",
    'Shared room': "S"
}
# map() replaces every value with its entry in the dictionary (returns a new Series)
df["room_type"].map(m)

In [None]:
# map() also accepts a function; the named function and the lambda below are equivalent
# def f(x):
#     return len(str(x))

# lambda x: len(str(x))

# length of each room type string
df["room_type"].map(lambda x: len(str(x)))

0        15
1        15
2        12
3        15
4        15
         ..
48011    15
48012    15
48013    15
48014    15
48015    12
Name: room_type, Length: 48016, dtype: int64

In [None]:
%%time
price_mean = df["price"].mean()

def distance_from_the_mean(row):
    """Return the difference between the row's price and the mean price."""
    return row["price"] - price_mean

# with axis="columns" apply() calls the function once per row
df[["price"]].apply(distance_from_the_mean, axis="columns")

# note: the vectorized expression df["price"] - price_mean gives the same
# values without one Python call per row

# do NOT do this
# for row in df.index:
#     df.loc[row, "price"] = df.loc[row, "price"] - price_mean

# Data types and missing values

In [None]:
# reading: load the file again, replacing the frame modified in the previous sections
df = pd.read_csv("listings.csv")
df

## Data types

In [None]:
# data type of every column
df.dtypes

id                                  int64
name                               object
host_id                             int64
host_name                          object
neighbourhood_group                object
neighbourhood                      object
latitude                          float64
longitude                         float64
room_type                          object
price                               int64
minimum_nights                      int64
number_of_reviews                   int64
last_review                        object
reviews_per_month                 float64
calculated_host_listings_count      int64
availability_365                    int64
number_of_reviews_ltm               int64
license                            object
dtype: object

In [None]:
# data type of a single column
df["price"].dtype

dtype('int64')

In [None]:
# convert to a smaller integer type; the result is a new Series, df is not changed
df["price"].astype('int32')

0        115
1        107
2         60
3        170
4         55
        ... 
48011    220
48012     64
48013     90
48014     73
48015     70
Name: price, Length: 48016, dtype: int32

In [None]:
# putting the dtype in read_csv: the listed columns get the requested type
# while the file is read; the other columns keep the inferred type
df = pd.read_csv("listings.csv", dtype={"price": "int32", "minimum_nights": "uint16"})
df.dtypes

id                                  int64
name                               object
host_id                             int64
host_name                          object
neighbourhood_group                object
neighbourhood                      object
latitude                          float64
longitude                         float64
room_type                          object
price                               int32
minimum_nights                     uint16
number_of_reviews                   int64
last_review                        object
reviews_per_month                 float64
calculated_host_listings_count      int64
availability_365                    int64
number_of_reviews_ltm               int64
license                            object
dtype: object

## Missing values

In [None]:
# in-place alternative (note the .loc):
# df.loc[df["reviews_per_month"].isna(), "reviews_per_month"] = 0

# option 1: replace missing values with 0, then cast to int16 (the cast drops the decimals).
# The result is neither stored nor displayed: it is not the last expression of the cell.
df["reviews_per_month"].fillna(0).astype("int16")

# option 2: replace missing values with the mean of the column
df["reviews_per_month"].fillna(df["reviews_per_month"].mean())

0        0.21000
1        0.61673
2        0.61673
3        0.16000
4        1.25000
          ...   
48011    0.04000
48012    0.61673
48013    0.28000
48014    0.90000
48015    0.61673
Name: reviews_per_month, Length: 48016, dtype: float64

# Exercises

In [None]:
# start the exercises from the original file
df = pd.read_csv("listings.csv")
df.head()

In [None]:
# Raise the prices of Hotel rooms in Bari by 12%
# Row-wise alternative (applies to every row, and is slower):
# def plus_12_percent(row):
#     return row["price"] * 1.12

# df[["price"]].apply(plus_12_percent, axis="columns")

# row filter: hotel rooms located in Bari
f = (df["neighbourhood_group"] == "Bari") & (df["room_type"] == "Hotel room")
# price is read as an integer column; convert it to float first so the
# fractional results can be stored without an incompatible-dtype assignment
df["price"] = df["price"].astype("float64")
df.loc[f, "price"] =  df.loc[f, "price"] * 1.12

0        128.80
1        119.84
2         67.20
3        190.40
4         61.60
          ...  
48011    246.40
48012     71.68
48013    100.80
48014     81.76
48015     78.40
Length: 48016, dtype: float64

In [None]:
# How many "Entire home/apt" are "Bed and breakfast"?
# tip: use df["field"].str.contains('X')

# one boolean Series per condition, combined with & and counted
is_entire_home = df["room_type"] == 'Entire home/apt'
mentions_bnb = df["name"].str.contains("Bed and breakfast")
len(df[is_entire_home & mentions_bnb])

39

In [None]:
# Mean price by zone (N, S, C) taking
#   Foggia --> N
#   Bari, Barletta-Andria-Trani --> C
#   Lecce, Brindisi, Taranto --> S

# Alternative: one boolean filter per zone
# f_N = df["neighbourhood_group"] == "Foggia"
# df.loc[f_N, "price"].mean()

# f_C = (df["neighbourhood_group"] == "Bari") | (df["neighbourhood_group"] == "Barletta-Andria-Trani")
# df.loc[f_C, "price"].mean()

# f_S = (df["neighbourhood_group"] == "Lecce") | (df["neighbourhood_group"] == "Brindisi") | (df["neighbourhood_group"] == "Taranto")
# df.loc[f_S, "price"].mean()

# lookup table: province --> zone
zone_by_province = {
    "Foggia": "N",
    "Bari": "C", "Barletta-Andria-Trani": "C",
    "Lecce": "S", "Brindisi": "S", "Taranto": "S",
}

# Group by the mapped zones without overwriting the province column:
# the cell gives the same result on every run and df keeps its original values.
zone = df["neighbourhood_group"].map(zone_by_province)
df.groupby(zone)["price"].mean()

neighbourhood_group
C    129.513684
N    119.635140
S    155.426076
Name: price, dtype: float64