In [3]:
import pandas as pd

# Load the data
file_path = 'hotel_bookings.csv'
data = pd.read_csv(file_path)

# Filter top 7 countries with the most bookings
top_countries = data['country'].value_counts().head(7).index
filtered_data = data[data['country'].isin(top_countries)]

###################################################################
# BOOKING TYPE ####################################################
###################################################################
filtered_data['adult_only_booking'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] == 0) & (filtered_data['babies'] == 0)).astype(int)
filtered_data['adult_children_booking'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] > 0) & (filtered_data['babies'] == 0)).astype(int)
filtered_data['adult_baby_booking'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] == 0) & (filtered_data['babies'] > 0)).astype(int)
filtered_data['all_booking'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] > 0) & (filtered_data['babies'] > 0)).astype(int)
filtered_data['some_kind_of_kid'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] > 0) | (filtered_data['babies'] > 0)).astype(int)
###################################################################
# DESTINATION #####################################################
###################################################################
filtered_data['adults_only_hotel'] = ((filtered_data['adult_only_booking'] == 1) & (filtered_data['hotel'] == 'City Hotel')).astype(int)
filtered_data['adults_only_resort'] = ((filtered_data['adult_only_booking'] == 1) & (filtered_data['hotel'] == 'Resort Hotel')).astype(int)
filtered_data['with_kids_hotel'] = ((filtered_data['some_kind_of_kid'] == 1) & (filtered_data['hotel'] == 'City Hotel')).astype(int)
filtered_data['with_kids_resort'] = ((filtered_data['some_kind_of_kid'] == 1) & (filtered_data['hotel'] == 'Resort Hotel')).astype(int)
###################################################################
# CANCELLATION ####################################################
###################################################################
filtered_data['adults_only_cancellations'] = ((filtered_data['adult_only_booking'] == 1) & (filtered_data['is_canceled'] == 1)).astype(int)
filtered_data['adults_only_not_cancellation'] = ((filtered_data['adult_only_booking'] == 1) & (filtered_data['is_canceled'] == 0)).astype(int)
filtered_data['with_kids_cancellation'] = ((filtered_data['some_kind_of_kid'] == 1) & (filtered_data['is_canceled'] == 1)).astype(int)
filtered_data['with_kids_not_cancellation'] = ((filtered_data['some_kind_of_kid'] == 1) & (filtered_data['is_canceled'] == 0)).astype(int)

# Update summary_df with the new aggregations
summary_df = filtered_data.groupby('country').agg(
    total_bookings=('country', 'size'),  # Number of bookings (rows per country)
    adult_only_booking=('adult_only_booking', 'sum'),
    adult_children_booking=('adult_children_booking', 'sum'),
    adult_baby_booking=('adult_baby_booking', 'sum'),
    all_booking=('all_booking', 'sum'),
    some_kind_of_kid=('some_kind_of_kid', 'sum'),
    adults_only_hotel=('adults_only_hotel', 'sum'),
    adults_only_resort=('adults_only_resort', 'sum'),
    with_kids_hotel=('with_kids_hotel', 'sum'),
    with_kids_resort=('with_kids_resort', 'sum'),
    adults_only_cancellations=('adults_only_cancellations', 'sum'),
    adults_only_not_cancellation=('adults_only_not_cancellation', 'sum'),
    with_kids_cancellation=('with_kids_cancellation', 'sum'),
    with_kids_not_cancellation=('with_kids_not_cancellation', 'sum')
).reset_index()

# Display the summary dataframe
summary_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['adult_only_booking'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] == 0) & (filtered_data['babies'] == 0)).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['adult_children_booking'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] > 0) & (filtered_data['babies'] == 0)).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https

Unnamed: 0,country,total_bookings,adult_only_booking,adult_children_booking,adult_baby_booking,all_booking,some_kind_of_kid,adults_only_hotel,adults_only_resort,with_kids_hotel,with_kids_resort,adults_only_cancellations,adults_only_not_cancellation,with_kids_cancellation,with_kids_not_cancellation
0,DEU,7287,6920,333,16,2,351,5765,1155,303,48,1126,5794,89,262
1,ESP,8568,7473,947,105,21,1073,4017,3456,573,500,1828,5645,345,728
2,FRA,10415,9482,817,61,16,894,8024,1458,742,152,1698,7784,230,664
3,GBR,12129,11209,813,72,11,896,4878,6331,414,482,2148,9061,301,595
4,IRL,3375,3145,201,20,4,225,1141,2004,63,162,752,2393,78,147
5,ITA,3766,3420,317,10,5,333,2996,424,298,35,1206,2214,122,211
6,PRT,48590,45707,2315,324,90,2731,29834,15873,982,1749,26428,19279,1063,1668


In [4]:
summary_df.to_csv('whole_countries_first.csv', index=False)