In [3]:
import pandas as pd

# Load the data
file_path = 'hotel_bookings.csv'
data = pd.read_csv(file_path)

# Filter top 7 countries with the most bookings
top_countries = data['country'].value_counts().head(7).index
filtered_data = data[data['country'].isin(top_countries)]

# Add one-hot encoded columns to classify the type of bookings
filtered_data['adult_only_booking'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] == 0) & (filtered_data['babies'] == 0)).astype(int)
filtered_data['adult_children_booking'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] > 0) & (filtered_data['babies'] == 0)).astype(int)
filtered_data['adult_baby_booking'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] == 0) & (filtered_data['babies'] > 0)).astype(int)
filtered_data['all_booking'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] > 0) & (filtered_data['babies'] > 0)).astype(int)
filtered_data['some_kind_of_kid'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] > 0) | (filtered_data['babies'] > 0)).astype(int)

# Group by country and calculate counts for each booking type
summary_df = filtered_data.groupby('country').agg(
    total_bookings=('country', 'size'),  # Number of bookings (rows per country)
    adult_only_booking=('adult_only_booking', 'sum'),
    adult_children_booking=('adult_children_booking', 'sum'),
    adult_baby_booking=('adult_baby_booking', 'sum'),
    all_booking=('all_booking', 'sum'),
    some_kind_of_kid=('some_kind_of_kid', 'sum'),
    city_hotel=('hotel', lambda x: (x == 'City Hotel').sum()),  # Count of City Hotel bookings
    resort_hotel=('hotel', lambda x: (x == 'Resort Hotel').sum()),  # Count of Resort Hotel bookings
    cancellations=('is_canceled', 'sum')  # Sum of cancellations per country
).reset_index()

summary_df


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['adult_only_booking'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] == 0) & (filtered_data['babies'] == 0)).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_data['adult_children_booking'] = ((filtered_data['adults'] > 0) & (filtered_data['children'] > 0) & (filtered_data['babies'] == 0)).astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https

Unnamed: 0,country,total_bookings,adult_only_booking,adult_children_booking,adult_baby_booking,all_booking,some_kind_of_kid,city_hotel,resort_hotel,cancellations
0,DEU,7287,6920,333,16,2,351,6084,1203,1218
1,ESP,8568,7473,947,105,21,1073,4611,3957,2177
2,FRA,10415,9482,817,61,16,894,8804,1611,1934
3,GBR,12129,11209,813,72,11,896,5315,6814,2453
4,IRL,3375,3145,201,20,4,225,1209,2166,832
5,ITA,3766,3420,317,10,5,333,3307,459,1333
6,PRT,48590,45707,2315,324,90,2731,30960,17630,27519


In [4]:
# Save summary_df
summary_df.to_csv('whole_countries_first.csv', index=False)