In [2]:
import pandas as pd

# Read the source CSV into a DataFrame. The location is an absolute Windows
# path, so this cell only runs on a machine where that file exists.
df = pd.read_csv(
    r'C:\Users\Andrei.Baidurov\testTRAV\Data\TDStest2.csv'
)

# Sanity check: report the (rows, columns) shape, then preview the first rows.
print("Dataset shape:", df.shape)
print("\nFirst few rows:", df.head(), sep="\n")

Dataset shape: (1998, 4)

First few rows:
  Booking Status    Hotel Country Name  Room Nights  Guests
0      Confirmed                Turkey            1       2
1       Rejected                Russia            6       2
2      Cancelled                Russia            6       2
3       Rejected        Czech Republic            4       4
4      Confirmed  United Arab Emirates            1       2


In [5]:
import plotly.express as px

# Create country statistics from the dataframe: one row per hotel country with
# the number of bookings and the mean room nights / guests per booking.
# Named aggregation produces the final column names directly, so no rename
# step is needed. 'size' counts the rows in each group; that equals a non-null
# count of the group key because groupby drops rows with a missing key by
# default.
country_stats = (
    df.groupby('Hotel Country Name')
    .agg(
        Total_Bookings=('Room Nights', 'size'),   # Count of bookings
        Avg_Room_Nights=('Room Nights', 'mean'),  # Average room nights
        Avg_Guests=('Guests', 'mean'),            # Average number of guests
    )
    # Round the averages for better display (the integer count is untouched)
    .round({'Avg_Room_Nights': 2, 'Avg_Guests': 2})
    .reset_index()
)

# Create world map coloured by booking volume; the averages appear on hover.
fig = px.choropleth(
    country_stats,
    locations='Hotel Country Name',
    locationmode='country names',  # match regions by country name, not ISO code
    color='Total_Bookings',
    hover_data=['Avg_Room_Nights', 'Avg_Guests'],
    color_continuous_scale='Plasma',  # sequential scale, dark purple -> yellow
    title='Hotel Bookings by Country with Statistics'
)

# Centre the title and apply the muted colour palette and fixed canvas size.
fig.update_layout(
    title_x=0.5,
    title_font=dict(size=18, color='#2c3e50'),
    geo=dict(
        showframe=False,
        showcoastlines=True,
        coastlinecolor='#ecf0f1',
        projection_type='equirectangular'
    ),
    width=1000,
    height=600,
    paper_bgcolor='#f8f9fa',
    font=dict(color='#34495e')
)

# Update colorbar styling
fig.update_coloraxes(
    colorbar_title_font=dict(size=14, color='#2c3e50'),
    colorbar_tickfont=dict(color='#34495e')
)

fig.show()

# Display statistics as a fixed-width text table of the ten busiest countries.
print("\nTop 10 Countries by Total Bookings:")
print("=" * 50)
top_countries = country_stats.sort_values('Total_Bookings', ascending=False).head(10)

# Select the columns explicitly so the tuple unpacking below does not depend on
# the frame's column order; plain tuples avoid building a Series per row.
top_rows = top_countries[
    ['Hotel Country Name', 'Total_Bookings', 'Avg_Room_Nights', 'Avg_Guests']
].itertuples(index=False, name=None)
for country, bookings, avg_nights, avg_guests in top_rows:
    print(f"{country:<20} | Bookings: {bookings:<4} | Avg Nights: {avg_nights:<5} | Avg Guests: {avg_guests}")


Top 10 Countries by Total Bookings:
United Arab Emirates | Bookings: 461  | Avg Nights: 4.03  | Avg Guests: 2.16
United States        | Bookings: 377  | Avg Nights: 2.14  | Avg Guests: 2.05
Saudi Arabia         | Bookings: 258  | Avg Nights: 3.76  | Avg Guests: 1.85
Canada               | Bookings: 108  | Avg Nights: 1.95  | Avg Guests: 2.09
Greece               | Bookings: 97   | Avg Nights: 5.57  | Avg Guests: 2.78
Sri Lanka            | Bookings: 54   | Avg Nights: 4.76  | Avg Guests: 2.59
Italy                | Bookings: 46   | Avg Nights: 3.63  | Avg Guests: 2.93
France               | Bookings: 45   | Avg Nights: 4.56  | Avg Guests: 3.49
Spain                | Bookings: 39   | Avg Nights: 6.46  | Avg Guests: 4.1
Egypt                | Bookings: 35   | Avg Nights: 6.46  | Avg Guests: 2.6
