# 酒店预订数据分析
## 加载库

In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import pyecharts
%matplotlib inline

## 数据加载及预览
### 列名解释
* **hotel** 酒店类型 City Hotel-城市酒店 Resort Hotel-度假酒店
* **is_canceled** 预订是否取消 1-取消 0-未取消
* **lead_time** 预订录入PMS（酒店物业管理系统）的日期与到店日期之间间隔的天数
* **arrival_date_year** 到店年份
* **arrival_date_month** 到店月份
* **arrival_date_week_number** 到店星期是一年中的第几个星期
* **arrival_date_day_of_month** 到店日期
* **stays_in_weekend_nights** 周末入住的夜数
* **stays_in_week_nights** 工作日入住的夜数
* **adults** 成年人数量
* **children** 孩子数量
* **babies** 婴儿数量
* **meal** 餐食规格 SC/Undefined-不含餐食 BB-仅含早餐 HB-半膳（早餐加另一餐，通常为晚餐） FB-全膳（早、午、晚三餐）
* **country** 来自哪个国家
* **market_segment** 细分市场名称 TO-旅游批发商 TA-旅游零售商
* **distribution_channel** 预定渠道
* **is_repeated_guest** 该预订是否来自回头客 1为是 0为否
* **previous_cancellations** 客户在当前预定前取消预订的次数
* **previous_bookings_not_canceled** 客户在当前预定前未取消预订的次数
* **reserved_room_type** 预定房型
* **assigned_room_type** 分配房型（预定房型已满或其他原因）
* **booking_changes** 从预定到入住订单的修改次数
* **deposit_type** 押金类型 No Deposit-没有押金 Non Refund-押金不退还 Refundable-押金可退还
* **agent** 旅行社ID
* **company** 预定公司的ID
* **days_in_waiting_list** 订单确认前在等待列表中的天数
* **customer_type** 顾客类型 Contract-合同 Group-团体 Transient-临时 Transient-party-与其他临时订单相关
* **adr** 平均每晚入住花费
* **required_car_parking_spaces** 客户要求的停车位数量
* **total_of_special_requests** 特殊要求数量
* **reservation_status** 订单状态 Canceled-取消 Check-Out-已退房完成订单 No-Show-未入住
* **reservation_status_date** 设置订单最后状态的日期

In [9]:
# Load the bookings CSV from the current working directory into a DataFrame.
df = pd.read_csv('hotel_bookings.csv')
# Print each column's name, non-null count and dtype to spot missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 119390 entries, 0 to 119389
Data columns (total 32 columns):
 #   Column                          Non-Null Count   Dtype  
---  ------                          --------------   -----  
 0   hotel                           119390 non-null  object 
 1   is_canceled                     119390 non-null  int64  
 2   lead_time                       119390 non-null  int64  
 3   arrival_date_year               119390 non-null  int64  
 4   arrival_date_month              119390 non-null  object 
 5   arrival_date_week_number        119390 non-null  int64  
 6   arrival_date_day_of_month       119390 non-null  int64  
 7   stays_in_weekend_nights         119390 non-null  int64  
 8   stays_in_week_nights            119390 non-null  int64  
 9   adults                          119390 non-null  int64  
 10  children                        119386 non-null  float64
 11  babies                          119390 non-null  int64  
 12  meal            

## 数据预处理
我们看到


In [16]:
# Load the raw bookings file. NOTE: no cleaning is applied in this cell despite
# the "_cln" suffix; the name is kept so other cells can keep referring to it.
full_data_cln = pd.read_csv('hotel_bookings.csv')

# Separate Resort and City hotel bookings.
# To know the actual visitor numbers, only bookings that were not canceled are included.
not_canceled = full_data_cln["is_canceled"] == 0
rh = full_data_cln.loc[(full_data_cln["hotel"] == "Resort Hotel") & not_canceled]
ch = full_data_cln.loc[(full_data_cln["hotel"] == "City Hotel") & not_canceled]

# Number of actual guests by country.
# The counts column is named explicitly instead of renaming a "country" column:
# on pandas >= 2.0 value_counts() names its result "count" (and the index
# "country"), so the previous rename was a silent no-op and the lookup of
# "Number of Guests" raised KeyError. rename_axis(None) keeps the index unnamed
# so it does not clash with the "country" column added below.
country_data = (
    full_data_cln.loc[not_canceled, "country"]
    .value_counts()
    .rename_axis(None)
    .to_frame(name="Number of Guests")
)
total_guests = country_data["Number of Guests"].sum()
country_data["Guests in %"] = (country_data["Number of Guests"] / total_guests * 100).round(2)
country_data["country"] = country_data.index
# Optional: uncomment to merge countries below 2 % into a single "Other" slice.
#country_data.loc[country_data["Guests in %"] < 2, "country"] = "Other"

# Pie plot of guest counts per home country.
fig = px.pie(country_data,
             values="Number of Guests",
             names="country",
             title="Home country of guests",
             template="seaborn")
fig.update_traces(textposition="inside", textinfo="value+percent+label")
fig.show()


In [18]:
from pyecharts.charts import Bar

# Demo data: category labels for the x-axis and one series of values for the y-axis.
x_labels = ["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"]
series_values = [5, 20, 36, 10, 75, 90]

bar = Bar()
bar.add_xaxis(x_labels)
bar.add_yaxis("商家A", series_values)

# render() writes a local HTML file; by default it creates render.html in the
# current directory. A path can be passed instead, e.g. bar.render("mycharts.html").
bar.render()

'D:\\study\\数据分析\\kaggle\\hotel-booking-demand\\render.html'