# Importing All The **Libraries**

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read by path.
from google.colab import drive
drive.mount("/content/drive")
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): this suppresses every warning issued through the `warnings`
# module for the rest of the session, including pandas deprecation and
# chained-assignment warnings — consider narrowing the filter.
warnings.filterwarnings('ignore')

# Connecting the **CSV** file

In [None]:
# Path of the bookings CSV on the mounted Drive.
DATA_PATH = "/content/drive/MyDrive/Colab Notebooks/ETE Project/1st Project/hotel_booking.csv"

# Load the dataset and preview its first five rows (head() defaults to 5).
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Column labels of the loaded frame.
df.columns

In [None]:
# Print per-column dtypes and non-null counts.
df.info()

# Cleaning and preparing the data

In [None]:
# Parse the status-date column into datetimes, then confirm the element type
# by looking at the row labelled 1.
df["reservation_status_date"] = pd.to_datetime(df["reservation_status_date"])
type(df.loc[1, "reservation_status_date"])

In [None]:
# Summary of the object-dtype columns only.
df.describe(include = "object")

In [None]:
# List the distinct values of every object-dtype column.
# select_dtypes picks the same columns describe(include="object") reports,
# without computing the summary statistics just to read the column names.
for col in df.select_dtypes(include="object").columns:
    print(col)
    print(df[col].unique())
    print('-'*60)

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Remove the 'company' and 'agent' columns from the working frame
# (presumably because of their missing values — TODO confirm).
df = df.drop(columns=['company', 'agent'])

In [None]:
# Keep only the rows that have no missing value in any remaining column.
df = df.dropna()

In [None]:
# Summary statistics (default column selection) of the cleaned frame.
df.describe()

In [None]:
# Keep only bookings whose average daily rate is below 5000
# (presumably removes an extreme outlier — TODO confirm the threshold).
# .copy() makes the result an independent frame, so adding columns to `df`
# later is not a chained assignment on a slice of the original data.
df = df[df['adr'] < 5000].copy()

# Data Analysis And Visualizations

In [None]:
# Share of canceled (1) versus kept (0) reservations.
cancelled_perc = df['is_canceled'].value_counts(normalize=True)
print(cancelled_perc)

# value_counts() orders by frequency, not by value, so pin the order to
# [0, 1] to guarantee each bar sits under the right hard-coded label.
status_counts = df['is_canceled'].value_counts().reindex([0, 1], fill_value=0)

plt.figure(figsize=(4, 4))
plt.title('Reservation Status Count')
plt.bar(['Not Canceled', 'Canceled'], status_counts, edgecolor='k', width=0.7)
plt.show()

In [None]:
# Reservation counts per hotel type, split by cancellation status.
plt.figure(figsize=(8, 4))
ax1 = sns.countplot(x='hotel', hue='is_canceled', data=df, palette='Blues')

# get_legend_handles_labels() returns (handles, labels); reuse the handles and
# replace the raw 0/1 hue values with readable names (hue order is 0, then 1).
handles, _ = ax1.get_legend_handles_labels()
ax1.legend(handles, ['Not Canceled', 'Canceled'], bbox_to_anchor=(1, 1))

plt.title('Reservation Status in Different Hotels', size=20)
plt.xlabel('Hotel')
plt.ylabel('Number of Reservations')
plt.show()

In [None]:
# Resort Hotel bookings and the share of them that were canceled.
resort_hotel = df.loc[df['hotel'].eq('Resort Hotel')]
resort_hotel['is_canceled'].value_counts(normalize=True)

In [None]:
# City Hotel bookings and the share of them that were canceled.
city_hotel = df.loc[df['hotel'].eq('City Hotel')]
city_hotel['is_canceled'].value_counts(normalize=True)

In [None]:
# Collapse each hotel's bookings to one row per status date holding the mean ADR.
# NOTE: this rebinds resort_hotel / city_hotel from booking rows to daily means.
resort_hotel = resort_hotel.groupby('reservation_status_date')['adr'].mean().to_frame()
city_hotel = city_hotel.groupby('reservation_status_date')['adr'].mean().to_frame()

In [None]:
# Daily mean ADR of both hotels on one time axis.
plt.figure(figsize=(20, 8))
plt.title('Average Daily Rate In City And Resort Hotel', fontsize=30)
for daily_adr, hotel_name in ((resort_hotel, 'Resort Hotel'), (city_hotel, 'City Hotel')):
    plt.plot(daily_adr.index, daily_adr['adr'], label=hotel_name)
plt.legend(fontsize=20)
plt.show()

In [None]:
# Month number (1-12) of the reservation status date; reused by later cells.
df['month'] = df['reservation_status_date'].dt.month

# Reservation counts per month, split by cancellation status.
plt.figure(figsize=(16, 8))
ax1 = sns.countplot(x='month', hue='is_canceled', data=df, palette='bright')
plt.title('reservation status per month', size=20)
plt.xlabel('month')
plt.ylabel('number of reservation')

# Build the legend once: a second legend call would replace the first and
# silently drop its bbox_to_anchor placement. Hue order is 0, then 1.
handles, _ = ax1.get_legend_handles_labels()
ax1.legend(handles, ['not canceled', 'canceled'], bbox_to_anchor=(1, 1))
plt.show()

In [None]:
# Total ADR of canceled bookings for each month, shown as a bar chart.
canceled_only = df.loc[df['is_canceled'] == 1]
data_to_plot = canceled_only.groupby('month', as_index=False)['adr'].sum()

plt.figure(figsize=(15, 8))
plt.title('ADR per month', fontsize=30)
sns.barplot(data=data_to_plot, x='month', y='adr')
plt.show()

In [None]:
# Canceled bookings only; reused by later cells.
cancelled_data = df.loc[df['is_canceled'] == 1]

# The ten countries with the most canceled bookings; slice sizes are relative
# to these ten countries, not to all cancellations.
top_10_country = cancelled_data['country'].value_counts().head(10)

plt.figure(figsize=(8, 8))
plt.title('top 10 countries with reservation canceled')
plt.pie(top_10_country, labels=top_10_country.index, autopct='%.2f')
plt.show()

In [None]:
# Number of bookings per market segment.
df['market_segment'].value_counts()

In [None]:
# Share of all bookings per market segment.
df['market_segment'].value_counts(normalize = True)

In [None]:
# Share of canceled bookings per market segment.
cancelled_data['market_segment'].value_counts(normalize = True)

In [None]:
# Mean ADR per status date for canceled bookings, ordered by date.
cancelled_df_adr = (
    cancelled_data.groupby('reservation_status_date')[['adr']]
    .mean()
    .reset_index()
    .sort_values('reservation_status_date')
)

# Same aggregation for the bookings that were not canceled.
not_cancelled_data = df[df['is_canceled'] == 0]
not_cancelled_data_adr = (
    not_cancelled_data.groupby('reservation_status_date')[['adr']]
    .mean()
    .reset_index()
    .sort_values('reservation_status_date')
)

# Overlay both daily series.
plt.figure(figsize=(20, 6))
plt.title('Average Daily Rate')
plt.plot(
    not_cancelled_data_adr['reservation_status_date'],
    not_cancelled_data_adr['adr'],
    label='not Cancelled',
)
plt.plot(
    cancelled_df_adr['reservation_status_date'],
    cancelled_df_adr['adr'],
    label='cancelled',
)
plt.legend()

In [None]:
# Restrict both daily series to dates strictly after '2016' and strictly
# before '2017-09' (both bounds exclusive).
cancelled_df_adr = cancelled_df_adr[
    cancelled_df_adr['reservation_status_date'].between('2016', '2017-09', inclusive='neither')
]
not_cancelled_data_adr = not_cancelled_data_adr[
    not_cancelled_data_adr['reservation_status_date'].between('2016', '2017-09', inclusive='neither')
]

In [None]:
# Redraw the canceled / not-canceled ADR comparison using the current
# contents of the two daily-mean frames.
plt.figure(figsize=(20, 6))
plt.title('Average Daily Rate', fontsize=30)
for daily_adr, status_label in (
    (not_cancelled_data_adr, 'not Cancelled'),
    (cancelled_df_adr, 'cancelled'),
):
    plt.plot(daily_adr['reservation_status_date'], daily_adr['adr'], label=status_label)
plt.legend()