# Python Machine Learning

## Import Libraries
scikit-learn documentation: https://scikit-learn.org/stable/

In [None]:
import pandas as pd
from sklearn.linear_model import LogisticRegression

## Today's Dataset
Kaggle - Hotel Reservation Data: <br> https://www.kaggle.com/datasets/ahsan81/hotel-reservations-classification-dataset

In [None]:
# Read the hotel reservations CSV into a DataFrame.
# NOTE: "filepath" is a placeholder -- replace it with the location of your copy of the dataset.
csv_path = r"filepath"
hotel_df = pd.read_csv(csv_path)

In [None]:
# Peek at seven randomly chosen rows (no random_state is passed, so the draw differs per run).
hotel_df.sample(n=7)

## Exploratory Data Analysis

In [None]:
# Size of the dataset, displayed as (rows, columns).
n_rows, n_cols = hotel_df.shape
n_rows, n_cols

In [None]:
# Summary statistics for the dataset, as produced by pandas' describe().
summary_stats = hotel_df.describe()
summary_stats

In [None]:
# Number of missing values in each column.
missing_per_column = hotel_df.isna().sum()
missing_per_column

In [None]:
# Number of rows that exactly repeat an earlier row.
duplicate_row_count = hotel_df.duplicated().sum()
duplicate_row_count

In [None]:
# Pairwise correlation between the numeric columns.
# numeric_only=True is needed on pandas >= 2.0, where corr() no longer skips non-numeric
# columns silently and raises instead. At this point booking_status has not been encoded
# yet (that happens in the Data Transformation section), so the frame is not purely numeric.
hotel_df.corr(numeric_only=True)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Annotated heatmap of the correlation matrix.
# numeric_only=True keeps corr() from raising on non-numeric columns under pandas >= 2.0.
corr_matrix = hotel_df.corr(numeric_only=True)

# Explicit figure/axes so the heatmap is drawn on the 12x12 canvas created here and can be titled.
fig, ax = plt.subplots(figsize=(12, 12))
sns.heatmap(corr_matrix, annot=True, cmap='Spectral', ax=ax)
ax.set_title('Correlation between numeric columns');

## Data Transformation

In [None]:
# Total length of stay = weekend nights + week nights, stored as a new column.
weekend_nights = hotel_df['no_of_weekend_nights']
week_nights = hotel_df['no_of_week_nights']
hotel_df['total_nights'] = weekend_nights.add(week_nights)

In [None]:
# One-hot encode booking_status: each distinct value becomes its own indicator column named
# booking_status_<value>, and the original column is removed from the frame.
# The guard makes this cell safe to re-run: after the first run 'booking_status' no longer
# exists, and asking get_dummies to encode it again would raise a KeyError.
if 'booking_status' in hotel_df.columns:
    hotel_df = pd.get_dummies(hotel_df, columns=['booking_status'])

In [None]:
# First five rows, to confirm the new total_nights and booking_status_* columns are present.
hotel_df.head(n=5)

## Logistic Regression

In [None]:
# Logistic regression classifier with scikit-learn's default settings; it is fitted further below.
lr = LogisticRegression()

In [None]:
#Split our data into a training group and a testing group
from sklearn.model_selection import train_test_split

# Hold out a random 20% of the rows for testing. Fixing random_state makes the shuffle --
# and therefore every score and confusion matrix computed from these splits -- repeatable.
train, test = train_test_split(hotel_df, test_size=0.2, random_state=42)

In [None]:
#Independent variable (feature)
#Selecting with a *list* of column names keeps the result a DataFrame;
#selecting with a single label would return a Series instead
train_feat = train.loc[:, ['total_nights']]

In [None]:
#Dependent variable (target): the indicator column for canceled bookings
train_target = train.loc[:, 'booking_status_Canceled']

In [None]:
# Fit the logistic regression on the training feature and target.
lr.fit(X=train_feat, y=train_target)

In [None]:
#Now we test: take the same feature and target columns from the held-out split
test_feat = test.loc[:, ['total_nights']]
test_target = test.loc[:, 'booking_status_Canceled']

#Score the fitted model on the test rows and display the result
test_accuracy = lr.score(X=test_feat, y=test_target)
test_accuracy

In [None]:
#Display the results of our test using a "confusion matrix"
from sklearn.metrics import confusion_matrix

# confusion_matrix expects (y_true, y_pred): rows are then the actual classes and columns the
# predicted classes, which is the layout printed below. The held-out test split is used so the
# matrix describes the same rows as the test score, not the data the model was fitted on.
matrix = confusion_matrix(test_target, lr.predict(test_feat))
print(
    '''                  Confusion Matrix layout
              _______________________________
              | Predicted No | Predicted Yes
    Actual No |  Correct     |   Incorrect
   Actual Yes |  Incorrect   |   Correct
   
   ''')
print(matrix)

In [None]:
#Interpretation of our results
# The counts are computed here from the held-out test rows with (y_true, y_pred) ordering, so
# rows are actual classes and columns are predicted classes -- the orientation the messages
# below rely on. ravel() flattens the 2x2 matrix row by row into (tn, fp, fn, tp).
true_neg, false_pos, false_neg, true_pos = confusion_matrix(
    test_target, lr.predict(test_feat)
).ravel()

print(f"Our model correctly predicted {true_neg} bookings as 'not canceled'.")
print(f"Our model incorrectly predicted {false_pos} bookings as 'canceled'. These are 'false positives.'")

print(f"Our model incorrectly predicted {false_neg} bookings as 'not canceled'. These are 'false negatives.'")
print(f"Our model correctly predicted {true_pos} bookings as 'canceled'.")

## Other Machine Learning Methods
Keep in mind that we're still using the same training and testing variables for the following models.

Support Vector Machine Resources <br>
https://www.geeksforgeeks.org/support-vector-machine-algorithm/ <br>
https://www.analyticsvidhya.com/blog/2017/09/understaing-support-vector-machine-example-code/

In [None]:
from sklearn.svm import LinearSVC 

# Linear support vector classifier, trained on the same feature and target as the logistic regression.
# random_state is fixed so repeated runs give the same fitted model.
svm = LinearSVC(random_state=42)
svm.fit(train_feat, train_target)

# Only the last expression of a cell is displayed, so the test accuracy is printed explicitly.
print(f"Test accuracy: {svm.score(test_feat, test_target):.3f}")

# (y_true, y_pred) on the held-out split: rows = actual class, columns = predicted class.
confusion_matrix(test_target, svm.predict(test_feat))

Multi-layer Perceptron 

In [None]:
from sklearn.neural_network import MLPClassifier 

# Multi-layer perceptron classifier, trained on the same feature and target as the earlier models.
# random_state is fixed so weight initialisation (and hence the result) is repeatable.
mlp = MLPClassifier(random_state=42)
mlp.fit(train_feat, train_target)

# Only the last expression of a cell is displayed, so the test accuracy is printed explicitly.
print(f"Test accuracy: {mlp.score(test_feat, test_target):.3f}")

# (y_true, y_pred) on the held-out split: rows = actual class, columns = predicted class.
confusion_matrix(test_target, mlp.predict(test_feat))

Decision Tree Classifier: <br>
https://towardsdatascience.com/decision-tree-classifier-explained-in-real-life-picking-a-vacation-destination-6226b2b60575

Random Forest Classifier: <br>
https://www.analyticsvidhya.com/blog/2016/04/tree-based-algorithms-complete-tutorial-scratch-in-python/#nine 

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest classifier, trained on the same feature and target as the earlier models.
# random_state is fixed so the randomly built trees (and hence the result) are repeatable.
rf = RandomForestClassifier(random_state=42)
rf.fit(train_feat, train_target)

# Only the last expression of a cell is displayed, so the test accuracy is printed explicitly.
print(f"Test accuracy: {rf.score(test_feat, test_target):.3f}")

# (y_true, y_pred) on the held-out split: rows = actual class, columns = predicted class.
confusion_matrix(test_target, rf.predict(test_feat))