# Prediction Model [Disneyland]

In [14]:
#Import Dependencies
import pandas as pd
import numpy as np
import requests
import io
import os
from sklearn import datasets, ensemble
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

In [15]:
#Open CSV File
# Historical data used to train and evaluate the classifier; the path is
# resolved relative to the notebook's working directory.
training_csv = os.path.join('Resources', 'final_dl_data.csv')
input_data = pd.read_csv(training_csv)

#Print DataFrame
# Two rows are enough to check the column layout.
input_data.head(2)

Unnamed: 0,Year,Crowd Size,Temperature Max,Hours Open,Public Holiday,School Holiday,Day of the Week,Day of the Year,Crowd Level
0,2020,16,8,12,1,1,4,366,Vacant
1,2020,16,12,12,0,1,3,365,Vacant


In [16]:
#Drop Unnecessary Columns
# Everything that is not explicitly excluded is kept as a model feature.
# NOTE(review): if the CSV carries a saved index column (e.g. 'Unnamed: 0'),
# it passes this filter and is used as a feature -- confirm against the file.
excluded = {'Crowd Level', 'Crowd Size', 'Year', 'Day of the Year'}
cols = [name for name in input_data.columns if name not in excluded]
data = input_data.loc[:, cols]

#Assign for Target
# The categorical crowd label is what the classifier learns to predict.
target = input_data.loc[:, 'Crowd Level']

#Print DataFrame
data.head(2)

Unnamed: 0,Temperature Max,Hours Open,Public Holiday,School Holiday,Day of the Week
0,8,12,1,1,4
1,12,12,0,1,3


In [17]:
#Split Dataset to begin Test/Train
# 10% of the rows are held out for testing; the fixed random_state makes the
# split identical on every run.
split = train_test_split(data, target, test_size=0.10, random_state=1)
data_train, data_test, target_train, target_test = split

In [18]:
#Increase Max Iter
# The iteration cap is raised well above scikit-learn's default of 100.
# NOTE(review): presumably needed because the features are unscaled -- confirm.
# fit() returns the estimator itself, so construction and training can be chained.
classifier = LogisticRegression(max_iter=10000).fit(data_train, target_train)

# Show the fitted estimator as the cell output.
classifier

LogisticRegression(max_iter=10000)

In [19]:
#Print Results
# Mean accuracy on the rows the model was fitted on and on the held-out rows.
train_score = classifier.score(data_train, target_train)
test_score = classifier.score(data_test, target_test)
print(f"Training Data Score: {train_score}")
print(f"Testing Data Score: {test_score}")

Training Data Score: 0.5048661800486618
Testing Data Score: 0.48633879781420764


In [20]:
#Print Results
# Compare the first 25 predicted labels with the true labels of the same
# held-out rows.
predictions = classifier.predict(data_test)
predicted_head = predictions[:25]
actual_head = target_test.iloc[:25].tolist()
print(f"First 25 Predictions: {predicted_head}")
print(f"First 25 Actual Test Data: {actual_head}")

First 25 Predictions: ['Closed' 'Crowded' 'Quite Vacant' 'Vacant' 'Closed' 'Busy' 'Crowded'
 'Quite Vacant' 'Quite Vacant' 'Quite Vacant' 'Crowded' 'Crowded'
 'Crowded' 'Busy' 'Quite Crowded' 'Quite Vacant' 'Vacant' 'Quite Vacant'
 'Quite Vacant' 'Quite Vacant' 'Busy' 'Quite Vacant' 'Extreme'
 'Quite Crowded' 'Quite Vacant']
First 25 Actual Test Data: ['Closed', 'Crowded', 'Quite Vacant', 'Vacant', 'Closed', 'Extreme', 'Quite Vacant', 'Crowded', 'Quite Vacant', 'Quite Vacant', 'Quite Vacant', 'Quite Crowded', 'Quite Vacant', 'Quite Crowded', 'Crowded', 'Quite Vacant', 'Vacant', 'Quite Vacant', 'Quite Vacant', 'Vacant', 'Vacant', 'Crowded', 'Busy', 'Extreme', 'Quite Vacant']


In [23]:
#Open CSV File
# Future (2021) rows to predict. This rebinds `input_data`, so from here on
# the name no longer refers to the training data loaded earlier.
future_csv = os.path.join('Resources', 'future_dl_data.csv')
input_data = pd.read_csv(future_csv)

#Print DataFrame
input_data.head(2)

Unnamed: 0,Date,Crowd Size,Temperature Max,Temperature Min,Hours Open,Public Holiday,School Holiday,Day of the Week,Day of the Year,Crowd Level
0,1/01/2021,10000,11,-1,9,1,1,5,1,Vacant
1,2/01/2021,50000,11,0,14,1,1,6,2,Busy


In [25]:
#Drop Unnecessary Columns
# Select exactly the columns the classifier was trained on, in the same order.
# A second hand-written exclusion list only matches the training features by
# coincidence: any extra or reordered column in the future file would either
# make predict() fail or feed values into the wrong coefficients.
cols = list(data_train.columns)

# Fail early, with a readable message, if the future file lacks a feature.
missing = [column for column in cols if column not in input_data.columns]
if missing:
    raise KeyError(f"Future data is missing training feature columns: {missing}")

data = input_data[cols]

#Print DataFrame
data.head(n=2)

Unnamed: 0,Temperature Max,Hours Open,Public Holiday,School Holiday,Day of the Week
0,11,9,1,1,5
1,11,14,1,1,6


In [26]:
#Print Results
# Predict a crowd level for every future row; this rebinds `predictions`,
# replacing the test-set predictions made earlier.
predictions = classifier.predict(data)
first_year = predictions[:365]
print(f"First 365 Predictions:   {first_year}")

First 365 Predictions:   ['Vacant' 'Busy' 'Busy' 'Crowded' 'Crowded' 'Quite Crowded'
 'Quite Crowded' 'Busy' 'Busy' 'Busy' 'Crowded' 'Quite Vacant'
 'Quite Vacant' 'Quite Vacant' 'Quite Vacant' 'Quite Vacant' 'Busy'
 'Crowded' 'Crowded' 'Quite Vacant' 'Quite Vacant' 'Quite Vacant'
 'Quite Vacant' 'Quite Vacant' 'Crowded' 'Crowded' 'Quite Vacant'
 'Quite Vacant' 'Quite Vacant' 'Quite Vacant' 'Busy' 'Crowded'
 'Quite Vacant' 'Quite Vacant' 'Quite Vacant' 'Quite Vacant'
 'Quite Vacant' 'Busy' 'Crowded' 'Quite Vacant' 'Quite Vacant'
 'Quite Vacant' 'Quite Vacant' 'Busy' 'Busy' 'Crowded' 'Crowded' 'Crowded'
 'Quite Vacant' 'Quite Vacant' 'Busy' 'Busy' 'Crowded' 'Quite Vacant'
 'Quite Vacant' 'Quite Vacant' 'Quite Vacant' 'Busy' 'Busy' 'Crowded'
 'Crowded' 'Quite Vacant' 'Quite Vacant' 'Quite Vacant' 'Busy' 'Busy'
 'Crowded' 'Quite Vacant' 'Quite Vacant' 'Quite Vacant' 'Crowded'
 'Quite Vacant' 'Busy' 'Crowded' 'Quite Vacant' 'Quite Vacant'
 'Quite Vacant' 'Crowded' 'Busy' 'Busy' 'Closed' 'Q

In [27]:
#Save as DataFrame
# One-column frame holding the predicted labels, in the same row order as the
# future data they were predicted from.
dl_predictions21 = pd.DataFrame({"Crowd Level": predictions})

In [14]:
#Save as CSV File
# Export is opt-in: leave SAVE_PREDICTIONS False to run the notebook without
# writing anything, set it to True to (re)create the CSV.
SAVE_PREDICTIONS = False

# Relative path instead of a user-specific absolute one, so the notebook runs
# on any machine.
# NOTE(review): assumes the notebook is run from the project folder -- confirm.
PREDICTIONS_CSV = 'dl_predictions21.csv'

if SAVE_PREDICTIONS:
    # index=False keeps the row counter out of the file.
    dl_predictions21.to_csv(PREDICTIONS_CSV, index=False)