In [1]:
import pandas as pd
import numpy as np
import os
import pickle
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import __version__ as sklearn_version
from sklearn.model_selection import cross_validate
import bamboolib as bam

In [2]:
# Import model

expected_model_version = '1.0'
model_path = r'C:\Users\1\Documents\GitHub\Github_Springboard\DataScienceGuidedCapstone-master\Step Five - Modeling\models\ski_resort_pricing_model.pkl'
if os.path.exists(model_path):
    with open(model_path, 'rb') as f:
        model = pickle.load(f)
    if model.version != expected_model_version:
        print("Expected model version doesn't match version loaded")
    if model.sklearn_version != sklearn_version:
        print("Warning: model created under different sklearn version")
else:
    print("Expected model not found")

In [3]:
# Import data
ski_data = pd.read_csv(r'C:\Users\1\Documents\GitHub\Github_Springboard\DataScienceGuidedCapstone-master\Step Four - Preprocessing and Training\data\ski_data_step4_features.csv')

In [4]:
# Define subset for plot data
dfPlot = ski_data.copy()

In [5]:
# Subset rows for plot data
dfPlot.dropna(subset=['AdultWeekend'], inplace=True)

In [6]:
# Subset cols for plot data
remove = ['AdultWeekday','total_chairs_runs_ratio','total_chairs_skiable_ratio','fastQuads_runs_ratio','fastQuads_skiable_ratio']
dfPlot = dfPlot[[i for i in dfPlot.columns if i not in remove]]

In [7]:
# Get Big Mountain Resort Data, x_bmr:
dfPlotNames = dfPlot['Name']
dfPlot = dfPlot.T
dfPlot.columns = dfPlotNames
bmr = dfPlot.pop('Big Mountain Resort')
x_bmr = pd.DataFrame(bmr).T
x_bmr = x_bmr.set_index(['Name','Region','state'])

In [8]:
# Move non-number meta data to index
dfPlot = dfPlot.T
dfPlot = dfPlot.set_index(['Name','Region','state'])

In [9]:
# Divide y from x plot parameters
x_features = dfPlot
y_prices = pd.DataFrame(x_features.pop('AdultWeekend'))
y_bmrPrice = x_bmr.pop('AdultWeekend')

In [20]:
# Define model plot parameters
modelPlotCount = 100
maxVals = x_features.max()
minVals = x_features.min()
plotInterval = (maxVals - minVals) / modelPlotCount
xColsCount = len(x_bmr.columns)

In [21]:
# Define empty DataFrames for iterating through model prediction scenarios
df = pd.DataFrame(index=[i for i in range(0,modelPlotCount)], columns = x_features.columns).copy()
x_modelVars = df.copy()
x_bmrModelInput = df.copy()
y_modelOutput = df.copy()

In [22]:
# Populate x_modelVars with features variables at each interval
x_modelVars.iloc[0,:] = minVals
for i in range(1,modelPlotCount):
    x_modelVars.iloc[i,:] = x_modelVars.iloc[i-1,:] + plotInterval

In [26]:
# Populate y_modelOutput with adjusted predictions for each value in x_modelVars
for row in range(0,modelPlotCount):
    x_bmrMod = x_bmr.copy()
    x_bmrMod.iloc[0,0] = x_modelVars.iloc[row,0]
    y_modelOutput.iloc[row,0] = model.predict(x_bmrMod).item()

In [27]:
y_modelOutput

Unnamed: 0,summit_elev,vertical_drop,base_elev,trams,fastSixes,fastQuads,quad,triple,double,surface,...,state_total_skiable_area_ac,state_total_days_open,state_total_terrain_parks,state_total_nightskiing_ac,resorts_per_100kcapita,resorts_per_100ksq_mile,resort_skiable_area_ac_state_ratio,resort_days_open_state_ratio,resort_terrain_park_state_ratio,resort_night_skiing_state_ratio
0,91.2184,,,,,,,,,,...,,,,,,,,,,
1,91.2184,,,,,,,,,,...,,,,,,,,,,
2,91.2184,,,,,,,,,,...,,,,,,,,,,
3,91.2184,,,,,,,,,,...,,,,,,,,,,
4,91.2184,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,93.2842,,,,,,,,,,...,,,,,,,,,,
96,93.2842,,,,,,,,,,...,,,,,,,,,,
97,93.2902,,,,,,,,,,...,,,,,,,,,,
98,93.2902,,,,,,,,,,...,,,,,,,,,,


In [14]:
x_bmrMod = x_bmr.copy()

In [15]:
x_bmrMod.iloc[0,0] = x_modelVars.iloc[0,0]

In [16]:
x_bmrMod = list(x_bmrMod.iloc[0,:])

In [17]:
x_bmrModelInput.iloc[0,0] = x_bmrMod

In [18]:
# Returns a DataFrame as input for model.predict, given row & col
def predictInput(row,col):
        predictInput = pd.DataFrame(data=x_bmrModelInput.iloc[row,col])
        predictInput = predictInput.T
        predictInput.columns = x_bmr.columns
        return predictInput

In [21]:
y_modelOutput.iloc[0,0] = model.predict(predictInput(0,0)).item()

In [22]:
y_modelOutput.iloc[0,0]

91.21839