In [1]:
# Dependencies
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical
from sklearn import preprocessing
import tensorflow
tensorflow.keras.__version__

'2.4.0'

In [2]:
# Read data
weather = pd.read_csv('Resources/dataLargerSet-Copy1.csv')
weather.head()

Unnamed: 0.1,Unnamed: 0,index,winery,vintage,points,cities,MarchLowTemp,MarchHighTemp,MarchAvgTemp,MarchPrecip,...,JulyAvgTemp,JulyPrecip,JulyCloudiness,JulyHumidity,AugLowTemp,AugHighTemp,AugAvgTemp,AugPrecip,AugCloudiness,AugHumidity
0,0,6,Claiborne & Churchill,2009,88,San Luis Obispo,42.129032,68.419355,58.580645,1.5,...,82.935484,0.0,2.612903,41.612903,56.741935,89.129032,79.903226,0.1,4.419355,44.0
1,1,7,Claiborne & Churchill,2011,87,San Luis Obispo,43.806452,62.16129,55.258065,9.4,...,72.193548,0.1,1.83871,62.0,61.741935,79.16129,73.806452,0.0,2.548387,57.0
2,2,8,Claiborne & Churchill,2013,94,San Luis Obispo,47.354839,68.548387,60.516129,1.2,...,73.129032,0.2,5.322581,61.354839,61.290323,80.0,73.806452,0.0,2.516129,59.806452
3,3,9,Claiborne & Churchill,2014,93,San Luis Obispo,48.612903,68.322581,61.129032,1.8,...,74.677419,0.3,10.032258,63.612903,62.096774,80.290323,73.967742,0.0,2.612903,63.774194
4,4,10,Claiborne & Churchill,2014,91,San Luis Obispo,48.612903,68.322581,61.129032,1.8,...,74.677419,0.3,10.032258,63.612903,62.096774,80.290323,73.967742,0.0,2.612903,63.774194


In [3]:
# Clean data set
weatherClean=weather.drop(["Unnamed: 0", "index"], axis=1)
weatherClean.head()

Unnamed: 0,winery,vintage,points,cities,MarchLowTemp,MarchHighTemp,MarchAvgTemp,MarchPrecip,MarchCloudiness,MarchHumidity,...,JulyAvgTemp,JulyPrecip,JulyCloudiness,JulyHumidity,AugLowTemp,AugHighTemp,AugAvgTemp,AugPrecip,AugCloudiness,AugHumidity
0,Claiborne & Churchill,2009,88,San Luis Obispo,42.129032,68.419355,58.580645,1.5,12.870968,61.387097,...,82.935484,0.0,2.612903,41.612903,56.741935,89.129032,79.903226,0.1,4.419355,44.0
1,Claiborne & Churchill,2011,87,San Luis Obispo,43.806452,62.16129,55.258065,9.4,35.032258,76.903226,...,72.193548,0.1,1.83871,62.0,61.741935,79.16129,73.806452,0.0,2.548387,57.0
2,Claiborne & Churchill,2013,94,San Luis Obispo,47.354839,68.548387,60.516129,1.2,21.387097,61.677419,...,73.129032,0.2,5.322581,61.354839,61.290323,80.0,73.806452,0.0,2.516129,59.806452
3,Claiborne & Churchill,2014,93,San Luis Obispo,48.612903,68.322581,61.129032,1.8,22.290323,64.258065,...,74.677419,0.3,10.032258,63.612903,62.096774,80.290323,73.967742,0.0,2.612903,63.774194
4,Claiborne & Churchill,2014,91,San Luis Obispo,48.612903,68.322581,61.129032,1.8,22.290323,64.258065,...,74.677419,0.3,10.032258,63.612903,62.096774,80.290323,73.967742,0.0,2.612903,63.774194


In [4]:
# Points into bins 'PointsRange'

bins = [0, 80, 85, 90, 95, 100]
names = ['<80','80-84', '85-89', '90-94', '95-100', '100+']

d = dict(enumerate(names, 1))

weatherClean['pointsRange'] = np.vectorize(d.get)(np.digitize(weatherClean['points'], bins))

weatherClean.head()

Unnamed: 0,winery,vintage,points,cities,MarchLowTemp,MarchHighTemp,MarchAvgTemp,MarchPrecip,MarchCloudiness,MarchHumidity,...,JulyPrecip,JulyCloudiness,JulyHumidity,AugLowTemp,AugHighTemp,AugAvgTemp,AugPrecip,AugCloudiness,AugHumidity,pointsRange
0,Claiborne & Churchill,2009,88,San Luis Obispo,42.129032,68.419355,58.580645,1.5,12.870968,61.387097,...,0.0,2.612903,41.612903,56.741935,89.129032,79.903226,0.1,4.419355,44.0,85-89
1,Claiborne & Churchill,2011,87,San Luis Obispo,43.806452,62.16129,55.258065,9.4,35.032258,76.903226,...,0.1,1.83871,62.0,61.741935,79.16129,73.806452,0.0,2.548387,57.0,85-89
2,Claiborne & Churchill,2013,94,San Luis Obispo,47.354839,68.548387,60.516129,1.2,21.387097,61.677419,...,0.2,5.322581,61.354839,61.290323,80.0,73.806452,0.0,2.516129,59.806452,90-94
3,Claiborne & Churchill,2014,93,San Luis Obispo,48.612903,68.322581,61.129032,1.8,22.290323,64.258065,...,0.3,10.032258,63.612903,62.096774,80.290323,73.967742,0.0,2.612903,63.774194,90-94
4,Claiborne & Churchill,2014,91,San Luis Obispo,48.612903,68.322581,61.129032,1.8,22.290323,64.258065,...,0.3,10.032258,63.612903,62.096774,80.290323,73.967742,0.0,2.612903,63.774194,90-94


In [5]:
# Visualize data
weatherClean.drop_duplicates().pointsRange.value_counts()

85-89     564
90-94     552
80-84      89
95-100     37
Name: pointsRange, dtype: int64

In [6]:
# Visualize data
weatherClean.columns

Index(['winery', 'vintage', 'points', 'cities', 'MarchLowTemp',
       'MarchHighTemp', 'MarchAvgTemp', 'MarchPrecip', 'MarchCloudiness',
       'MarchHumidity', 'AprilLowTemp', 'AprilHighTemp', 'AprilAvgTemp',
       'AprilPrecip', 'AprilCloudiness', 'AprilHumidity', 'MayLowTemp',
       'MayHighTemp', 'MayAvgTemp', 'MayPrecip', 'MayCloudiness',
       'MayHumidity', 'JuneLowTemp', 'JuneHighTemp', 'JuneAvgTemp',
       'JunePrecip', 'JuneCloudiness', 'JuneHumidity', 'JulyLowTemp',
       'JulyHighTemp', 'JulyAvgTemp', 'JulyPrecip', 'JulyCloudiness',
       'JulyHumidity', 'AugLowTemp', 'AugHighTemp', 'AugAvgTemp', 'AugPrecip',
       'AugCloudiness', 'AugHumidity', 'pointsRange'],
      dtype='object')

In [14]:
# Apply ONE-HOT ENCODING

# Step 0: Reformat data
weatherClean2 = weatherClean.values
X = weatherClean2[:, 0:40]
y = weatherClean2[:, 40]

In [23]:
# Data Pre-Processing
X= weatherClean.drop(['winery', 'vintage', 'points', 'cities','pointsRange'], axis=1)
y= weatherClean['pointsRange']


print(X.shape, y.shape)

(1328, 36) (1328,)


In [24]:
# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y)
encoded_y = label_encoder.transform(y)

In [18]:
for label, original_class in zip(encoded_y, y):
    print('Original Class: ' + str(original_class))
    print('Encoded Label: ' + str(label))
    print('-' * 12)

Original Class: 85-89
Encoded Label: 1
------------
Original Class: 85-89
Encoded Label: 1
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 95-100
Encoded Label: 3
------------
Original Class: 85-89
Encoded Label: 1
------------
Original Class: 85-89
Encoded Label: 1
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 80-84
Encoded Label: 0
------------
Original Class: 85-89
Encoded Label: 1
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 85-89
Encoded Label: 1
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 85-89
Encoded Label: 1
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Cl

Original Class: 95-100
Encoded Label: 3
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 85-89
Encoded Label: 1
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 85-89
Encoded Label: 1
------------
Original Class: 85-89
Encoded Label: 1
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 90-94
Encoded Label: 2
------------
Original Class: 80-84
Encoded Label: 0
------------
Original Class: 85-89
Encoded Label: 1
------------
Original Class: 85-89
Encoded Label: 1
------------
Original Cl

In [19]:
# Step 2: One-hot encoding
one_hot_y = to_categorical(encoded_y)
one_hot_y

array([[0., 1., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       ...,
       [0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [0., 1., 0., 0.]], dtype=float32)

In [33]:
X_train, X_test, y_train, y_test = train_test_split(
    X, one_hot_y, random_state=1)
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

print(X_train_scaled.shape, X_test_scaled.shape)

(996, 36) (332, 36)


In [34]:
# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

print(y_train_categorical.shape, y_test_categorical.shape)

ValueError: y should be a 1d array, got an array of shape (996, 4) instead.

In [20]:
# Create Deep Learning Model

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=4))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [21]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=60,
    shuffle=True,
    verbose=0
)

ValueError: in user code:

    C:\Users\Tempe\anaconda3\envs\PythonData\lib\site-packages\tensorflow\python\keras\engine\training.py:805 train_function  *
        return step_function(self, iterator)
    C:\Users\Tempe\anaconda3\envs\PythonData\lib\site-packages\tensorflow\python\keras\engine\training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    C:\Users\Tempe\anaconda3\envs\PythonData\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    C:\Users\Tempe\anaconda3\envs\PythonData\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    C:\Users\Tempe\anaconda3\envs\PythonData\lib\site-packages\tensorflow\python\distribute\distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    C:\Users\Tempe\anaconda3\envs\PythonData\lib\site-packages\tensorflow\python\keras\engine\training.py:788 run_step  **
        outputs = model.train_step(data)
    C:\Users\Tempe\anaconda3\envs\PythonData\lib\site-packages\tensorflow\python\keras\engine\training.py:754 train_step
        y_pred = self(x, training=True)
    C:\Users\Tempe\anaconda3\envs\PythonData\lib\site-packages\tensorflow\python\keras\engine\base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    C:\Users\Tempe\anaconda3\envs\PythonData\lib\site-packages\tensorflow\python\keras\engine\input_spec.py:259 assert_input_compatibility
        ' but received input with shape ' + display_shape(x.shape))

    ValueError: Input 0 of layer sequential_1 is incompatible with the layer: expected axis -1 of input shape to have value 4 but received input with shape (None, 36)
