In [5]:
# Read Data
import numpy as np
import pandas as pd
raw_data = pd.read_csv("data_10_states_temp_precip.csv")
raw_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1234 entries, 0 to 1233
Data columns (total 6 columns):
State            1234 non-null object
Month            1234 non-null object
Year             1234 non-null int64
Crop Yield       1234 non-null float64
Temperature      1234 non-null float64
Precipitation    1234 non-null float64
dtypes: float64(3), int64(1), object(2)
memory usage: 57.9+ KB


In [6]:
from pandas.plotting import scatter_matrix

attributes = ["Temperature", "Precipitation", "Crop Yield"]
scatter_matrix(raw_data[attributes])

array([[<matplotlib.axes._subplots.AxesSubplot object at 0x7fd47f24cf98>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fd47cfbb208>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fd47cf55668>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7fd47cf6dbe0>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fd47cf8b198>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fd47cf21710>],
       [<matplotlib.axes._subplots.AxesSubplot object at 0x7fd47cf38c88>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fd47ced8240>,
        <matplotlib.axes._subplots.AxesSubplot object at 0x7fd47ceef7b8>]],
      dtype=object)

In [7]:
# split into test and training data
from sklearn.model_selection import train_test_split

raw_train_set, raw_test_set = train_test_split(raw_data, test_size = 0.2)

In [8]:
# data preprocessing
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

num_attribs = ["Temperature", "Precipitation"]
cat_attribs = ["Month"]

full_pipeline = ColumnTransformer([
        ("num", StandardScaler(), num_attribs),
        ("cat", OneHotEncoder(sparse=False), cat_attribs),
    ])

X_train = full_pipeline.fit_transform(raw_train_set)
Y_train = raw_train_set["Crop Yield"].values

X_test = full_pipeline.fit_transform(raw_test_set)
Y_test = raw_test_set["Crop Yield"].values


In [9]:
# Linear Regression Model
from sklearn.linear_model import LinearRegression
lin_reg = LinearRegression()

lin_reg.fit(X_train, Y_train)

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [10]:
from sklearn.metrics import mean_squared_error

lin_reg_predictions = lin_reg.predict(X_test)
lin_mse = mean_squared_error(Y_test, lin_reg_predictions)
lin_rmse = np.sqrt(lin_mse)
lin_rmse

8.276931463375657

In [11]:
from sklearn.tree import DecisionTreeRegressor
tree_reg = DecisionTreeRegressor(random_state=42)

tree_reg.fit(X_train, Y_train)

DecisionTreeRegressor(criterion='mse', max_depth=None, max_features=None,
           max_leaf_nodes=None, min_impurity_decrease=0.0,
           min_impurity_split=None, min_samples_leaf=1,
           min_samples_split=2, min_weight_fraction_leaf=0.0,
           presort=False, random_state=42, splitter='best')

In [12]:
tree_reg_predictions = tree_reg.predict(X_test)
tree_mse = mean_squared_error(Y_test, tree_reg_predictions)
tree_rmse = np.sqrt(tree_mse)
tree_rmse

11.290424447757768

In [33]:
from sklearn.ensemble import RandomForestRegressor

forest_reg = RandomForestRegressor(n_estimators = 90, random_state = 42)
forest_reg.fit(X_train, Y_train)

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators=90, n_jobs=None,
           oob_score=False, random_state=42, verbose=0, warm_start=False)

In [34]:
forest_reg_predictions = forest_reg.predict(X_test)
forest_mse = mean_squared_error(Y_test, forest_reg_predictions)
forest_rmse = np.sqrt(forest_mse)
forest_rmse

8.765544483107211

In [13]:
from sklearn.svm import SVR
svm_reg = SVR(kernel='linear')
svm_reg.fit(X_train, Y_train)


SVR(C=1.0, cache_size=200, coef0=0.0, degree=3, epsilon=0.1,
  gamma='auto_deprecated', kernel='linear', max_iter=-1, shrinking=True,
  tol=0.001, verbose=False)

In [14]:
svm_reg_predictions = svm_reg.predict(X_test)
svm_mse = mean_squared_error(Y_test, svm_reg_predictions)
svm_rmse = np.sqrt(svm_mse)
svm_rmse

8.334714258247185

In [73]:
svm_poly_reg = SVR(kernel='poly',degree = 2, C=120,epsilon = 0.3, gamma = 'auto')
svm_poly_reg.fit(X_train, Y_train)

poly_reg_predictions = svm_poly_reg.predict(X_test)
poly_mse = mean_squared_error(Y_test, poly_reg_predictions)
poly_rmse = np.sqrt(poly_mse)
poly_rmse


8.160235723497438

In [160]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout
from keras.constraints import maxnorm
from keras import optimizers


nn_model = Sequential()
layer1 = 500
layer2 = 500
layer3 = 500

#nn_model.add(Dropout(0.2, input_shape=(X_train.shape[1],)))
nn_model.add(Dense(layer1, kernel_initializer='normal', input_dim=X_train.shape[1], activation='relu', kernel_constraint=maxnorm(3)))
nn_model.add(Dropout(0.2))
nn_model.add(Dense(layer2, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
nn_model.add(Dropout(0.2))
nn_model.add(Dense(layer3, kernel_initializer='normal', activation='relu', kernel_constraint=maxnorm(3)))
nn_model.add(Dropout(0.2))
nn_model.add(Dense(1, kernel_initializer='normal',activation='linear'))

optimizer = optimizers.adam(lr=0.01)
nn_model.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mean_squared_error'])
nn_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_116 (Dense)            (None, 500)               3500      
_________________________________________________________________
dropout_60 (Dropout)         (None, 500)               0         
_________________________________________________________________
dense_117 (Dense)            (None, 500)               250500    
_________________________________________________________________
dropout_61 (Dropout)         (None, 500)               0         
_________________________________________________________________
dense_118 (Dense)            (None, 500)               250500    
_________________________________________________________________
dropout_62 (Dropout)         (None, 500)               0         
_________________________________________________________________
dense_119 (Dense)            (None, 1)                 501       
Total para

In [161]:
from keras.callbacks import ModelCheckpoint

checkpoint_name = 'Weights-{epoch:03d}--{val_loss:.5f}.hdf5' 
checkpoint = ModelCheckpoint(checkpoint_name, monitor='val_loss', verbose = 1, save_best_only = True, mode ='auto')
callbacks_list = [checkpoint]

In [None]:
nn_model.fit(X_train, Y_train, epochs=500, batch_size=8, validation_split = 0.2, callbacks=callbacks_list)

Train on 789 samples, validate on 198 samples
Epoch 1/500

Epoch 00001: val_loss did not improve from 88.64744
Epoch 2/500

Epoch 00002: val_loss did not improve from 88.64744
Epoch 3/500

Epoch 00003: val_loss did not improve from 88.64744
Epoch 4/500

Epoch 00004: val_loss did not improve from 88.64744
Epoch 5/500

Epoch 00005: val_loss improved from 88.64744 to 88.32775, saving model to Weights-005--88.32775.hdf5
Epoch 6/500

Epoch 00006: val_loss did not improve from 88.32775
Epoch 7/500

Epoch 00007: val_loss did not improve from 88.32775
Epoch 8/500

Epoch 00008: val_loss did not improve from 88.32775
Epoch 9/500

Epoch 00009: val_loss did not improve from 88.32775
Epoch 10/500

Epoch 00010: val_loss did not improve from 88.32775
Epoch 11/500

Epoch 00011: val_loss did not improve from 88.32775
Epoch 12/500

Epoch 00012: val_loss did not improve from 88.32775
Epoch 13/500

Epoch 00013: val_loss did not improve from 88.32775
Epoch 14/500

Epoch 00014: val_loss did not improve from


Epoch 00037: val_loss did not improve from 87.43053
Epoch 38/500

Epoch 00038: val_loss did not improve from 87.43053
Epoch 39/500

Epoch 00039: val_loss did not improve from 87.43053
Epoch 40/500

Epoch 00040: val_loss did not improve from 87.43053
Epoch 41/500

Epoch 00041: val_loss did not improve from 87.43053
Epoch 42/500

Epoch 00042: val_loss did not improve from 87.43053
Epoch 43/500

Epoch 00043: val_loss did not improve from 87.43053
Epoch 44/500

Epoch 00044: val_loss did not improve from 87.43053
Epoch 45/500

Epoch 00045: val_loss did not improve from 87.43053
Epoch 46/500

Epoch 00046: val_loss did not improve from 87.43053
Epoch 47/500

Epoch 00047: val_loss did not improve from 87.43053
Epoch 48/500

Epoch 00048: val_loss did not improve from 87.43053
Epoch 49/500

Epoch 00049: val_loss did not improve from 87.43053
Epoch 50/500

Epoch 00050: val_loss did not improve from 87.43053
Epoch 51/500

Epoch 00051: val_loss did not improve from 87.43053
Epoch 52/500

Epoch 000


Epoch 00074: val_loss did not improve from 87.43053
Epoch 75/500

Epoch 00075: val_loss did not improve from 87.43053
Epoch 76/500

Epoch 00076: val_loss did not improve from 87.43053
Epoch 77/500

Epoch 00077: val_loss did not improve from 87.43053
Epoch 78/500

Epoch 00078: val_loss did not improve from 87.43053
Epoch 79/500

Epoch 00079: val_loss did not improve from 87.43053
Epoch 80/500

Epoch 00080: val_loss did not improve from 87.43053
Epoch 81/500

Epoch 00081: val_loss did not improve from 87.43053
Epoch 82/500

Epoch 00082: val_loss did not improve from 87.43053
Epoch 83/500

Epoch 00083: val_loss did not improve from 87.43053
Epoch 84/500

Epoch 00084: val_loss did not improve from 87.43053
Epoch 85/500

Epoch 00085: val_loss did not improve from 87.43053
Epoch 86/500

Epoch 00086: val_loss did not improve from 87.43053
Epoch 87/500

Epoch 00087: val_loss did not improve from 87.43053
Epoch 88/500

Epoch 00088: val_loss did not improve from 87.43053
Epoch 89/500

Epoch 000


Epoch 00111: val_loss did not improve from 87.43053
Epoch 112/500

Epoch 00112: val_loss did not improve from 87.43053
Epoch 113/500

Epoch 00113: val_loss did not improve from 87.43053
Epoch 114/500

Epoch 00114: val_loss did not improve from 87.43053
Epoch 115/500

Epoch 00115: val_loss did not improve from 87.43053
Epoch 116/500

Epoch 00116: val_loss did not improve from 87.43053
Epoch 117/500

Epoch 00117: val_loss did not improve from 87.43053
Epoch 118/500

Epoch 00118: val_loss did not improve from 87.43053
Epoch 119/500

Epoch 00119: val_loss did not improve from 87.43053
Epoch 120/500

Epoch 00120: val_loss did not improve from 87.43053
Epoch 121/500

Epoch 00121: val_loss did not improve from 87.43053
Epoch 122/500

Epoch 00122: val_loss did not improve from 87.43053
Epoch 123/500

Epoch 00123: val_loss did not improve from 87.43053
Epoch 124/500

Epoch 00124: val_loss did not improve from 87.43053
Epoch 125/500

Epoch 00125: val_loss did not improve from 87.43053
Epoch 126


Epoch 00148: val_loss did not improve from 87.43053
Epoch 149/500

Epoch 00149: val_loss did not improve from 87.43053
Epoch 150/500

Epoch 00150: val_loss did not improve from 87.43053
Epoch 151/500

Epoch 00151: val_loss did not improve from 87.43053
Epoch 152/500

Epoch 00152: val_loss did not improve from 87.43053
Epoch 153/500

Epoch 00153: val_loss did not improve from 87.43053
Epoch 154/500

Epoch 00154: val_loss did not improve from 87.43053
Epoch 155/500

Epoch 00155: val_loss did not improve from 87.43053
Epoch 156/500

Epoch 00156: val_loss did not improve from 87.43053
Epoch 157/500

Epoch 00157: val_loss did not improve from 87.43053
Epoch 158/500

Epoch 00158: val_loss did not improve from 87.43053
Epoch 159/500

Epoch 00159: val_loss did not improve from 87.43053
Epoch 160/500

Epoch 00160: val_loss did not improve from 87.43053
Epoch 161/500

Epoch 00161: val_loss did not improve from 87.43053
Epoch 162/500

Epoch 00162: val_loss did not improve from 87.43053
Epoch 163


Epoch 00185: val_loss did not improve from 87.43053
Epoch 186/500

Epoch 00186: val_loss did not improve from 87.43053
Epoch 187/500

Epoch 00187: val_loss did not improve from 87.43053
Epoch 188/500

Epoch 00188: val_loss did not improve from 87.43053
Epoch 189/500

Epoch 00189: val_loss did not improve from 87.43053
Epoch 190/500

Epoch 00190: val_loss did not improve from 87.43053
Epoch 191/500

Epoch 00191: val_loss did not improve from 87.43053
Epoch 192/500

Epoch 00192: val_loss did not improve from 87.43053
Epoch 193/500

Epoch 00193: val_loss did not improve from 87.43053
Epoch 194/500

Epoch 00194: val_loss did not improve from 87.43053
Epoch 195/500

Epoch 00195: val_loss did not improve from 87.43053
Epoch 196/500

Epoch 00196: val_loss did not improve from 87.43053
Epoch 197/500

Epoch 00197: val_loss did not improve from 87.43053
Epoch 198/500

Epoch 00198: val_loss did not improve from 87.43053
Epoch 199/500

Epoch 00199: val_loss did not improve from 87.43053
Epoch 200


Epoch 00222: val_loss did not improve from 87.43053
Epoch 223/500

Epoch 00223: val_loss did not improve from 87.43053
Epoch 224/500

Epoch 00224: val_loss did not improve from 87.43053
Epoch 225/500

Epoch 00225: val_loss did not improve from 87.43053
Epoch 226/500

Epoch 00226: val_loss did not improve from 87.43053
Epoch 227/500

Epoch 00227: val_loss did not improve from 87.43053
Epoch 228/500

Epoch 00228: val_loss did not improve from 87.43053
Epoch 229/500

Epoch 00229: val_loss did not improve from 87.43053
Epoch 230/500

Epoch 00230: val_loss did not improve from 87.43053
Epoch 231/500

Epoch 00231: val_loss did not improve from 87.43053
Epoch 232/500

Epoch 00232: val_loss did not improve from 87.43053
Epoch 233/500

Epoch 00233: val_loss did not improve from 87.43053
Epoch 234/500

Epoch 00234: val_loss did not improve from 87.43053
Epoch 235/500

Epoch 00235: val_loss did not improve from 87.43053
Epoch 236/500

Epoch 00236: val_loss did not improve from 87.43053
Epoch 237


Epoch 00259: val_loss did not improve from 87.43053
Epoch 260/500

Epoch 00260: val_loss did not improve from 87.43053
Epoch 261/500

Epoch 00261: val_loss did not improve from 87.43053
Epoch 262/500

Epoch 00262: val_loss did not improve from 87.43053
Epoch 263/500

Epoch 00263: val_loss did not improve from 87.43053
Epoch 264/500

Epoch 00264: val_loss did not improve from 87.43053
Epoch 265/500

Epoch 00265: val_loss did not improve from 87.43053
Epoch 266/500

Epoch 00266: val_loss did not improve from 87.43053
Epoch 267/500

Epoch 00267: val_loss did not improve from 87.43053
Epoch 268/500

Epoch 00268: val_loss did not improve from 87.43053
Epoch 269/500

Epoch 00269: val_loss did not improve from 87.43053
Epoch 270/500

Epoch 00270: val_loss did not improve from 87.43053
Epoch 271/500

Epoch 00271: val_loss did not improve from 87.43053
Epoch 272/500

Epoch 00272: val_loss did not improve from 87.43053
Epoch 273/500

Epoch 00273: val_loss improved from 87.43053 to 84.59750, sav


Epoch 00295: val_loss did not improve from 84.59750
Epoch 296/500

Epoch 00296: val_loss did not improve from 84.59750
Epoch 297/500

Epoch 00297: val_loss did not improve from 84.59750
Epoch 298/500

Epoch 00298: val_loss did not improve from 84.59750
Epoch 299/500

Epoch 00299: val_loss did not improve from 84.59750
Epoch 300/500

Epoch 00300: val_loss did not improve from 84.59750
Epoch 301/500

Epoch 00301: val_loss did not improve from 84.59750
Epoch 302/500

Epoch 00302: val_loss did not improve from 84.59750
Epoch 303/500

Epoch 00303: val_loss did not improve from 84.59750
Epoch 304/500

Epoch 00304: val_loss did not improve from 84.59750
Epoch 305/500

Epoch 00305: val_loss did not improve from 84.59750
Epoch 306/500

Epoch 00306: val_loss did not improve from 84.59750
Epoch 307/500

Epoch 00307: val_loss did not improve from 84.59750
Epoch 308/500

Epoch 00308: val_loss did not improve from 84.59750
Epoch 309/500

Epoch 00309: val_loss did not improve from 84.59750
Epoch 310


Epoch 00332: val_loss did not improve from 84.59750
Epoch 333/500

Epoch 00333: val_loss did not improve from 84.59750
Epoch 334/500

Epoch 00334: val_loss did not improve from 84.59750
Epoch 335/500

Epoch 00335: val_loss did not improve from 84.59750
Epoch 336/500

Epoch 00336: val_loss did not improve from 84.59750
Epoch 337/500

Epoch 00337: val_loss did not improve from 84.59750
Epoch 338/500

Epoch 00338: val_loss did not improve from 84.59750
Epoch 339/500

Epoch 00339: val_loss did not improve from 84.59750
Epoch 340/500

Epoch 00340: val_loss did not improve from 84.59750
Epoch 341/500

Epoch 00341: val_loss did not improve from 84.59750
Epoch 342/500

Epoch 00342: val_loss did not improve from 84.59750
Epoch 343/500

Epoch 00343: val_loss did not improve from 84.59750
Epoch 344/500

Epoch 00344: val_loss did not improve from 84.59750
Epoch 345/500

Epoch 00345: val_loss did not improve from 84.59750
Epoch 346/500

Epoch 00346: val_loss did not improve from 84.59750
Epoch 347


Epoch 00369: val_loss did not improve from 84.59750
Epoch 370/500

Epoch 00370: val_loss did not improve from 84.59750
Epoch 371/500

Epoch 00371: val_loss did not improve from 84.59750
Epoch 372/500

Epoch 00372: val_loss did not improve from 84.59750
Epoch 373/500

Epoch 00373: val_loss did not improve from 84.59750
Epoch 374/500

Epoch 00374: val_loss did not improve from 84.59750
Epoch 375/500

Epoch 00375: val_loss did not improve from 84.59750
Epoch 376/500

Epoch 00376: val_loss did not improve from 84.59750
Epoch 377/500

Epoch 00377: val_loss did not improve from 84.59750
Epoch 378/500

Epoch 00378: val_loss did not improve from 84.59750
Epoch 379/500

Epoch 00379: val_loss did not improve from 84.59750
Epoch 380/500

Epoch 00380: val_loss did not improve from 84.59750
Epoch 381/500

Epoch 00381: val_loss did not improve from 84.59750
Epoch 382/500

Epoch 00382: val_loss did not improve from 84.59750
Epoch 383/500

Epoch 00383: val_loss did not improve from 84.59750
Epoch 384


Epoch 00406: val_loss did not improve from 84.59750
Epoch 407/500

Epoch 00407: val_loss did not improve from 84.59750
Epoch 408/500

Epoch 00408: val_loss did not improve from 84.59750
Epoch 409/500

Epoch 00409: val_loss did not improve from 84.59750
Epoch 410/500

Epoch 00410: val_loss did not improve from 84.59750
Epoch 411/500

Epoch 00411: val_loss did not improve from 84.59750
Epoch 412/500

Epoch 00412: val_loss did not improve from 84.59750
Epoch 413/500

Epoch 00413: val_loss did not improve from 84.59750
Epoch 414/500

Epoch 00414: val_loss did not improve from 84.59750
Epoch 415/500

Epoch 00415: val_loss did not improve from 84.59750
Epoch 416/500

Epoch 00416: val_loss did not improve from 84.59750
Epoch 417/500

Epoch 00417: val_loss did not improve from 84.59750
Epoch 418/500

Epoch 00418: val_loss did not improve from 84.59750
Epoch 419/500

Epoch 00419: val_loss did not improve from 84.59750
Epoch 420/500

Epoch 00420: val_loss did not improve from 84.59750
Epoch 421

In [163]:
nn_predictions = nn_model.predict(X_test)
nn_mse = mean_squared_error(Y_test, nn_predictions)
nn_rmse = np.sqrt(nn_mse)
nn_rmse

8.559820635687789