In [57]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
from sklearn.linear_model import RidgeCV, LassoCV, Ridge, Lasso
from sklearn.linear_model import LinearRegression
from sklearn import linear_model
from sklearn.model_selection import cross_val_score, cross_val_predict, GridSearchCV
from sklearn import metrics
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [58]:
daily_data = pd.read_csv('dataset_completo.csv')
daily_data.head()

Unnamed: 0,specific_prod(kWh/kWp),azimuth,tilt,lat,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation
0,3.337966,180.0,15.0,19.354622,6.0,11.6,17.0,28.0,14.0,0.7
1,3.633871,180.0,15.0,19.354622,5.0,10.4,35.0,26.0,13.0,8.9
2,3.685551,180.0,15.0,19.354622,4.0,8.0,51.0,23.0,12.0,17.7
3,3.380731,180.0,15.0,19.354622,4.0,11.6,36.0,24.0,10.0,0.8
4,3.240584,180.0,15.0,19.354622,4.0,9.2,48.0,23.0,10.0,6.3


In [125]:
pd.DataFrame(daily_data).drop('lat', axis=1)

Unnamed: 0,specific_prod(kWh/kWp),azimuth,tilt,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation
0,3.337966,180.0,15.0,6.0,11.6,17.0,28.0,14.0,0.7
1,3.633871,180.0,15.0,5.0,10.4,35.0,26.0,13.0,8.9
2,3.685551,180.0,15.0,4.0,8.0,51.0,23.0,12.0,17.7
3,3.380731,180.0,15.0,4.0,11.6,36.0,24.0,10.0,0.8
4,3.240584,180.0,15.0,4.0,9.2,48.0,23.0,10.0,6.3
...,...,...,...,...,...,...,...,...,...
14405,4.896154,116.0,4.0,5.0,9.2,48.0,29.0,13.0,3.4
14406,4.752991,116.0,4.0,6.0,11.6,25.0,29.0,15.0,0.1
14407,4.878632,116.0,4.0,6.0,11.6,7.0,27.0,15.0,0.0
14408,5.922650,116.0,4.0,6.0,11.6,1.0,27.0,14.0,0.0


In [59]:
daily_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14410 entries, 0 to 14409
Data columns (total 10 columns):
specific_prod(kWh/kWp)    14410 non-null float64
azimuth                   14410 non-null float64
tilt                      14410 non-null float64
lat                       14410 non-null float64
uvIndex                   14410 non-null float64
sunHour                   14410 non-null float64
cloudcover                14410 non-null float64
maxtempC                  14410 non-null float64
mintempC                  14410 non-null float64
precipitation             14410 non-null float64
dtypes: float64(10)
memory usage: 1.1 MB


In [63]:
y = pd.DataFrame(daily_data, columns=['specific_prod(kWh/kWp)'])
y.head()

Unnamed: 0,specific_prod(kWh/kWp)
0,3.337966
1,3.633871
2,3.685551
3,3.380731
4,3.240584


In [65]:
X = pd.DataFrame(daily_data).drop('specific_prod(kWh/kWp)', axis=1)

In [66]:
X.columns

Index(['azimuth', 'tilt', 'lat', 'uvIndex', 'sunHour', 'cloudcover',
       'maxtempC', 'mintempC', 'precipitation'],
      dtype='object')

In [67]:
X = X[['azimuth', 'tilt', 'uvIndex', 'sunHour', 'cloudcover',
       'maxtempC', 'mintempC', 'precipitation']]
X.head()

Unnamed: 0,azimuth,tilt,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation
0,180.0,15.0,6.0,11.6,17.0,28.0,14.0,0.7
1,180.0,15.0,5.0,10.4,35.0,26.0,13.0,8.9
2,180.0,15.0,4.0,8.0,51.0,23.0,12.0,17.7
3,180.0,15.0,4.0,11.6,36.0,24.0,10.0,0.8
4,180.0,15.0,4.0,9.2,48.0,23.0,10.0,6.3


In [68]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=27)

## Scaling

In [69]:
X_scaler = StandardScaler().fit(X_train)
y_scaler = StandardScaler().fit(y_train)

In [70]:
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)
y_train_scaled = y_scaler.transform(y_train)
y_test_scaled = y_scaler.transform(y_test)

In [88]:
y_train_scaled.shape

(10807, 1)

## CV

In [94]:
def get_cv_scores(model):
    scores = cross_val_score(model,
                             X_train_scaled,
                             y_train_scaled,
                             cv=5,
                             scoring='r2')
    print('CV Mean: ', np.mean(scores))
    print('STD: ', np.std(scores))

In [95]:
lr = LinearRegression().fit(X_train_scaled, y_train_scaled)

In [96]:
get_cv_scores(lr)

CV Mean:  0.3435760231777758
STD:  0.017138048357650655


## Ridge

In [97]:
ridge = Ridge(alpha=1).fit(X_train_scaled, y_train_scaled)
get_cv_scores(ridge)

CV Mean:  0.34357889523287877
STD:  0.017129600160297596


In [98]:
alpha = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
param_grid = dict(alpha=alpha)

In [99]:
grid = GridSearchCV(estimator=ridge, param_grid=param_grid, scoring='r2', verbose=1, n_jobs=-1)
grid_result = grid.fit(X_train_scaled, y_train_scaled)

Fitting 5 folds for each of 7 candidates, totalling 35 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  35 out of  35 | elapsed:    6.3s finished


In [100]:
print('Best Score: ', grid_result.best_score_)
print('Best Params: ', grid_result.best_params_)

Best Score:  0.3435992018088869
Best Params:  {'alpha': 10}


## Lasso

In [101]:
lasso = Lasso(alpha=1).fit(X_train_scaled, y_train_scaled)

In [102]:
get_cv_scores(lasso)

CV Mean:  -0.0033740240361740613
STD:  0.0023491192995522777


In [103]:
alpha = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
param_grid = dict(alpha=alpha)

In [104]:
grid = GridSearchCV(estimator=lasso, param_grid=param_grid, scoring='r2', verbose=1, n_jobs=-1)
grid_result = grid.fit(X_train_scaled, y_train_scaled)

Fitting 5 folds for each of 7 candidates, totalling 35 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  20 out of  35 | elapsed:    0.1s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done  35 out of  35 | elapsed:    0.1s finished


In [105]:
print('Best Score: ', grid_result.best_score_)
print('Best Params: ', grid_result.best_params_)

Best Score:  0.3436133110907855
Best Params:  {'alpha': 0.001}


## Train Model

In [107]:
lm = linear_model.LinearRegression()
model = lm.fit(X_train_scaled, y_train_scaled)

In [108]:
r2 = lm.score(X_test_scaled, y_test_scaled)
r2

0.33874244860195346

In [109]:
X_1 = sm.add_constant(X_train_scaled)
model = sm.OLS(y_train_scaled, X_1).fit()
model.pvalues

array([1.00000000e+000, 2.51427262e-005, 1.07931105e-071, 4.28231916e-064,
       3.27065093e-048, 6.06337413e-001, 8.70601161e-035, 2.28885495e-220,
       2.20659798e-012, 1.72680315e-001])

## Random Forest (The one that we used)

In [97]:
RandomForestRegModel = RandomForestRegressor(n_estimators=500)
RandomForestRegModel.fit(X_train_scaled, y_train_scaled)

  


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
                      max_features='auto', max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, n_estimators=500,
                      n_jobs=None, oob_score=False, random_state=None,
                      verbose=0, warm_start=False)

In [98]:
y_predict = RandomForestRegModel.predict(X_test_scaled)

In [99]:
mse = mean_squared_error(y_test_scaled, y_predict)
r2 = np.sqrt(mse)
r2

0.5835514372552615

In [74]:
feature_names = X.columns

sorted(zip(RandomForestRegModel.feature_importances_, feature_names), reverse=True)

### Grid Search 

In [112]:
from sklearn.model_selection import GridSearchCV

In [113]:
param_grid = {
    'bootstrap': [True],
    'max_depth': [80, 90, 100, 110],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 200, 300, 1000]
}

In [114]:
rf = RandomForestRegressor(n_estimators=500)
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, cv = 3, n_jobs = -1, verbose = 2)


In [115]:
grid_search.fit(X_train_scaled, y_train_scaled)
print(grid_search.best_params_)
print(grid_search.best_score_)

Fitting 3 folds for each of 288 candidates, totalling 864 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:   46.0s
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:  3.5min
[Parallel(n_jobs=-1)]: Done 357 tasks      | elapsed:  8.1min
[Parallel(n_jobs=-1)]: Done 640 tasks      | elapsed: 14.6min
[Parallel(n_jobs=-1)]: Done 864 out of 864 | elapsed: 19.8min finished
  self.best_estimator_.fit(X, y, **fit_params)


{'bootstrap': True, 'max_depth': 110, 'max_features': 3, 'min_samples_leaf': 3, 'min_samples_split': 8, 'n_estimators': 1000}
0.6306383382896757


In [124]:
import joblib

joblib.dump(grid_search, 'model.sav')

['model.sav']

In [127]:
X_train_scaled

AttributeError: 'numpy.ndarray' object has no attribute 'info'

## Adjusted Random Forest

In [120]:
RandomForestRegModel = RandomForestRegressor(bootstrap= True, max_depth= 110, max_features= 3, min_samples_leaf= 3, min_samples_split= 8, n_estimators= 10000)
RandomForestRegModel.fit(X_train_scaled, y_train_scaled)

  


RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=110,
                      max_features=3, max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=3, min_samples_split=8,
                      min_weight_fraction_leaf=0.0, n_estimators=1000,
                      n_jobs=None, oob_score=False, random_state=None,
                      verbose=0, warm_start=False)

In [121]:
y_predict = RandomForestRegModel.predict(X_test_scaled)

In [129]:
y_predict

array([ 0.59094295, -0.46126262, -0.45624719, ..., -1.1403652 ,
       -0.40950716,  0.0217436 ])

In [122]:
mse = mean_squared_error(y_test_scaled, y_predict)
r2 = np.sqrt(mse)
r2

0.5900855596313381

In [123]:
feature_names = X.columns

sorted(zip(RandomForestRegModel.feature_importances_, feature_names), reverse=True)

[(0.26793319106174096, 'maxtempC'),
 (0.16595668666671992, 'cloudcover'),
 (0.14681726501474787, 'azimuth'),
 (0.10313261069835639, 'precipitation'),
 (0.0916508712700864, 'tilt'),
 (0.09000691357853202, 'sunHour'),
 (0.07287352911525308, 'mintempC'),
 (0.06162893259456342, 'uvIndex')]

## NN

In [76]:
from sklearn.neural_network import MLPRegressor
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split

In [91]:
regr = MLPRegressor(hidden_layer_sizes=(200,3), random_state=1, max_iter=10000, early_stopping=True).fit(X_train_scaled, y_train_scaled)

  y = column_or_1d(y, warn=True)


In [92]:
regr.predict(X_test_scaled[:2])

array([ 0.76400573, -0.36287344])

In [93]:
regr.score(X_test_scaled, y_test_scaled)

0.5973213701925976

## Otro NN

In [80]:
from sklearn.neural_network import MLPRegressor
import numpy as np
import matplotlib.pyplot as plt

In [81]:
nn = MLPRegressor(
    hidden_layer_sizes=(500,),  activation='relu', solver='adam', alpha=0.001, batch_size='auto',
    learning_rate='constant', learning_rate_init=0.01, power_t=0.5, max_iter=1000, shuffle=True,
    random_state=9, tol=0.0001, verbose=False, warm_start=False, momentum=0.9, nesterovs_momentum=True,
    early_stopping=False, validation_fraction=0.1, beta_1=0.9, beta_2=0.999, epsilon=1e-08)



In [82]:
n = nn.fit(X_train_scaled, y_train_scaled)
test_x = np.arange(0.0, 1, 0.05).reshape(-1, 1)
test_y = nn.predict(X_test)
# fig = plt.figure()
# ax1 = fig.add_subplot(111)
# ax1.scatter(X_train, y_train, s=1, c='b', marker="s", label='real')
# ax1.scatter(X_test,y_test, s=10, c='r', marker="o", label='NN Prediction')
# plt.show()

  y = column_or_1d(y, warn=True)


In [83]:
n.score(X_test_scaled, y_test_scaled)

0.5768187867854833

## Predictions

In [130]:
import joblib

# joblib.dump(grid_search, 'model.sav')
model = joblib.load('model.sav')

### Load and creation of main DF

In [131]:
weather_df = pd.read_csv('clima_predict.csv')
weather_df.head()

Unnamed: 0,date,id,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation,sunrise,sunset
0,2019-08-01,Downtown,4.0,10.3,42.0,24.0,9.0,1.7,07:13 AM,08:13 PM
1,2019-08-02,Downtown,4.0,7.8,49.0,24.0,10.0,4.7,07:13 AM,08:12 PM
2,2019-08-03,Downtown,5.0,11.6,36.0,25.0,10.0,2.1,07:14 AM,08:12 PM
3,2019-08-04,Downtown,4.0,7.8,56.0,24.0,11.0,15.7,07:14 AM,08:11 PM
4,2019-08-05,Downtown,4.0,6.5,77.0,23.0,10.0,25.1,07:14 AM,08:11 PM


In [132]:
site_df = pd.read_csv('site_data.csv')
site_df

Unnamed: 0,id,azimuth,tilt,type,capacity(kWp),lat,lon
0,Downtown,168,15,poli,4.68,19.42363,-99.13177
1,West,193,11,poli,5.83,19.399021,-99.24541
2,East,180,13,poli,2.38,19.40485,-98.992381
3,South,135,13,poli,2.65,19.278835,-99.142413
4,North,197,13,poli,3.18,19.560209,-99.127307


In [133]:
df = pd.merge(weather_df, site_df, how='left', on='id')
df.head()

Unnamed: 0,date,id,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation,sunrise,sunset,azimuth,tilt,type,capacity(kWp),lat,lon
0,2019-08-01,Downtown,4.0,10.3,42.0,24.0,9.0,1.7,07:13 AM,08:13 PM,168,15,poli,4.68,19.42363,-99.13177
1,2019-08-02,Downtown,4.0,7.8,49.0,24.0,10.0,4.7,07:13 AM,08:12 PM,168,15,poli,4.68,19.42363,-99.13177
2,2019-08-03,Downtown,5.0,11.6,36.0,25.0,10.0,2.1,07:14 AM,08:12 PM,168,15,poli,4.68,19.42363,-99.13177
3,2019-08-04,Downtown,4.0,7.8,56.0,24.0,11.0,15.7,07:14 AM,08:11 PM,168,15,poli,4.68,19.42363,-99.13177
4,2019-08-05,Downtown,4.0,6.5,77.0,23.0,10.0,25.1,07:14 AM,08:11 PM,168,15,poli,4.68,19.42363,-99.13177


In [134]:
df['tilt'] = 11
df.head()

Unnamed: 0,date,id,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation,sunrise,sunset,azimuth,tilt,type,capacity(kWp),lat,lon
0,2019-08-01,Downtown,4.0,10.3,42.0,24.0,9.0,1.7,07:13 AM,08:13 PM,168,11,poli,4.68,19.42363,-99.13177
1,2019-08-02,Downtown,4.0,7.8,49.0,24.0,10.0,4.7,07:13 AM,08:12 PM,168,11,poli,4.68,19.42363,-99.13177
2,2019-08-03,Downtown,5.0,11.6,36.0,25.0,10.0,2.1,07:14 AM,08:12 PM,168,11,poli,4.68,19.42363,-99.13177
3,2019-08-04,Downtown,4.0,7.8,56.0,24.0,11.0,15.7,07:14 AM,08:11 PM,168,11,poli,4.68,19.42363,-99.13177
4,2019-08-05,Downtown,4.0,6.5,77.0,23.0,10.0,25.1,07:14 AM,08:11 PM,168,11,poli,4.68,19.42363,-99.13177


In [135]:
df['azimuth'] = 180
df.head()

Unnamed: 0,date,id,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation,sunrise,sunset,azimuth,tilt,type,capacity(kWp),lat,lon
0,2019-08-01,Downtown,4.0,10.3,42.0,24.0,9.0,1.7,07:13 AM,08:13 PM,180,11,poli,4.68,19.42363,-99.13177
1,2019-08-02,Downtown,4.0,7.8,49.0,24.0,10.0,4.7,07:13 AM,08:12 PM,180,11,poli,4.68,19.42363,-99.13177
2,2019-08-03,Downtown,5.0,11.6,36.0,25.0,10.0,2.1,07:14 AM,08:12 PM,180,11,poli,4.68,19.42363,-99.13177
3,2019-08-04,Downtown,4.0,7.8,56.0,24.0,11.0,15.7,07:14 AM,08:11 PM,180,11,poli,4.68,19.42363,-99.13177
4,2019-08-05,Downtown,4.0,6.5,77.0,23.0,10.0,25.1,07:14 AM,08:11 PM,180,11,poli,4.68,19.42363,-99.13177


In [136]:
df = df[['id', 'azimuth', 'tilt', 'uvIndex', 'sunHour', 'cloudcover', 'maxtempC',
       'mintempC', 'precipitation']]
df.info()
df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1830 entries, 0 to 1829
Data columns (total 9 columns):
id               1830 non-null object
azimuth          1830 non-null int64
tilt             1830 non-null int64
uvIndex          1830 non-null float64
sunHour          1830 non-null float64
cloudcover       1830 non-null float64
maxtempC         1830 non-null float64
mintempC         1830 non-null float64
precipitation    1830 non-null float64
dtypes: float64(6), int64(2), object(1)
memory usage: 143.0+ KB


Unnamed: 0,id,azimuth,tilt,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation
0,Downtown,180,11,4.0,10.3,42.0,24.0,9.0,1.7
1,Downtown,180,11,4.0,7.8,49.0,24.0,10.0,4.7
2,Downtown,180,11,5.0,11.6,36.0,25.0,10.0,2.1
3,Downtown,180,11,4.0,7.8,56.0,24.0,11.0,15.7
4,Downtown,180,11,4.0,6.5,77.0,23.0,10.0,25.1


### Datasets Preparation

#### Downtown

In [141]:
downtown_df = df[df['id'] == 'Downtown']
downtown_df = downtown_df[['azimuth', 'tilt', 'uvIndex', 'sunHour', 'cloudcover', 'maxtempC',
       'mintempC', 'precipitation']]
downtown_df.info()
downtown_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 366 entries, 0 to 365
Data columns (total 8 columns):
azimuth          366 non-null int64
tilt             366 non-null int64
uvIndex          366 non-null float64
sunHour          366 non-null float64
cloudcover       366 non-null float64
maxtempC         366 non-null float64
mintempC         366 non-null float64
precipitation    366 non-null float64
dtypes: float64(6), int64(2)
memory usage: 25.7 KB


Unnamed: 0,azimuth,tilt,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation
0,180,11,4.0,10.3,42.0,24.0,9.0,1.7
1,180,11,4.0,7.8,49.0,24.0,10.0,4.7
2,180,11,5.0,11.6,36.0,25.0,10.0,2.1
3,180,11,4.0,7.8,56.0,24.0,11.0,15.7
4,180,11,4.0,6.5,77.0,23.0,10.0,25.1


In [142]:
X_scaler = StandardScaler().fit(downtown_df)
X_predict = X_scaler.transform(downtown_df)

In [143]:
y_predict = model.predict(X_predict)
y_predict = pd.DataFrame(y_predict)
y_predict.head()

Unnamed: 0,0
0,-0.089898
1,0.14917
2,0.410235
3,-0.370263
4,-0.505266


In [146]:
y_downtown = y_scaler.inverse_transform(y_predict)
downtown_year = y_downtown.sum()
downtown_year

1587.0874706615466

#### West

In [147]:
west_df = df[df['id'] == 'West']
west_df = west_df[['azimuth', 'tilt', 'uvIndex', 'sunHour', 'cloudcover', 'maxtempC',
       'mintempC', 'precipitation']]
west_df.info()
west_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 366 entries, 366 to 731
Data columns (total 8 columns):
azimuth          366 non-null int64
tilt             366 non-null int64
uvIndex          366 non-null float64
sunHour          366 non-null float64
cloudcover       366 non-null float64
maxtempC         366 non-null float64
mintempC         366 non-null float64
precipitation    366 non-null float64
dtypes: float64(6), int64(2)
memory usage: 25.7 KB


Unnamed: 0,azimuth,tilt,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation
366,180,11,5.0,9.1,59.0,26.0,13.0,1.8
367,180,11,5.0,10.3,42.0,26.0,14.0,1.6
368,180,11,6.0,11.6,24.0,27.0,15.0,0.6
369,180,11,5.0,9.0,46.0,26.0,14.0,11.3
370,180,11,5.0,6.5,81.0,26.0,15.0,18.1


In [148]:
X_scaler = StandardScaler().fit(west_df)
X_predict = X_scaler.transform(west_df)

In [149]:
y_predict = model.predict(X_predict)
y_predict = pd.DataFrame(y_predict)
y_predict.head()

Unnamed: 0,0
0,-0.021275
1,-0.284382
2,0.687062
3,-0.203362
4,-0.152979


In [150]:
y_west = y_scaler.inverse_transform(y_predict)
west_year = y_west.sum()
west_year

1602.9623798917075

#### East

In [151]:
east_df = df[df['id'] == 'East']
east_df = east_df[['azimuth', 'tilt', 'uvIndex', 'sunHour', 'cloudcover', 'maxtempC',
       'mintempC', 'precipitation']]
east_df.info()
east_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 366 entries, 732 to 1097
Data columns (total 8 columns):
azimuth          366 non-null int64
tilt             366 non-null int64
uvIndex          366 non-null float64
sunHour          366 non-null float64
cloudcover       366 non-null float64
maxtempC         366 non-null float64
mintempC         366 non-null float64
precipitation    366 non-null float64
dtypes: float64(6), int64(2)
memory usage: 25.7 KB


Unnamed: 0,azimuth,tilt,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation
732,180,11,3.0,10.3,46.0,19.0,8.0,2.7
733,180,11,3.0,9.0,54.0,18.0,8.0,14.3
734,180,11,5.0,11.6,32.0,22.0,7.0,1.8
735,180,11,3.0,7.8,52.0,18.0,9.0,19.1
736,180,11,3.0,6.5,78.0,19.0,9.0,20.5


In [152]:
X_scaler = StandardScaler().fit(east_df)
X_predict = X_scaler.transform(east_df)

In [153]:
y_predict = model.predict(X_predict)
y_predict = pd.DataFrame(y_predict)
y_predict.head()

Unnamed: 0,0
0,-0.396274
1,-0.673868
2,0.678589
3,-0.591316
4,-0.432468


In [154]:
y_east = y_scaler.inverse_transform(y_predict)
east_year = y_east.sum()
east_year

1594.7475927826506

#### North

In [155]:
north_df = df[df['id'] == 'North']
north_df = north_df[['azimuth', 'tilt', 'uvIndex', 'sunHour', 'cloudcover', 'maxtempC',
       'mintempC', 'precipitation']]
north_df.info()
north_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 366 entries, 1464 to 1829
Data columns (total 8 columns):
azimuth          366 non-null int64
tilt             366 non-null int64
uvIndex          366 non-null float64
sunHour          366 non-null float64
cloudcover       366 non-null float64
maxtempC         366 non-null float64
mintempC         366 non-null float64
precipitation    366 non-null float64
dtypes: float64(6), int64(2)
memory usage: 25.7 KB


Unnamed: 0,azimuth,tilt,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation
1464,180,11,4.0,11.6,37.0,25.0,12.0,1.7
1465,180,11,5.0,9.0,55.0,25.0,13.0,2.1
1466,180,11,6.0,11.6,35.0,26.0,13.0,1.1
1467,180,11,5.0,7.8,58.0,25.0,13.0,9.9
1468,180,11,5.0,7.8,74.0,25.0,14.0,21.1


In [156]:
X_scaler = StandardScaler().fit(north_df)
X_predict = X_scaler.transform(north_df)

In [157]:
y_predict = model.predict(X_predict)
y_predict = pd.DataFrame(y_predict)
y_predict.head()

Unnamed: 0,0
0,0.17441
1,0.03283
2,0.722738
3,-0.121815
4,-0.102808


In [158]:
y_north = y_scaler.inverse_transform(y_predict)
north_year = y_north.sum()
north_year

1624.7109545559795

#### South

In [159]:
south_df = df[df['id'] == 'South']
south_df = south_df[['azimuth', 'tilt', 'uvIndex', 'sunHour', 'cloudcover', 'maxtempC',
       'mintempC', 'precipitation']]
south_df.info()
south_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 366 entries, 1098 to 1463
Data columns (total 8 columns):
azimuth          366 non-null int64
tilt             366 non-null int64
uvIndex          366 non-null float64
sunHour          366 non-null float64
cloudcover       366 non-null float64
maxtempC         366 non-null float64
mintempC         366 non-null float64
precipitation    366 non-null float64
dtypes: float64(6), int64(2)
memory usage: 25.7 KB


Unnamed: 0,azimuth,tilt,uvIndex,sunHour,cloudcover,maxtempC,mintempC,precipitation
1098,180,11,4.0,10.3,42.0,24.0,9.0,1.7
1099,180,11,4.0,7.8,49.0,24.0,10.0,4.7
1100,180,11,5.0,11.6,36.0,25.0,10.0,2.1
1101,180,11,4.0,7.8,56.0,24.0,11.0,15.7
1102,180,11,4.0,6.5,77.0,23.0,10.0,25.1


In [160]:
X_scaler = StandardScaler().fit(south_df)
X_predict = X_scaler.transform(south_df)

In [161]:
y_predict = model.predict(X_predict)
y_predict = pd.DataFrame(y_predict)
y_predict.head()

Unnamed: 0,0
0,-0.089898
1,0.14917
2,0.410235
3,-0.370263
4,-0.505266


In [162]:
y_south = y_scaler.inverse_transform(y_predict)
south_year = y_south.sum()
south_year

1587.0428844294906

### JSON

In [183]:
predictions = {
    "North": int(north_year),
    "South": int(south_year),
    "West": int(west_year),
    "East": int(east_year),
    "Downtown": int(downtown_year)
}
predictions

{'North': 1624, 'South': 1587, 'West': 1602, 'East': 1594, 'Downtown': 1587}

In [184]:
import json, pickle

In [188]:
json_predictions = open('predictions.json', 'w')
json.dump(predictions, json_predictions)
json_predictions.close()

In [189]:
load_json = open('predictions.json', 'r')
output = load_json.read()
print(output)

{"North": 1624, "South": 1587, "West": 1602, "East": 1594, "Downtown": 1587}


In [190]:
json_predictions.close()