In [1]:
import pandas as pd
import numpy as np
df = pd.read_csv('SolarDelhi.csv')

In [None]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
x = df[df.columns[:10]]
y = df['ALLSKY_SFC_SW_DWN']
x_train, x_test, y_train, y_test = train_test_split(x, y , train_size = 0.7, random_state =  90)
#Select numerical columns which needs to be normalized
train_norm = x_train[x_train.columns[0:10]]
test_norm = x_test[x_test.columns[0:10]]
# Normalize Training Data 
std_scale = preprocessing.StandardScaler().fit(train_norm)
x_train_norm = std_scale.transform(train_norm)
#Converting numpy array to dataframe
training_norm_col = pd.DataFrame(x_train_norm, index=train_norm.index, columns=train_norm.columns) 
x_train.update(training_norm_col)
#x_train.head()
# Normalize Testing Data by using mean and SD of training set
x_test_norm = std_scale.transform(test_norm)
testing_norm_col = pd.DataFrame(x_test_norm, index=test_norm.index, columns=test_norm.columns) 
x_test.update(testing_norm_col)

In [8]:
# IMPORT ML CLASSIFIERS
from sklearn.linear_model import LinearRegression # Linear regression
from sklearn.ensemble import RandomForestRegressor # random forest regression
from sklearn.neural_network import MLPRegressor # neural network regression
from sklearn.svm import SVR # support vector regression

from sklearn import preprocessing # ML tools
from sklearn.model_selection import train_test_split # split data


from bokeh.plotting import figure, show, output_notebook

def plot_test(clf,X_test,y_test):
    y_predicted = clf.predict(X_test)

    p = figure(tools='pan,box_zoom,reset',x_range=[0, 100], title='Model validation',y_axis_label='radiation')
    p.grid.minor_grid_line_color = '#eeeeee'

    p.line(range(len(y_test)),y_test,legend='actual',line_color='blue')
    p.line(range(len(y_test)),y_predicted,legend='prediction',line_color='red')
    output_notebook()
    show(p)
    return

def plot_real(clf,x,y_actual,index):
    ''' Plot predictions for actual measurements.
    inputs:
        clf         as classifier   the trained algorithm
        x           as array        timeseries of measurement inputs
        y_actual    as array        corresponding timeseries of actual results
    '''
    y_predicted = clf.predict(x)

    p = figure(toolbar_location='right', title='Predicted vs Actual',y_axis_label='radiation',x_axis_type="datetime")
    p.grid.minor_grid_line_color = '#eeeeee'

    p.line(index,y_actual,legend='actual',line_color='blue')
    p.line(index,y_predicted,legend='prediction',line_color='red')
    output_notebook()
    show(p)
    return

def train_model(X,y,clf,debug=False):
    ''' Train algorithm.
    inputs:
        X       as array        features
        y       as array        label(s)
        clf     as scikit-learn classifier (untrained)
    returns:
        clf     as trained classifier
        accuracy  as float
    '''
    X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.3)
    model = clf.fit(X_train,y_train)
    accuracy = clf.score(X_test,y_test)
    return clf, model, accuracy,X_test, y_test

def go(x,y,algorithm,debug=True):
    ''' Easy model train and test. '''
    clf, model, accuracy, X_test, y_test=train_model(x,y,algorithm,debug=True)
    print('Accuracy: %s percent'%str(accuracy*100))

    if debug:
        plot_test(clf,X_test,y_test)
        plot_real(clf,x,y,df.index.values)
    return

def optimize_randomforest(x,y,try_n=10,try_f='auto',try_s=1):
    ''' Find best combo of tunable params for random forest regressor. '''
    best_score = float('-inf') # initialize score
    for n in try_n:
        for f in try_f:
            for s in try_s:
                clf = RandomForestRegressor(oob_score=True,n_estimators=n,max_features=f,min_samples_leaf=s,n_jobs=-1)
                clf.fit(x,y)
                if clf.oob_score_ > best_score:
                    best_score, best_clf, best_n, best_f, best_s = clf.oob_score_, clf, n, f, s
    return clf, best_n, best_f, best_s

x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.2)
n=[100,200,300,500]
f=[2,4,6]
s=[1,2,4,8,16]
clf, n, f, s = optimize_randomforest(x_train,y_train,try_n=n,try_f=f,try_s=s)
print('n_estimators: '+str(n))
print('max_features: '+str(f))
print('min_samples_leaf: '+str(s))
go(x_train, x_test, y_train, y_test,RandomForestRegressor(n_estimators=n,max_features=f,min_samples_leaf=s,n_jobs=-1))
#go(x,y,LinearRegression())

n_estimators: 300
max_features: 6
min_samples_leaf: 1
Accuracy: 96.97116888623168 percent
Accuracy2: 99.5888153032324 percent


In [10]:
import pandas
from sklearn.model_selection import KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.svm import SVR
import numpy as np

scaler = MinMaxScaler(feature_range=(0, 1))
X = scaler.fit_transform(x)


scores = []
best_svr = SVR(kernel='rbf')
cv = KFold(n_splits=10, random_state=42, shuffle=True)
for train_index, test_index in cv.split(X):
    print("Train Index: ", train_index, "\n")
    print("Test Index: ", test_index)

    X_train, X_test, y_train, y_test = X[train_index], X[test_index], y[train_index], y[test_index]
    best_svr.fit(X_train, y_train)
    scores.append(best_svr.score(X_test, y_test))

Train Index:  [   0    1    2 ... 6605 6606 6607] 

Test Index:  [   8   14   17   23   31   37   50   61   79   80   84   88   91   93
   96  132  156  157  167  179  185  196  198  208  217  228  230  233
  239  247  248  263  265  296  308  318  322  324  332  334  346  351
  373  378  381  393  401  425  432  435  465  468  469  472  476  486
  491  496  503  511  534  535  538  544  549  561  585  604  611  622
  625  676  681  683  696  706  712  736  743  747  752  755  756  763
  764  800  805  811  812  828  842  865  932  960  982  994  996 1002
 1005 1022 1034 1038 1039 1042 1047 1049 1057 1074 1084 1090 1097 1101
 1108 1115 1144 1149 1161 1163 1174 1175 1176 1181 1183 1188 1193 1212
 1223 1242 1253 1263 1297 1321 1339 1345 1352 1354 1357 1362 1374 1385
 1393 1406 1407 1410 1411 1412 1416 1419 1425 1426 1446 1467 1468 1477
 1480 1499 1501 1507 1543 1544 1559 1569 1595 1606 1611 1620 1623 1662
 1670 1671 1695 1703 1706 1726 1737 1765 1768 1786 1789 1805 1807 1820
 1831 1835 1



Train Index:  [   0    1    2 ... 6605 6606 6607] 

Test Index:  [  15   19   33   41   44   45   51   63   65   75  101  102  121  122
  124  135  144  149  168  172  177  181  199  216  221  227  231  240
  245  254  257  259  279  290  291  297  303  315  319  321  323  325
  333  347  349  354  371  376  380  408  410  418  420  422  426  437
  439  447  453  462  473  501  506  527  530  533  543  553  555  565
  576  577  586  598  599  620  626  641  652  655  662  672  691  705
  710  730  733  742  746  783  787  794  799  807  808  810  833  834
  864  879  881  893  907  926  957  964  969  977 1010 1018 1027 1032
 1044 1046 1056 1073 1075 1079 1092 1103 1119 1129 1153 1158 1192 1195
 1197 1199 1215 1219 1221 1225 1235 1244 1261 1272 1315 1323 1328 1330
 1335 1340 1360 1370 1371 1375 1397 1400 1405 1414 1420 1421 1433 1452
 1454 1471 1485 1487 1488 1498 1509 1511 1512 1514 1533 1535 1536 1545
 1550 1557 1562 1566 1593 1599 1608 1615 1616 1618 1632 1643 1650 1652
 1656 1657 1



Train Index:  [   0    1    2 ... 6605 6606 6607] 

Test Index:  [  12   29   30   47   57   69   71   73   87   90   95  100  106  107
  108  109  110  142  151  169  183  184  188  210  251  272  286  287
  292  293  295  307  336  339  344  367  387  414  415  416  421  429
  438  443  445  452  471  478  485  487  497  505  509  518  548  564
  584  589  605  613  624  627  642  653  654  677  693  701  711  721
  724  734  737  748  751  765  776  782  789  809  829  838  848  856
  861  877  889  896  903  911  915  925  927  931  937  944  947  952
  978 1020 1024 1025 1033 1041 1052 1055 1061 1083 1086 1087 1094 1095
 1109 1112 1117 1121 1128 1142 1168 1170 1172 1173 1186 1194 1203 1204
 1209 1210 1220 1224 1231 1258 1260 1268 1294 1295 1302 1305 1310 1319
 1322 1338 1344 1350 1351 1383 1430 1436 1443 1448 1451 1465 1483 1496
 1504 1513 1520 1539 1553 1554 1558 1561 1564 1575 1578 1580 1586 1600
 1612 1617 1630 1634 1654 1665 1666 1672 1684 1694 1702 1720 1721 1729
 1730 1732 1



Train Index:  [   0    1    2 ... 6604 6605 6606] 

Test Index:  [  26   43   49   62   68   70   83   86   92   99  103  111  113  120
  134  139  150  152  166  174  175  180  191  192  195  203  205  211
  212  214  218  219  220  229  238  252  270  274  278  283  289  298
  300  304  305  309  312  314  350  360  366  368  389  402  411  428
  450  451  457  461  463  479  480  489  495  498  507  508  516  517
  551  566  567  568  582  594  596  602  607  621  633  644  650  657
  668  670  679  680  684  690  708  718  720  725  731  741  744  745
  757  758  759  777  790  798  802  803  831  837  841  843  857  859
  868  873  887  890  897  898  945  949  993  999 1001 1029 1030 1068
 1071 1093 1096 1114 1126 1135 1151 1157 1164 1187 1189 1200 1211 1216
 1222 1230 1234 1255 1271 1281 1287 1288 1292 1293 1334 1349 1359 1361
 1379 1391 1392 1423 1424 1427 1432 1434 1437 1438 1456 1472 1476 1491
 1503 1505 1506 1526 1532 1534 1538 1583 1598 1609 1614 1647 1658 1661
 1688 1700 1



Train Index:  [   0    1    2 ... 6604 6605 6607] 

Test Index:  [   6   24   25   32   48   52   56   58   76   81   85  112  115  118
  136  138  159  170  194  222  234  256  266  276  299  326  328  330
  348  353  355  356  358  382  413  430  433  436  442  449  483  490
  494  540  557  573  579  615  618  643  647  648  656  664  665  674
  678  682  695  719  761  785  796  801  817  818  835  862  869  871
  888  908  910  912  941  973  990  998 1003 1006 1017 1023 1036 1051
 1053 1054 1064 1088 1102 1106 1111 1113 1116 1123 1130 1134 1137 1146
 1162 1178 1180 1196 1207 1227 1228 1236 1237 1238 1257 1259 1264 1270
 1303 1309 1313 1346 1378 1381 1399 1401 1402 1413 1415 1417 1444 1461
 1469 1474 1479 1482 1489 1497 1502 1510 1519 1522 1541 1548 1551 1552
 1556 1587 1588 1589 1590 1592 1610 1626 1627 1629 1675 1676 1691 1697
 1705 1714 1723 1736 1739 1746 1751 1767 1772 1776 1784 1808 1815 1833
 1850 1877 1878 1889 1890 1909 1932 1943 1946 1948 1972 1995 1999 2005
 2006 2011 2



Train Index:  [   1    2    3 ... 6605 6606 6607] 

Test Index:  [   0    7   18   39   67   82   97  104  123  131  141  155  162  173
  176  178  193  204  209  246  258  264  267  268  288  331  342  343
  370  383  386  396  397  423  434  446  459  484  493  500  528  531
  532  547  550  554  558  570  572  578  581  587  592  610  612  617
  631  636  639  640  658  660  669  687  694  700  727  729  771  773
  779  781  786  816  820  839  840  844  847  852  858  866  874  875
  880  883  904  905  909  923  929  930  942  948  958  965  967  976
  979  981  985 1011 1067 1085 1089 1091 1104 1105 1110 1127 1138 1143
 1152 1159 1171 1185 1201 1213 1226 1233 1251 1277 1278 1289 1298 1299
 1317 1320 1325 1336 1356 1366 1377 1395 1398 1422 1429 1431 1447 1450
 1455 1457 1464 1473 1490 1492 1515 1517 1530 1537 1563 1565 1572 1594
 1602 1621 1628 1641 1644 1649 1659 1677 1683 1698 1701 1710 1727 1731
 1749 1758 1774 1781 1783 1787 1790 1793 1800 1811 1812 1814 1821 1829
 1830 1842 1



Train Index:  [   0    1    2 ... 6605 6606 6607] 

Test Index:  [  11   20   22   35   42   59   72   78  128  140  147  163  182  187
  226  237  243  261  275  282  313  340  359  361  363  365  369  372
  377  392  398  399  405  406  407  424  440  444  448  456  460  464
  475  482  521  522  541  542  546  560  593  597  601  630  632  637
  649  651  685  689  697  702  703  707  714  735  739  767  772  780
  788  819  821  836  845  850  855  867  882  891  900  916  921  940
  961  962  970  972  986  997 1000 1004 1008 1009 1013 1014 1019 1026
 1031 1035 1037 1048 1070 1072 1078 1080 1099 1100 1124 1125 1132 1133
 1140 1177 1179 1190 1198 1206 1208 1229 1232 1239 1241 1246 1249 1283
 1284 1286 1326 1327 1337 1343 1347 1364 1367 1368 1372 1373 1376 1382
 1403 1404 1408 1428 1440 1442 1449 1459 1462 1475 1486 1494 1508 1518
 1523 1546 1549 1577 1582 1584 1601 1613 1624 1637 1638 1642 1651 1653
 1660 1667 1674 1681 1689 1692 1725 1735 1764 1766 1775 1778 1795 1798
 1799 1826 1



Train Index:  [   0    3    4 ... 6604 6606 6607] 

Test Index:  [   1    2   10   13   21   27   28   36   38   40   46   53   60   74
   89  105  129  145  148  153  158  165  171  186  207  213  215  236
  244  250  269  271  273  277  281  284  294  306  310  311  316  317
  329  338  352  374  390  394  400  403  409  427  431  441  454  458
  481  513  514  515  519  520  526  529  545  552  556  571  575  583
  588  590  591  609  614  619  628  629  634  661  686  692  704  715
  726  738  749  750  754  762  778  791  792  795  806  813  824  826
  849  884  885  886  892  902  906  913  914  917  920  933  934  935
  936  938  939  955  963  968  974  983  987  988 1007 1040 1043 1050
 1058 1098 1107 1131 1136 1165 1182 1240 1245 1247 1262 1265 1266 1269
 1273 1274 1279 1280 1285 1290 1301 1304 1307 1308 1316 1333 1341 1355
 1380 1386 1387 1389 1390 1394 1418 1435 1439 1441 1453 1458 1463 1466
 1521 1524 1525 1547 1555 1560 1567 1576 1579 1591 1596 1607 1622 1640
 1655 1690 1



Train Index:  [   0    1    2 ... 6605 6606 6607] 

Test Index:  [   3    5    9   54   55   66   77   94  117  125  126  133  137  164
  201  223  224  232  235  242  249  255  260  280  285  302  320  327
  341  345  357  362  364  375  384  385  388  404  419  455  467  470
  477  499  504  510  523  525  536  539  603  638  645  666  667  671
  673  688  713  716  722  723  732  760  768  770  774  793  797  814
  822  823  827  832  846  872  894  899  901  918  919  922  924  928
  943  946  950  951  953  966  984  989  992 1063 1065 1077 1118 1120
 1141 1145 1155 1156 1160 1169 1191 1202 1205 1214 1217 1248 1250 1252
 1254 1256 1276 1296 1311 1312 1314 1318 1324 1329 1331 1332 1342 1353
 1358 1384 1388 1396 1460 1481 1493 1531 1540 1542 1573 1574 1581 1603
 1604 1605 1619 1633 1639 1645 1646 1668 1673 1680 1719 1724 1753 1759
 1761 1792 1810 1823 1824 1827 1828 1834 1838 1848 1852 1854 1856 1857
 1863 1865 1887 1901 1906 1908 1917 1944 1958 1968 1969 1970 1975 1976
 1984 1990 1



Train Index:  [   0    1    2 ... 6605 6606 6607] 

Test Index:  [   4   16   34   64   98  114  116  119  127  130  143  146  154  160
  161  189  190  197  200  202  206  225  241  253  262  301  335  337
  379  391  395  412  417  466  474  488  492  502  512  524  537  559
  562  563  569  574  580  595  600  606  608  616  623  635  646  659
  663  675  698  699  709  717  728  740  753  766  769  775  784  804
  815  825  830  851  853  854  860  863  870  876  878  895  954  956
  959  971  975  980  991  995 1012 1015 1016 1021 1028 1045 1059 1060
 1062 1066 1069 1076 1081 1082 1122 1139 1147 1148 1150 1154 1166 1167
 1184 1218 1243 1267 1275 1282 1291 1300 1306 1348 1363 1365 1369 1409
 1445 1470 1478 1484 1495 1500 1516 1527 1528 1529 1568 1570 1571 1585
 1597 1625 1631 1635 1636 1648 1663 1664 1678 1679 1682 1685 1686 1687
 1693 1696 1707 1715 1722 1733 1734 1757 1794 1802 1806 1816 1819 1841
 1843 1853 1895 1899 1930 1931 1955 1959 1981 1982 1986 1998 2027 2038
 2041 2047 2



In [11]:
print(np.mean(scores))


0.9311400515228957


In [3]:
df.drop(['MO','DY'], axis = 1, inplace =True)