In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import os
import math

In [2]:
%matplotlib inline

In [3]:
from utils.config import DATAFOLDER, SVM_PARAM, OPTIMIZER
from models.Regressor import getmodel, myCallbacks

In [4]:
file_loc = os.path.join(DATAFOLDER, "main.csv")
df = pd.read_csv(file_loc)

In [5]:
outlier = []
for counter, i in enumerate(df['Wind speed']):
    if i > 10.0:
        outlier.append(counter)
print(outlier)
df = df.drop(outlier,axis=0)

[2314, 2448, 3062]


In [6]:
df.head()

Unnamed: 0,Rainfall,Max Temperature,Min Temperature,Maximum Humidity,Min Humidity,Wind Direction,CC,EVP,Wind speed,Solar Radiation,date
0,0.0,17.0,6.8,92.0,65.0,125.0,2.0,0.5,1.0,13.475,1/1
1,0.0,18.0,6.8,97.0,64.0,125.0,2.0,0.0,1.2,13.475,1/2
2,0.0,17.5,6.8,98.0,74.0,130.0,2.0,0.0,0.8,13.475,1/3
3,0.0,19.2,10.4,97.0,50.0,360.0,2.0,0.8,1.2,13.475,1/4
4,0.0,20.3,10.4,95.0,58.0,270.0,2.0,1.2,1.2,13.475,1/5


In [8]:
df = df.drop(columns=['date',"CC"]).dropna()


## Averagin min and max parameter
max_t = df["Max Temperature"]
min_t = df["Min Temperature"]
min_h = df['Min Humidity']
max_h = df["Maximum Humidity"]

t = (max_t.values + min_t.values)/2
h = (min_h.values + max_h.values)/2

df = df.drop(columns=["Max Temperature","Min Temperature",'Min Humidity',"Maximum Humidity"])
df["Temperature"] = t
df["Humidity"] = h

ValueError: Length of values does not match length of index

In [None]:
fig = plt.figure(figsize=(20,20))
ax = fig.add_subplot(111, projection='polar')
c = ax.scatter([x*math.pi/180 for x in df["Wind Direction"]], df["Wind speed"], c=df["Solar Radiation"], cmap='hsv', alpha=0.5)

In [None]:
df.corr()

In [None]:
f = plt.figure(figsize=(19, 15))
ax = f.add_subplot(111)
cax = ax.matshow(df.corr(), interpolation='nearest')
f.colorbar(cax)
ax.set_xticks(range(0,10))
ax.set_yticks(range(0,10))
ax.set_xticklabels(" " + df.corr().columns, rotation=45)
ax.set_yticklabels(" " + df.corr().columns)
plt.show()

## Preprocessing of data

In [None]:
Y = df["Wind speed"]
X = df.drop(columns=["Wind speed"])

In [None]:
x = X.values #returns a numpy array
min_max_scaler = MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
X = pd.DataFrame(x_scaled)

In [None]:
# y = np.reshape(Y.values, (len(Y.values),1))
# min_max_scaler = MinMaxScaler()
# y_scaled = min_max_scaler.fit_transform(y)
# Y = pd.DataFrame(y_scaled)

In [None]:
X.describe()

In [None]:
Y.describe()

## Normalised prediction using features

In [None]:
from utils.helper import Plotter
from sklearn.model_selection import train_test_split
plotter = Plotter()

In [None]:
X_val = X[-10:]
Y_val = Y[-10:]



X_train, X_test, Y_train, Y_test = train_test_split(X[0:-10],Y[0:-10], test_size=0.2, random_state = 42)

In [None]:
print(X_train.shape)
print(Y_train.shape)
print(X_test.shape)
print(Y_test.shape)

In [None]:
from sklearn import svm
for i in SVM_PARAM:
    print(i)
    svr = svm.SVR(C=i['c'], epsilon=i['e'], gamma="auto", coef0=i['s'])
    svr.fit(X_train, Y_train)
    plotter.find_accurracy_on_testset(svr, X_test, Y_test,clip=True, plot=True)
#     plotter.find_accurracy_on_testset(svr, X_val, Y_val,clip=False, plot= True)

In [None]:
for i in OPTIMIZER:
    print("------------------------------------------\n" + str(i).split("(")[0].split(".")[-1].split(" ")[0]+ "\n------------------------------------------\n")
    model = getmodel(i)
    _ = model.fit(X_train.values, Y_train.values,
          batch_size=64,
          steps_per_epoch=25,
          epochs=25,
          verbose=0,
          callbacks=[myCallbacks()])
    plotter.find_accurracy_on_testset(model, X_test, Y_test,clip=True, plot=True)
    print("\n\n\n\n")