# Imports

In [None]:
from pandas import read_csv
from matplotlib import pyplot
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import StratifiedKFold
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from pandas.plotting import scatter_matrix
from datetime import datetime
from pandas import concat
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from datetime import date
from numpy import array

# Getting data

In [None]:
# Relative path of the CSV file that holds the weather records.
data = "seattle-weather.csv"
# Load the whole file into a pandas DataFrame.
dataset = read_csv(data)
# Last expression of the cell: the notebook displays the first rows.
dataset.head()

# Exploring what to do with the data

In [None]:
# Print pandas' summary statistics for the dataset.
print(dataset.describe())

## Arranging our data for training

In [None]:
# Report the (rows, columns) shape of the DataFrame.
print("DF Dimensions:", dataset.shape)
# Last expression of the cell: the notebook displays each column's dtype.
dataset.dtypes

In [None]:
# Draw pandas' histogram grid for the dataset, then render the figure.
dataset.hist()
pyplot.show()

In [None]:

# Plot the pairwise scatter-plot matrix of the dataset, then render the figure.
scatter_matrix(dataset)
pyplot.show()

### Dealing with the date

In [None]:
# Break each 'date' string apart at the '-' separators so that every piece
# lands in its own column, then give the three pieces readable names.
dateDF = (
    dataset['date']
    .str.split('-', expand=True)
    .rename(columns={0: 'Year', 1: 'Month', 2: 'Day'})
)
# Last expression of the cell: the notebook displays the first rows.
dateDF.head()


In [None]:
# Concatenate the split date columns with the remaining feature columns.

# Show every original column except the first one (the raw 'date' string).
print(dataset.iloc[:,1:])
# Column-wise (axis=1) concatenation: Year/Month/Day first, then the original
# columns from index 1 onwards.
weatherDF = concat([dateDF,dataset.iloc[:,1:]],join='outer',axis=1)
# Last expression of the cell: the notebook displays the combined frame.
weatherDF

### Setting up X and y and splitting them into training and validation sets

In [None]:
# Work on the raw array behind the combined frame.
arrayWeather = weatherDF.values
# Features: every column except the first (Year) and the last.
# Target: the last column.
X, y = arrayWeather[:, 1:-1], arrayWeather[:, -1]

# Hold out 20% of the rows for validation; the fixed seed makes the shuffled
# split reproducible.
X_train, X_validation, Y_train, Y_validation = train_test_split(
    X,
    y,
    test_size=0.20,
    shuffle=True,
    random_state=1,
)

# Create the models to be used

### Choose what model to use

In [None]:
# Candidate classifiers to compare, stored as (short name, unfitted estimator)
# pairs.
#
# The one-vs-rest scheme for logistic regression is made explicit with
# OneVsRestClassifier: the `multi_class='ovr'` argument (and using liblinear
# directly on a multiclass target) is deprecated in recent scikit-learn
# releases, while this wrapper works on old and new versions alike.
from sklearn.multiclass import OneVsRestClassifier

models = [
    ('LR', OneVsRestClassifier(LogisticRegression(solver='liblinear'))),
    ('LDA', LinearDiscriminantAnalysis()),
    ('KNN', KNeighborsClassifier()),
    ('CART', DecisionTreeClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC(gamma='auto')),
]

In [None]:
# Per-model arrays of fold accuracies, and the matching model names
# (both are consumed by the comparison plot).
results = []
names = []
# The splitter does not depend on the model, so build it once. With an integer
# random_state every call to split() yields the same shuffled folds, so all
# models are still scored on identical splits.
kfold = StratifiedKFold(n_splits=10, random_state=1, shuffle=True)
for name, model in models:
    # 10-fold stratified cross-validation on the training split only.
    cv_results = cross_val_score(model, X_train, Y_train, cv=kfold, scoring='accuracy')
    results.append(cv_results)
    names.append(name)
    # name: mean accuracy (standard deviation across folds)
    print('%s: %f (%f)' % (name, cv_results.mean(), cv_results.std()))

In [None]:
# One box per model, built from its cross-validation accuracies.
pyplot.boxplot(results)
# Boxes are drawn at x = 1..N by default; label them with the model names.
# Setting the ticks explicitly avoids boxplot's `labels=` keyword, which newer
# matplotlib releases deprecate in favour of `tick_labels=`.
pyplot.xticks(range(1, len(names) + 1), names)
pyplot.title('Algorithm Comparison')
pyplot.show()

Logistic regression and GaussianNB are the best-performing models.

In [None]:



# Train Gaussian Naive Bayes on the training split; fit() hands back the
# estimator itself, so construction and fitting can be chained.
model = GaussianNB().fit(X_train, Y_train)
# Debug output: element type of a validation row before and after predicting.
print(type(X_validation[0]))
predictions = model.predict(X_validation)
print(type(X_validation[0]))

# Compare the predictions with the held-out labels.
print(accuracy_score(Y_validation, predictions))
print(confusion_matrix(Y_validation, predictions))
print(classification_report(Y_validation, predictions))

In [None]:
from numpy import array
# One hand-written sample row; dtype=float also converts the numeric strings.
inputData = array([['9', '4', 10.9, 10.6, 2.8, 4.5]], dtype=float)
# Fresh Gaussian Naive Bayes model fitted on the same training split.
modelTest = GaussianNB().fit(X_train, Y_train)
predictionsTest = modelTest.predict(inputData)
print('The predicted value from the model is:', predictionsTest[0])
# Accuracy is taken from the validation predictions made in the earlier cell.
print('With an accuracy of %s percent.' % (accuracy_score(Y_validation, predictions) * 100))


# Creating an interactive module for testing the model

In [None]:

def _promptTwoDigits(prompt, low, high):
    """Ask until the reply is exactly two decimal digits within [low, high].

    Returns the reply exactly as typed (e.g. '09').
    """
    while True:
        reply = input(prompt)
        if len(reply) == 2 and reply.isdecimal() and low <= int(reply) <= high:
            return reply


def _promptNumber(prompt, isValid=None):
    """Ask until the reply parses as a finite float that isValid accepts.

    Returns a (reply exactly as typed, parsed float value) pair.
    """
    import math  # local import: only this helper needs it

    while True:
        reply = input(prompt)
        try:
            value = float(reply)
        except ValueError:
            # Empty or non-numeric text: ask again instead of crashing.
            continue
        if math.isfinite(value) and (isValid is None or isValid(value)):
            return reply, value


def getInputs(useSysDate = True):
    """Collect the six values used as one sample for a weather prediction.

    Args:
        useSysDate: when true, month and day are taken from today's date
            (as ints); otherwise the user is asked for each of them in
            two-digit form (e.g. '09') and they are returned as typed.

    Returns:
        A tuple ``(month, day, precipitation, minTemp, maxTemp, wind)``.
        The last four items are the strings the user typed; every one of
        them is guaranteed to parse as a finite float.

    NOTE(review): the tuple lists the minimum temperature before the maximum.
    The training matrix follows the CSV column order, which is not visible
    here -- confirm that both orders agree, otherwise the two temperatures are
    fed to the model swapped.
    """
    if useSysDate:
        curDay = date.today()
        month = curDay.month
        day = curDay.day
    else:
        month = _promptTwoDigits('Enter the current month in numeric form(e.g. 09 = september):', 1, 12)
        day = _promptTwoDigits('Enter the current day of the month:', 1, 31)

    #------- precipitation --------
    # Must be a number and cannot be negative.
    precipInput, _ = _promptNumber('Enter the precipitation', lambda value: value >= 0)

    #-------- minTemp --------
    minTempInput, minTemp = _promptNumber('Enter the minimum temp')

    #-------- maxTemp --------
    # Compare numerically: as strings, '10' sorts before '9', which would
    # reject a valid maximum.
    maxTempInput, _ = _promptNumber('Enter the maximum temp', lambda value: value >= minTemp)

    #-------- wind ---------
    windInput, _ = _promptNumber('Enter the wind')

    return month,day,precipInput,minTempInput,maxTempInput,windInput

def predictCurWeather(inputList = None,askInput = True):
    """Train GaussianNB on the weather CSV and print a prediction for one sample.

    Args:
        inputList: optional sequence of feature values for the sample, with
            one value per feature column of the training data. It is used
            only when ``askInput`` is false and its length matches;
            otherwise the values are requested through ``getInputs()``.
        askInput: when true (the default), always ask the user for the values.

    Prints the predicted label and the model's accuracy on the held-out
    validation split. Returns None.
    """
    # Rebuild the training data exactly like the exploratory cells do.
    data = "seattle-weather.csv"
    dataset = read_csv(data)
    dateDF = dataset['date']
    dateDF = dateDF.str.split('-',expand=True)
    dateDF = dateDF.rename(columns={0:'Year',1:'Month',2:'Day'})
    weatherDF = concat([dateDF,dataset.iloc[:,1:]],join='outer',axis=1)
    arrayWeather = weatherDF.values
    # Features: every column except the first (Year) and the last.
    # Target: the last column.
    X = arrayWeather[:,1:-1]
    y = arrayWeather[:,-1]

    X_train, X_validation, Y_train, Y_validation = train_test_split(X, y, test_size=0.20, random_state=1, shuffle=True)

    # A supplied sample is usable only if it has one value per feature
    # column; anything else falls back to interactive input.
    featureCount = X.shape[1]
    if askInput or inputList is None or len(inputList) != featureCount:
        inputParams = getInputs()
    else:
        inputParams = inputList

    model = GaussianNB()
    model.fit(X_train, Y_train)
    predictionsTest = model.predict(array([inputParams],dtype = float))
    print('The predicted value from the model is:',predictionsTest[0])
    # Evaluate this model on the validation split instead of relying on a
    # `predictions` variable left behind by another notebook cell.
    validationPredictions = model.predict(X_validation)
    print('With an accuracy of %s percent.'%(accuracy_score(Y_validation, validationPredictions)*100))
    


    


# Run the interactive prediction once with the default arguments.
predictCurWeather()
