In [15]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsRegressor
from sklearn.mixture import GaussianMixture

In [2]:
def import_dataset(normalised=True,scaleMethod='Standard'):
    '''
    Loads 'MLDataset.csv' and splits it into training and testing sets.

    The 'Unnamed: 0' column is dropped; the first three remaining columns are
    used as inputs (X) and every later column as a target (Y). 15% of the rows
    are held out for testing with random_state=0 so the split is repeatable.

    Input:
        normalised -- if truthy the data is standardised, otherwise raw values are returned
        scaleMethod -- Type of scaler to be used if normalised is True
                       (only 'Standard' is supported)

    Output:
        (X_train,X_test,Y_train,Y_test) -- the training and testing dataset
        scaler -- StandardScaler fitted on the [X | Y] columns of the training
                  rows (use it for the inverse transform), or the string
                  'None' when the data is not scaled

    Raises:
        ValueError -- if normalised is truthy and scaleMethod is not 'Standard'
    '''
    # Validate before any I/O: previously an unsupported scaleMethod fell
    # through both branches and crashed on the return with UnboundLocalError.
    if normalised and scaleMethod != 'Standard':
        raise ValueError(
            "Unsupported scaleMethod %r; only 'Standard' is available" % (scaleMethod,)
        )

    data = pd.read_csv('MLDataset.csv')
    dataS = data.drop('Unnamed: 0',axis=1)

    X = dataS.iloc[:,0:3].values
    Y = dataS.iloc[:,3:].values
    X_train,X_test,Y_train,Y_test = train_test_split(X,Y,test_size=0.15,random_state=0)

    if not normalised:
        # The string sentinel (not None) is kept for backward compatibility.
        return X_train,X_test,Y_train,Y_test,'None'

    # Split first, then fit the scaler on the training rows only, so that no
    # statistics of the held-out rows leak into the training data.
    n_inputs = X_train.shape[1]
    train = np.concatenate((X_train,Y_train),axis=1)
    test = np.concatenate((X_test,Y_test),axis=1)

    scaler = StandardScaler()
    scaler.fit(train)
    train = scaler.transform(train)
    test = scaler.transform(test)

    X_train,Y_train = train[:,:n_inputs],train[:,n_inputs:]
    X_test,Y_test = test[:,:n_inputs],test[:,n_inputs:]

    return X_train,X_test,Y_train,Y_test,scaler

In [3]:
def inverseTransform(scaler,*arr):
    '''
    Used to perform Inverse Transformation on normalised dataset

    The arrays are concatenated column-wise and handed to
    scaler.inverse_transform, so together they must supply the columns the
    scaler expects, in the same order.

    Input:
        scaler -- Instance of Normaliser used. None or the string 'None'
                  (the value returned for an un-normalised dataset) means
                  "no scaling": the concatenated data is returned unchanged.
        *arr -- 2-D arrays with the same number of rows to be concatenated

    Output:
        arrInverse -- the concatenated data mapped back to original units
    '''
    data = np.concatenate(arr,axis=1)

    # Un-normalised datasets carry the sentinel 'None' instead of a scaler;
    # treat it as the identity transform instead of raising AttributeError.
    if scaler is None or (isinstance(scaler,str) and scaler == 'None'):
        return data

    return scaler.inverse_transform(data)

In [12]:
def cost(y_test,y_pred):
    '''
    Calculates error of the model as the mean absolute percentage error

    Every element contributes |y_test - y_pred| / |y_test|; the contributions
    are summed, divided by the number of elements in y_test and multiplied
    by 100.

    Input:
        y_test -- true target values (array-like, 1-D or 2-D)
        y_pred -- predicted values, same shape as y_test

    Output:
        error -- mean absolute percentage error, in percent

    Note: a zero in y_test divides by zero, so the result becomes inf or nan.
    '''
    y_test = np.asarray(y_test,dtype=float)
    y_pred = np.asarray(y_pred,dtype=float)

    relative = np.abs((y_test-y_pred)/y_test)
    # Dividing by y_test.size (rather than shape[0]*shape[1]) gives the same
    # value for 2-D input and also works for 1-D targets.
    error = np.sum(relative)/y_test.size*100

    return error

## K Neighbors Regressor 

In [None]:
# Grid search for KNeighborsRegressor over scaling on/off, neighbour count and
# weighting. Rscore maps (test error, train error) -> (normalise, nbr, wgt).
# NOTE: two configurations with exactly identical error pairs share a key, so
# the later one overwrites the earlier one.
Rscore = {}
for normalise in [True,False]:
    X_train,X_test,Y_train,Y_test,scaler = import_dataset(normalised=normalise)
    n_inputs = X_train.shape[1]

    # The reference targets do not depend on the model, so bring them back to
    # original units once per dataset rather than once per fitted model.
    if normalise:
        true_tr = inverseTransform(scaler,X_train,Y_train)[:,n_inputs:]
        true_ts = inverseTransform(scaler,X_test,Y_test)[:,n_inputs:]
    else:
        true_tr,true_ts = Y_train,Y_test

    for nbr in range(2,100):
        for wgt in ['uniform', 'distance']:
            Model = KNeighborsRegressor(n_neighbors=nbr,weights=wgt)
            Model.fit(X_train,Y_train)
            Ytr_pred = Model.predict(X_train)
            Yts_pred = Model.predict(X_test)

            if normalise:
                # Errors are always measured in original units, so undo the
                # scaling of the predictions before comparing.
                pred_tr = inverseTransform(scaler,X_train,Ytr_pred)[:,n_inputs:]
                pred_ts = inverseTransform(scaler,X_test,Yts_pred)[:,n_inputs:]
            else:
                pred_tr,pred_ts = Ytr_pred,Yts_pred

            error_tr = cost(true_tr,pred_tr)
            error = cost(true_ts,pred_ts)

            # Ordered tuple (not a set) so each position has a fixed meaning.
            param = (normalise,nbr,wgt)
            Rscore[(error,error_tr)]=param

In [None]:
# Show the ten entries with the lowest test error instead of dumping the whole
# dictionary. Keys are (test error, train error) tuples, so sorting by key
# ranks by test error first. NaN errors, if any, make the order unreliable.
sorted(Rscore.items(), key=lambda item: item[0])[:10]

## Gaussian Mixture Model

In [None]:
def _gmm_full_covariances(gmm):
    '''Returns the component covariances as an array of shape (k, d, d) for any covariance_type.'''
    cov = gmm.covariances_
    k,dim = gmm.means_.shape
    if gmm.covariance_type == 'full':
        return cov
    if gmm.covariance_type == 'tied':
        return np.broadcast_to(cov,(k,dim,dim))
    if gmm.covariance_type == 'diag':
        return np.stack([np.diag(c) for c in cov])
    # 'spherical': one variance per component
    return np.stack([c*np.eye(dim) for c in cov])


def _gmm_conditional_mean(gmm,X,n_inputs):
    '''
    Gaussian mixture regression: E[y | x] under a mixture fitted on [x, y].

    Input:
        gmm -- GaussianMixture fitted on rows laid out as [inputs | outputs]
        X -- array (n_samples, n_inputs) of inputs to predict for
        n_inputs -- number of leading columns of the joint data that are inputs

    Output:
        array (n_samples, n_outputs) with the conditional mean of the outputs
    '''
    X = np.asarray(X,dtype=float)
    covs = _gmm_full_covariances(gmm)
    n_comp,dim = gmm.means_.shape
    log_w = np.empty((X.shape[0],n_comp))
    cond = np.empty((X.shape[0],n_comp,dim-n_inputs))

    for k in range(n_comp):
        mu_x,mu_y = gmm.means_[k,:n_inputs],gmm.means_[k,n_inputs:]
        S_xx = covs[k,:n_inputs,:n_inputs]
        S_yx = covs[k,n_inputs:,:n_inputs]
        diff = X-mu_x
        solved = np.linalg.solve(S_xx,diff.T)            # S_xx^-1 (x - mu_x)
        cond[:,k,:] = mu_y+(S_yx@solved).T               # component-wise E[y | x]
        maha = np.sum(diff*solved.T,axis=1)
        _,logdet = np.linalg.slogdet(S_xx)
        # log(pi_k * N(x; mu_x, S_xx)) up to a constant shared by all components
        log_w[:,k] = np.log(gmm.weights_[k])-0.5*(maha+logdet)

    # Normalise in log space for numerical stability.
    log_w -= log_w.max(axis=1,keepdims=True)
    resp = np.exp(log_w)
    resp /= resp.sum(axis=1,keepdims=True)
    return np.einsum('nk,nko->no',resp,cond)


# Grid search for a Gaussian mixture used as a regressor for the first target
# column. GaussianMixture is unsupervised: fit() ignores y and predict()
# returns component indices, so the mixture is fitted on the joint [X, y] data
# and predictions are the conditional mean E[y | x].
# Rscore maps (test error, train error) -> (normalise, cpt, cvr).
Rscore = {}
for normalise in [True,False]:
    X_train,X_test,Y_train,Y_test,scaler = import_dataset(normalised=normalise)
    n_inputs = X_train.shape[1]
    joint_train = np.concatenate((X_train,Y_train[:,[0]]),axis=1)
    for cpt in range(2,4):
        for cvr in ['full', 'tied','diag','spherical']:
            # Fixed seed: the EM initialisation is random otherwise.
            Model = GaussianMixture(n_components=cpt,covariance_type=cvr,random_state=0)
            Model.fit(joint_train)
            Ytr_pred = _gmm_conditional_mean(Model,X_train,n_inputs)
            Yts_pred = _gmm_conditional_mean(Model,X_test,n_inputs)
            if not normalise:
                error = cost(Y_test[:,[0]],Yts_pred)
                error_tr = cost(Y_train[:,[0]],Ytr_pred)
            else:
                # The scaler expects every column, so the untouched targets
                # are appended as padding; only the predicted column (index
                # n_inputs) is scored, matching the un-normalised branch.
                true = inverseTransform(scaler,X_train,Y_train)
                pred = inverseTransform(scaler,X_train,Ytr_pred,Y_train[:,1:])

                error_tr = cost(true[:,[n_inputs]],pred[:,[n_inputs]])

                true = inverseTransform(scaler,X_test,Y_test)
                pred = inverseTransform(scaler,X_test,Yts_pred,Y_test[:,1:])

                error = cost(true[:,[n_inputs]],pred[:,[n_inputs]])
            # Ordered tuple (not a set) so each position has a fixed meaning.
            param = (normalise,cpt,cvr)
            Rscore[(error,error_tr)]=param

## Artificial Neural Network