# Imports and Installs

In [0]:
# Imports
import pandas as pd
import numpy as np 
import pandas_profiling 
from sklearn import preprocessing # for category encoder
from sklearn.neighbors import NearestNeighbors
from sklearn.model_selection import train_test_split
# joblib is more efficient than pickle for persisting large fitted objects such as the Nearest Neighbors model
import joblib

In [0]:
# Load the Spotify track-features CSV and discard every row that contains a null value
df = pd.read_csv(
    'https://raw.githubusercontent.com/aguilargallardo/DS-Unit-2-Applied-Modeling/master/data/SpotifyFeatures.csv'
).dropna()

In [0]:
# Sanity check: (rows, columns) of the frame after the null rows were dropped
df.shape

## Neural Network

#### Preprocessing

In [0]:
# Ordinal encodings for the categorical music-theory columns.
# Series.map turns every value that is missing from its dictionary into NaN, so the
# keys must match the raw strings exactly (no stray whitespace).
time_sig_encoding = {'0/4': 0, '1/4': 1,
                     '3/4': 3, '4/4': 4,
                     '5/4': 5}

# 'G#' previously had a leading space (' G#'), which silently mapped every G# track to NaN.
key_encoding = {'A': 0, 'A#': 1, 'B': 2,
                'C': 3, 'C#': 4, 'D': 5,
                'D#': 6, 'E': 7, 'F': 8,
                'F#': 9, 'G': 10, 'G#': 11}

mode_encoding = {'Major': 0, 'Minor': 1}

# NOTE: this cell is not idempotent — once the columns hold integers, running it a
# second time maps every value to NaN. Re-run the data-loading cell first.
df['key'] = df['key'].map(key_encoding)
df['time_signature'] = df['time_signature'].map(time_sig_encoding)
df['mode'] = df['mode'].map(mode_encoding)

# helper function to one hot encode genre

def encode_and_bind(original_dataframe, feature_to_encode):
    """Append indicator (dummy) columns for one feature to a DataFrame.

    Parameters
    ----------
    original_dataframe : pandas.DataFrame
        Frame that contains the column to encode. It is not modified.
    feature_to_encode : str
        Name of the column handed to ``pd.get_dummies``.

    Returns
    -------
    pandas.DataFrame
        A new frame holding every original column (the encoded column itself
        is kept) followed by the columns produced by ``pd.get_dummies``.
    """
    # Select with a list so get_dummies receives a one-column DataFrame
    single_column = original_dataframe[[feature_to_encode]]
    indicator_columns = pd.get_dummies(single_column)
    return pd.concat([original_dataframe, indicator_columns], axis=1)

# One-hot encode genre, then drop every row that still contains a null.
# NOTE(review): nulls at this point most likely originate from the .map() calls
# earlier in this cell — Series.map yields NaN for any value that is absent from
# its encoding dictionary — so check the dictionary keys if many rows vanish here.
df = encode_and_bind(df, 'genre').dropna()

In [0]:
# Check the preprocessing worked: the mapped columns should now be numeric and
# the genre indicator columns should be present
df.dtypes

# MODELING: Nearest Neighbors
resources: https://scikit-learn.org/stable/modules/neighbors.html

In [0]:
# Unsupervised nearest-neighbour model; no arguments are passed, so scikit-learn's defaults apply
neigh = NearestNeighbors()

In [0]:
# Columns left out of the model inputs (the ones that were dictionary-encoded)
remove = ['key', 'mode', 'time_signature']
# Keep every column from position 4 onward, except the excluded ones
features = [column for column in df.columns[4:] if column not in remove]
# target = 'track_id'

In [0]:
# Feature matrix for the nearest-neighbour model (no target is needed)
X = df[features]
# y = df[target]

# Show (rows, columns). The former trailing comma made this cell display a
# 1-tuple wrapping the shape instead of the shape itself.
X.shape

In [0]:
# Fit the neighbour model on the feature matrix.
# NOTE(review): no feature scaling is applied in the preceding cells; distance-based
# neighbours are dominated by large-magnitude columns — confirm this is intended.
neigh.fit(X) # NN doesn't need to fit Y

### Autoencoder

In [0]:
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model

In [0]:
# Presumably the width of the autoencoder's encoded representation — not used in any visible cell.
# NOTE(review): the 784-float / 24.5x figure looks carried over from a 28x28 image
# example and does not describe the feature matrix built in this notebook — confirm.
encoding_dim = 32  # 32 floats -> compression of factor 24.5, assuming the input is 784 floats

In [0]:
from sklearn.neighbors import NearestNeighbors

# 10-neighbour ball-tree model intended to be fitted on the encoded representation.
# NOTE(review): `encoded_imgs` is not defined in any visible cell, so this cell raises
# NameError on a fresh Restart & Run All — the autoencoder cells that should produce
# it appear to be missing.
nn = NearestNeighbors(n_neighbors=10, algorithm='ball_tree')
nn.fit(encoded_imgs)

#### Nicole's Imported Model

In [0]:
# K nearest neighbors NN https://www.reddit.com/r/MachineLearning/comments/2f8jff/using_neural_networks_for_nearest_neighbor/

In [0]:
 import numpy as np

In [0]:
# Feed-forward neural network (multi-layer perceptron)
# Seed NumPy's global RNG so anything drawn from it afterwards is reproducible
np.random.seed(812)

# Input layer: 7 samples, each with 3 binary features
X = np.array(([0, 0, 1],
              [0, 1, 1],
              [1, 0, 1],
              [0, 1, 0],
              [1, 0, 0],
              [1, 1, 1],
              [0, 0, 0]), dtype=float)

# Targets: one label per row of X. Each label equals the XOR of that row's first
# two features. The original array carried an 8th label with no matching input
# row, which was removed so X and y line up.
y = np.array(([0],
              [1],
              [1],
              [1],
              [1],
              [0],
              [0]), dtype=float)

# Guard against the inputs and targets drifting out of sync again
assert X.shape[0] == y.shape[0], "X and y must have the same number of rows"

In [0]:
# Feature normalization
# Scale each feature column by its own maximum; inputs on a comparable scale help
# our model converge on a solution faster.
X = X / X.max(axis=0)
# NOTE(review): the labels defined in the preceding cell are 0/1, so dividing by 100
# turns them into 0/0.01. This, and the exam-score wording printed below, looks
# carried over from a different example — confirm it is intended.
y = y / 100

print("Studying, Sleeping \n", X)
print("Test Score \n", y)

In [0]:
# neural network class for function (REVIEW THIS CELL)
class NeuralNetwork:
    """Weight container for a feed-forward network with one hidden layer.

    Only the architecture sizes and the random weight initialisation are defined
    here; the class has no forward-pass or training methods.
    """

    def __init__(self, inputs=3, hiddenNodes=4, outputNodes=1):
        """Store the layer sizes and draw the initial weights.

        Parameters
        ----------
        inputs : int, default 3
            Number of input features.
        hiddenNodes : int, default 4
            Number of units in the hidden layer.
        outputNodes : int, default 1
            Number of output units.

        The defaults reproduce the previously hard-coded 3-4-1 architecture.
        Weights come from ``np.random.rand`` (uniform on [0, 1)), so they depend
        on the state of NumPy's global RNG.
        """
        # Set up Architecture of Neural Network
        self.inputs = inputs
        self.hiddenNodes = hiddenNodes
        self.outputNodes = outputNodes

        # Initial Weights
        # (inputs x hiddenNodes) matrix for the first layer: inputs to hidden
        self.weights1 = np.random.rand(self.inputs, self.hiddenNodes)

        # (hiddenNodes x outputNodes) matrix for hidden to output
        self.weights2 = np.random.rand(self.hiddenNodes, self.outputNodes)

# Export Model with Joblib

In [0]:
# Relative output path for the serialized model
filename = 'NearestNeighbor.sav'

In [0]:
# Serialize the NearestNeighbors object `neigh` to `filename` with joblib
joblib.dump(neigh, filename)