# Liquidity Regressor Machine Learning Initial Framework
A framework for predicting the optimal level of liquidity for a company. 

In [None]:
# Import initially needed libraries

In [1]:
import numpy as NP
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

In [2]:
# Import my preferred ML library Scikit-Learn

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
# Load the sample liquidity data set from CSV into a DataFrame.
# NOTE(review): the path below looks like a placeholder (the original note says
# "Must change") -- presumably it should point at the sample CSV in the repo.
liquidity_data = pd.read_csv(filepath_or_buffer='Sample Data in GitHub Repo')

In [6]:
# Data Engineering Check

In [None]:
# Summary statistics for the loaded data; kept as a bare trailing expression so
# the notebook renders the resulting table.
liquidity_data.describe()

In [None]:
# Separate the data into "inputs" (every column except the target) and
# "target" (the available_liquidity column the models learn to predict).
inputs = liquidity_data.drop(columns='available_liquidity')
target = liquidity_data['available_liquidity']

In [None]:
# Hold out 20% of the rows for testing and keep the four resulting splits
# together in "results"; the fixed random_state makes the split repeatable.
results = train_test_split(
    inputs,
    target,
    test_size=0.2,
    random_state=1,
)

In [None]:
# Data integrity check: show the container type, the number of splits and the
# shape of each split so the row totals can be compared with the summary
# statistics above.
print(type(results), len(results), '--%--', sep='\n')
for split in results:
    print(split.shape)

In [None]:
# Prepare data for ingestion for our chosen model 

In [None]:
# Give each split its own name, then print the shapes again to confirm they
# match the shapes reported by the earlier integrity check.
input_train, input_test, target_train, target_test = results
for split in (input_train, input_test, target_train, target_test):
    print(split.shape)

In [None]:
# Import required functions from Scikit-Learn
from sklearn.linear_model import Lasso, Ridge
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Dictionary of model pipelines keyed by a short model name. Each pipeline
# chains a StandardScaler with the regressor; Lasso and Ridge are added first.
pipelines = {}
pipelines['lasso'] = make_pipeline(StandardScaler(), Lasso(random_state=1))
pipelines['ridge'] = make_pipeline(StandardScaler(), Ridge(random_state=1))

In [None]:
# Extend the pipeline dictionary with Elastic Net, Random Forest and Gradient
# Boosting, each preceded by the same StandardScaler step as the linear models.
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
from sklearn.linear_model import ElasticNet

pipelines.update({
    'enet': make_pipeline(StandardScaler(), ElasticNet(random_state=1)),
    'rf': make_pipeline(StandardScaler(), RandomForestRegressor(random_state=1)),
    'gb': make_pipeline(StandardScaler(), GradientBoostingRegressor(random_state=1)),
})

In [None]:
# Progress check: print every pipeline name next to the type of the object
# stored under it to confirm the dictionary was built as expected.
for name in pipelines:
    print(name, type(pipelines[name]))

### Hyperparameter Tuning

In [None]:
# Hyperparameter grid for Lasso.
# Key format: the model class name (lowercase), two underscores, then the
# hyperparameter name.
lasso_hyperparameters = {'lasso__alpha': [0.01, 0.05, 0.1, 0.5, 1, 5]}

In [None]:
# Hyperparameter grid for Ridge: the candidate alpha values to search over.
ridge_hyperparameters = {}
ridge_hyperparameters['ridge__alpha'] = [0.01, 0.05, 0.1, 0.5, 1, 5]

In [None]:
# Hyperparameter grid for Elastic Net: every alpha candidate is combined with
# every l1_ratio candidate during the grid search.
enet_hyperparameters = {}
enet_hyperparameters['elasticnet__alpha'] = [0.01, 0.05, 0.1, 0.5, 1, 5]
enet_hyperparameters['elasticnet__l1_ratio'] = [0.1, 0.3, 0.5, 0.7, 0.9]

In [None]:
# Hyperparameter grid for Random Forest.
#   n_estimators - the number of decision trees in the forest.
#   max_features - how many features the model may consider, given here as a
#                  fraction of all features; limiting it helps prevent overfitting.
# 1.0 (all features) replaces the former 'auto' option: for regressors 'auto'
# meant "all features", and newer scikit-learn releases reject the string.
rf_hyperparameters = {
    'randomforestregressor__n_estimators': [100, 200],
    'randomforestregressor__max_features': [1.0, 0.3, 0.6],
}

In [None]:
# Hyperparameter grid for Gradient Boosting: candidate values for the number
# of estimators, the learning rate and the maximum depth.
gb_hyperparameters = {}
gb_hyperparameters['gradientboostingregressor__n_estimators'] = [100, 200]
gb_hyperparameters['gradientboostingregressor__learning_rate'] = [0.05, 0.1, 0.2]
gb_hyperparameters['gradientboostingregressor__max_depth'] = [1, 3, 5]

In [None]:
# Collect the per-model grids in one dictionary, using the same short names
# that key the pipelines dictionary so the two can be looked up together.
hyperparameter_grids = {}
hyperparameter_grids['lasso'] = lasso_hyperparameters
hyperparameter_grids['ridge'] = ridge_hyperparameters
hyperparameter_grids['enet'] = enet_hyperparameters
hyperparameter_grids['rf'] = rf_hyperparameters
hyperparameter_grids['gb'] = gb_hyperparameters

In [None]:
# Sanity check: for each expected model name, report whether a grid is
# registered under it and whether the registered object is a plain dict.
for name in ('enet', 'gb', 'ridge', 'rf', 'lasso'):
    if name not in hyperparameter_grids:
        print( name, 'was not found')
        continue
    grid = hyperparameter_grids[name]
    if type(grid) is dict:
        print( name, 'was found, and it is a grid.' )
    else:
        print( name, 'was found, but it is not a grid.' )

### Cross Validation

In [None]:
# Import GridSearchCV
from sklearn.model_selection import GridSearchCV

#### Creating Untrained ML Model

In [None]:
# Example of a single untrained model: a 5-fold cross-validated grid search
# wrapping the Lasso pipeline and its hyperparameter grid.
untrained_lasso_model = GridSearchCV(
    estimator=pipelines['lasso'],
    param_grid=hyperparameter_grids['lasso'],
    cv=5,
)

In [None]:
# Empty container that will hold one untrained grid-search model per pipeline.
models = dict()

In [None]:
# Wrap every pipeline in a 5-fold cross-validated grid search over its own
# hyperparameter grid and register it under the pipeline's name.
for name, pipeline in pipelines.items():
    models[name] = GridSearchCV(pipeline, hyperparameter_grids[name], cv=5)

In [None]:
# Kept as a bare trailing expression so the notebook displays the keys.
models.keys() # confirm every model was registered under the expected name

## Training & Tuning the Machine Learning Model

In [None]:
# Fit every grid search on the training split and report each model as soon
# as it finishes.
for name, search in models.items():
    search.fit(input_train, target_train)
    print(name, 'is trained and tuned')

## Select A Winning Model

In [None]:
# Import the r-squared and mean absolute error metrics
from sklearn.metrics import r2_score, mean_absolute_error

In [None]:
# Compare test predictions against actual target variable values.
# Each model's predictions are stored in "preds" under the model's name so they
# stay available for inspection after the loop instead of being overwritten.
preds = {}

for key, model in models.items():
    preds[key] = model.predict(input_test)
    print(key)
    # R-squared and mean absolute error on the held-out test split, rounded to
    # three decimals for display.
    print('R-Squared:', round(r2_score(target_test, preds[key]), 3))
    print('MAE:', round(mean_absolute_error(target_test, preds[key]), 3))
    print('---Divider---')

In [None]:
# *** Gradient Booster Should be the winning model from the sample data *** #

In [None]:
# Scatter the Gradient Boosting model's test-set predictions (x axis) against
# the actual target values (y axis).
preds = models['gb'].predict(input_test)

plt.scatter(x=preds, y=target_test)
plt.ylabel('actual')
plt.xlabel('predicted')
plt.show()

In [None]:
# To use the model to advise clients, build a DataFrame in the same format as
# the training inputs (that is, without the target variable column) containing
# a single row of client data, then pass that DataFrame to the trained model
# to calculate the model's prediction.