### 00 Environment

In [1]:
# Inject a CSS rule that forces elements with the `.container` class to 100% width.
# `display` and `HTML` are imported from the public `IPython.display` module:
# importing `display` from `IPython.core.display` is a deprecated path.
from IPython.display import HTML, display

display(HTML("<style>.container { width:100% !important; }</style>"))

In [2]:
%%capture
!pip install lightgbm

In [3]:
import lightgbm as lgb
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

from IPython.display import clear_output

#Path to my modules
import sys
from model_eval import *

Note for macOS users: when LightGBM is installed from PyPI with the ``pip install lightgbm`` command, you no longer need to install the gcc compiler.
Instead, you need the OpenMP library, which LightGBM requires in order to run on systems that use the Apple Clang compiler.
You can install OpenMP with the following command: ``brew install libomp``.


### 01 Creating our Example Target Variables

In [4]:
# Noise-free cubic target: y = x^3 for 100 x values drawn uniformly from [-2, 2).
# A locally seeded generator makes the sample identical on every
# Restart & Run All without touching NumPy's global random state.
rng = np.random.default_rng(0)
x_train = rng.uniform(low=-2, high=2, size=100)
y_train = x_train * x_train * x_train

# Build the frame ordered by x (no in-place mutation, so the cell is idempotent).
data = pd.DataFrame(data={"x": x_train, "y": y_train}).sort_values(by="x")

# Scatter plot of the target values.
# NOTE(review): `figure` and `show` are not imported in the visible cells;
# presumably they arrive via `from model_eval import *` -- confirm.
p = figure(plot_width=600, plot_height=400, x_axis_label="x",y_axis_label="y")
p.xaxis.axis_label_text_font_size = "14pt"
p.yaxis.axis_label_text_font_size = "14pt"
p.circle(data.x,data.y, line_width=2, legend="Actual")
show(p)



### 02 Learning Rate Demonstration

In [5]:
"""
Fitting our Model
"""

# LightGBM trains from its own Dataset container: one feature column (x)
# and one label column (y), both passed as single-column DataFrames.
lgb_data = lgb.Dataset(
    data=pd.DataFrame(data.x),
    label=pd.DataFrame(data.y),
)

# Depth-1 trees, learning rate of 1, at least 5 rows per leaf.
params = dict(
    learning_rate=1,
    max_depth=1,
    min_data_in_leaf=5,
)

# Train for two boosting rounds.
lr_model = lgb.train(params, lgb_data, num_boost_round=2)

# Score the training points so predictions can be drawn against the target.
data["prediction"] = lr_model.predict(pd.DataFrame(data.x))

# Plot actual vs. predicted; previous cell output is cleared first.
clear_output(wait=True)
p = figure(
    plot_width=600,
    plot_height=400,
    x_axis_label="x",
    y_axis_label="y",
)
p.xaxis.axis_label_text_font_size = "14pt"
p.yaxis.axis_label_text_font_size = "14pt"
p.line(data.x, data.y, line_width=2, legend="Actual")
p.line(data.x, data.prediction, line_width=2, legend="Prediction", color="red")
show(p)



### 03 Depth Demonstration

In [6]:
# Step-shaped target: y is 1 where 1 < x < 2 and 0 elsewhere,
# for 1000 x values drawn uniformly from [0, 3).
# A locally seeded generator makes the sample identical on every
# Restart & Run All without touching NumPy's global random state.
rng = np.random.default_rng(0)
x_train = rng.uniform(low=0, high=3, size=1000)
y_train = ((x_train > 1) & (x_train < 2)).astype(int)

# Build the frame ordered by x (no in-place mutation, so the cell is idempotent).
data = pd.DataFrame(data={"x": x_train, "y": y_train}).sort_values(by="x")

# Line plot of the target values.
# NOTE(review): `figure` and `show` are not imported in the visible cells;
# presumably they arrive via `from model_eval import *` -- confirm.
p = figure(plot_width=600, plot_height=400, x_axis_label="x",y_axis_label="y")
p.xaxis.axis_label_text_font_size = "14pt"
p.yaxis.axis_label_text_font_size = "14pt"
p.line(data.x,data.y, line_width=2, legend="Actual")
show(p)



In [7]:
"""
Fitting our Model
"""

# LightGBM trains from its own Dataset container: one feature column (x)
# and one label column (y), both passed as single-column DataFrames.
lgb_data = lgb.Dataset(
    data=pd.DataFrame(data.x),
    label=pd.DataFrame(data.y),
)

# Depth-2 trees, learning rate of 1, at least 5 rows per leaf.
params = dict(
    learning_rate=1,
    max_depth=2,
    min_data_in_leaf=5,
)

# range(1, 2) yields only training_rounds == 1, so a single model is fitted
# and plotted; widening the range redraws the plot once per round count.
for training_rounds in range(1, 2):
    lr_model = lgb.train(params, lgb_data, num_boost_round=training_rounds)

    # Score the training points so predictions can be drawn against the target.
    data["prediction"] = lr_model.predict(pd.DataFrame(data.x))

    # Plot actual vs. predicted, replacing the previous iteration's output.
    clear_output(wait=True)
    p = figure(
        plot_width=600,
        plot_height=400,
        x_axis_label="x",
        y_axis_label="y",
    )
    p.xaxis.axis_label_text_font_size = "14pt"
    p.yaxis.axis_label_text_font_size = "14pt"
    p.line(data.x, data.y, line_width=2, legend="Actual")
    p.line(data.x, data.prediction, line_width=2, legend="Prediction", color="red")
    show(p)



### 04 Overfitting Minimization Demonstration

In [8]:
# Noisy cubic target: y = x^3 + Gaussian noise (scale 2) for 100 x values
# drawn uniformly from [-2, 2).
# A locally seeded generator makes both draws identical on every
# Restart & Run All without touching NumPy's global random state.
rng = np.random.default_rng(0)
x_train = rng.uniform(low=-2, high=2, size=100)
y_train = x_train * x_train * x_train + rng.normal(scale=2, size=100)

# Build the frame ordered by x (no in-place mutation, so the cell is idempotent).
data = pd.DataFrame(data={"x": x_train, "y": y_train}).sort_values(by="x")

# Scatter plot of the noisy target values.
# NOTE(review): `figure` and `show` are not imported in the visible cells;
# presumably they arrive via `from model_eval import *` -- confirm.
p = figure(plot_width=600, plot_height=400, x_axis_label="x",y_axis_label="y")
p.xaxis.axis_label_text_font_size = "14pt"
p.yaxis.axis_label_text_font_size = "14pt"
p.circle(data.x,data.y, line_width=2, legend="Actual")
show(p)



In [9]:
"""
Fitting our Model
"""

# LightGBM trains from its own Dataset container: one feature column (x)
# and one label column (y), both passed as single-column DataFrames.
lgb_data = lgb.Dataset(
    data=pd.DataFrame(data.x),
    label=pd.DataFrame(data.y),
)

# Sweep min_gain_to_split over 0.0, 1.0, ..., 24.0; each value trains a fresh
# 50-round model and redraws the actual-vs-predicted plot in place.
for min_gain in np.arange(0.0, 25, 1):
    params = dict(
        learning_rate=0.1,
        max_depth=8,
        min_data_in_leaf=5,
        min_gain_to_split=min_gain,
    )

    lr_model = lgb.train(params, lgb_data, num_boost_round=50)

    # Score the training points so predictions can be drawn against the target.
    data["prediction"] = lr_model.predict(pd.DataFrame(data.x))

    # Plot actual (points) vs. predicted (red line), replacing the previous output.
    clear_output(wait=True)
    p = figure(
        plot_width=600,
        plot_height=400,
        x_axis_label="x",
        y_axis_label="y",
    )
    p.xaxis.axis_label_text_font_size = "14pt"
    p.yaxis.axis_label_text_font_size = "14pt"
    p.circle(data.x, data.y, line_width=2, legend="Actual")
    p.line(data.x, data.prediction, line_width=2, legend="Prediction", color="red")
    show(p)

