In [None]:
# !pip install --user stg 
# If you are running this notebook on Google Colab, please reset the current python environment via 'Runtime -> Restart runtime' after installation.

import os
import sys

# Location of a local checkout of the `stg` Python package. The original
# developer path is kept as the default, but it can be overridden without
# editing the notebook by setting the STG_PATH environment variable.
stg_path = os.environ.get('STG_PATH', '/home/eli/Eli/Projects/stg/python')

# Append only when missing so re-running this cell does not add duplicates.
if stg_path not in sys.path:
    sys.path.append(stg_path)


In [None]:
from stg import STG
import numpy as np
import torch
import scipy.stats # for creating a simple dataset 
import matplotlib.pyplot as plt 
from sklearn.model_selection import train_test_split

In [None]:
# Create a simple dataset
def create_sin_dataset(n,p,f=2):
    """Generate a synthetic regression dataset with four candidate signal columns.

    The first four columns are drawn uniformly from [0, 5); the remaining
    ``p - 4`` columns are standard-normal noise. The target depends only on
    the first two columns: ``y = sin(x1)**3 * cos(x2)**3``.

    Side effect: the square-wave ``select`` signal is plotted against
    ``linspace(0, 1, n)`` with ``plt.plot``.

    Parameters
    ----------
    n : int
        Number of samples (rows).
    p : int
        Total number of feature columns; must be at least 4.
    f : float, optional
        Frequency of the sine whose sign defines ``select``.

    Returns
    -------
    data : numpy.ndarray, shape (n, p)
        Signal columns followed by the noise columns.
    y : numpy.ndarray, shape (n, 1), dtype float32
        Regression target.
    select : numpy.ndarray, shape (n, 1)
        ``(sign(sin(2*pi*f*t)) + 1) / 2`` for ``t`` in ``linspace(1e-10, 1, n)``,
        i.e. 0 or 1 (0.5 only where the sine is exactly zero).

    Raises
    ------
    ValueError
        If ``p`` is smaller than 4, since the four signal columns would not fit.
    """
    n_signal = 4
    if p < n_signal:
        # Fail early with a clear message instead of the opaque
        # "negative dimensions" error raised further down.
        raise ValueError("p must be at least %d (got %r)" % (n_signal, p))

    # Draw order (x1..x4, then the noise block) is kept so that seeded runs
    # still produce the same numbers.
    x1, x2, x3, x4 = (
        5*(np.random.uniform(0,1,n)).reshape(-1,1) for _ in range(n_signal)
    )

    # Square wave; the linspace starts just above 0 so the first sample does
    # not sit exactly on the sine's zero crossing.
    select = (np.sign(np.sin(2*np.pi*f*np.linspace(1e-10,1,n))).reshape(-1,1) + 1) / 2

    # Only x1 and x2 drive the target; x3 and x4 are returned as features but
    # carry no signal.
    y = np.sin(x1)**3*np.cos(x2)**3

    plt.plot(np.linspace(0,1,n), select)

    relevant = np.hstack((x1,x2,x3,x4))
    noise_vector = scipy.stats.norm.rvs(loc=0, scale=1, size=[n,p-n_signal])
    data = np.concatenate([relevant, noise_vector], axis=1)
    return data, y.astype(np.float32), select


# Dataset dimensions: number of samples and total number of features.
n_size, p_size = 2000, 20

# Build the features, the targets and the square-wave selector in one call.
X_data, y_data, select = create_sin_dataset(n=n_size, p=p_size)

# Report both array shapes, one per line.
print(X_data.shape, y_data.shape, sep='\n')

In [None]:
# Overwrite X_data / y_data with a different synthetic problem:
# 11 standard-normal features (rows) for 2000 samples (columns).
X_data = np.random.normal(0,1,(11,2000))
# Piecewise response switched by feature 10: exp(x0*x1) where it is negative,
# exp(x2+x3+x4+x5-4) otherwise.
y_data = np.exp(X_data[0]*X_data[1]) * (X_data[10] < 0) + np.exp(X_data[2]+X_data[3]+X_data[4]+X_data[5]-4) * (X_data[10] >= 0)
# Threshold at 0.5, which turns y_data into a boolean array of shape (2000,).
# NOTE(review): y_data above is an exponential, so y_data / (1 - y_data) is
# negative wherever y_data > 1 and np.log yields NaN there; NaN > 0.5 is False.
# Presumably a logistic link such as 1 / (1 + y_data) was intended -- TODO confirm.
y_data = 1 / (1 + np.log(y_data / (1 - y_data))) > 0.5

# Put samples on rows: (11, 2000) -> (2000, 11).
X_data = X_data.transpose()


In [None]:
def plot_model(X_data, y_data):
    """Scatter two feature pairs side by side, coloured by the target.

    The left panel shows column 0 against column 1 and the right panel shows
    column 2 against column 3 of ``X_data``.

    Parameters
    ----------
    X_data : numpy.ndarray
        2-D feature array with at least four columns.
    y_data : numpy.ndarray
        Target values; flattened with ``reshape(-1)`` and used as point colours.
    """
    f,ax = plt.subplots(1,2,figsize=(10,5))

    # (x column, y column) shown in each panel, left to right.
    for axis, (col_x, col_y) in zip(ax, ((0, 1), (2, 3))):
        # The colormap is passed by name: plt.cm.get_cmap was deprecated in
        # Matplotlib 3.7 and removed in 3.9, while a name works everywhere.
        axis.scatter(x=X_data[:,col_x], y=X_data[:,col_y], s=150,
                     c=y_data.reshape(-1), alpha=0.4, cmap='RdYlBu')
        # Axis labels are 1-based: '$x_1$' ... '$x_4$'.
        axis.set_xlabel('$x_%d$' % (col_x + 1), fontsize=20)
        axis.set_ylabel('$x_%d$' % (col_y + 1), fontsize=20)
        axis.set_title('Target y')
    # NOTE(review): pyplot-level call, so it presumably affects only the
    # current (last created) axes -- kept as in the original.
    plt.tick_params(labelsize=10)
    
# Visualise the current X_data / y_data with the two-panel scatter defined above.
plot_model(X_data, y_data)

In [None]:
# Outer split: 80% of the samples for training, the rest held out as the test set.
# NOTE(review): no random_state is passed to either call; consider fixing one
# so the partition is reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, train_size=0.8)
# Inner split: keep 90% of the training rows and use the rest for validation.
X_train, X_valid, y_train, y_valid = train_test_split(X_train, y_train, train_size=0.9)

In [None]:
# Column 3 of the first n_size//4 training rows plotted against their targets.
plt.scatter(X_train[:n_size//4,3], y_train[:n_size//4])

In [None]:
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
args_cuda = torch.cuda.is_available()
device = torch.device("cuda" if args_cuda else "cpu")

# Flag forwarded to the STG constructors in this notebook.
feature_selection = True

# Regression model over every input column; the batch size equals the number
# of training rows, so each batch is the full training set.
model = STG(
    task_type='regression',
    input_dim=X_train.shape[1],
    output_dim=1,
    hidden_dims=[500, 50, 10],
    activation='tanh',
    optimizer='SGD',
    learning_rate=0.1,
    batch_size=X_train.shape[0],
    feature_selection=feature_selection,
    sigma=0.5,
    lam=0.1,
    random_state=1,
    device=device,
    extra_args={'gating_net_hidden_dims': [200, 200]},
)

In [None]:
# model._model

In [None]:
# model = STG(task_type='regression',input_dim=X_train.shape[1], output_dim=1, hidden_dims=[500, 50, 10], activation='tanh',
#            optimizer='SGD', learning_rate=1e-1, batch_size=X_train.shape[0], sigma=0.5, lam=0.1, random_state=1)

In [None]:
# Override the learning rate on every optimizer parameter group: the model was
# constructed with learning_rate=0.1, training below runs with 0.01.
for param_group in model._optimizer.param_groups:
    param_group.update(lr=0.01)

In [None]:
# Display the optimizer (private attribute) to check its current settings.
model._optimizer

In [None]:
# Train the main model for 20000 epochs, passing the validation split to fit().
# print_interval=1000 presumably sets how often progress is reported -- confirm
# against the stg documentation.
model.fit(X_train, y_train, nr_epochs=20000, valid_X=X_valid, valid_y=y_valid, print_interval=1000)

In [None]:
def _three_way_split(X, y):
    """Split (X, y) 80/20 into fit/test, then the fit part 90/10 into train/valid."""
    X_fit, X_te, y_fit, y_te = train_test_split(X, y, train_size=0.8)
    X_tr, X_va, y_tr, y_va = train_test_split(X_fit, y_fit, train_size=0.9)
    return X_tr, X_va, X_te, y_tr, y_va, y_te


t_len = len(X_data)

# Boolean row masks for the two values of `select`.
# NOTE(review): `select` is produced by create_sin_dataset; if X_data / y_data
# were regenerated afterwards the masks may not describe the current data -- confirm.
rows_select0 = (select==0).reshape(-1)
rows_select1 = (select==1).reshape(-1)

X_data1, y_data1 = X_data[rows_select0], y_data[rows_select0]
X_data2, y_data2 = X_data[rows_select1], y_data[rows_select1]

# Independent train / valid / test partitions for each subset
# (subset 1 is split first, then subset 2).
X_train1, X_valid1, X_test1, y_train1, y_valid1, y_test1 = _three_way_split(X_data1, y_data1)
X_train2, X_valid2, X_test2, y_train2, y_valid2, y_test2 = _three_way_split(X_data2, y_data2)

plot_model(X_train1, y_train1)
plot_model(X_train2, y_train2)

In [None]:
# One STG regressor per `select` subset. Each model is validated on the
# validation split of its own subset (X_valid1 / X_valid2) rather than on the
# validation set of the full dataset.

model1 = STG(
    task_type='regression',
    input_dim=X_train1.shape[1],
    output_dim=1,
    hidden_dims=[500, 50, 10],
    activation='tanh',
    optimizer='SGD',
    learning_rate=0.1,
    batch_size=X_train1.shape[0],
    feature_selection=feature_selection,
    sigma=0.5,
    lam=0.1,
    random_state=1,
    device=device,
)

model1.fit(X_train1, y_train1, nr_epochs=3000, valid_X=X_valid1, valid_y=y_valid1, print_interval=1000)


model2 = STG(
    task_type='regression',
    input_dim=X_train2.shape[1],
    output_dim=1,
    hidden_dims=[500, 50, 10],
    activation='tanh',
    optimizer='SGD',
    learning_rate=0.1,
    batch_size=X_train2.shape[0],
    feature_selection=feature_selection,
    sigma=0.5,
    lam=0.1,
    random_state=1,
    device=device,
)

model2.fit(X_train2, y_train2, nr_epochs=3000, valid_X=X_valid2, valid_y=y_valid2, print_interval=1000)

In [None]:
# Display the gate values model1 reports in 'prob' mode.
model1.get_gates(mode='prob')

In [None]:
# Display the gate values model2 reports in 'prob' mode.
model2.get_gates(mode='prob')

In [None]:
# Pass the first training row of subset 1 through model1's FeatureSelector
# module and display the result.
model1._model.FeatureSelector(torch.from_numpy(X_train1[0]))

In [None]:
# Pass a training row through model2's FeatureSelector module and display the result.
# NOTE(review): the row comes from X_train1, not X_train2 -- possibly intentional
# (same sample through both selectors), possibly a copy-paste slip; confirm.
model2._model.FeatureSelector(torch.from_numpy(X_train1[0]))

In [None]:
# Mean-squared-error criterion; the following cell reuses it for the test split.
# torch is imported at the top of the notebook.
tmp_loss = torch.nn.MSELoss()
# Training MSE of the main model. Both tensors are flattened so that a 1-D
# target and an (n, 1) prediction are compared element-wise rather than being
# broadcast against each other (assumes predict() returns one value per row).
tmp_loss(torch.Tensor(y_train).reshape(-1), torch.Tensor(model.predict(X_train)).reshape(-1))

In [None]:
# Test MSE of the main model. Both tensors are flattened so that a 1-D target
# and an (n, 1) prediction are compared element-wise rather than being
# broadcast against each other (assumes predict() returns one value per row).
tmp_loss(torch.Tensor(y_test).reshape(-1), torch.Tensor(model.predict(X_test)).reshape(-1))

In [None]:
# Display the gate values the main model reports in 'prob' mode.
model.get_gates(mode='prob') 

In [None]:
# Display the gate values the main model reports in 'raw' mode.
model.get_gates(mode='raw')

In [None]:
# Rank the gates: argsort gives ascending order, [::-1] reverses it, so the
# index of the largest raw gate comes first (assuming a 1-D array is returned).
model.get_gates(mode='raw').argsort()[::-1]

## Testing the model

In [None]:
# Inspect the parameters held by the FeatureSelector module (private attribute).
model._model.FeatureSelector._parameters

In [None]:
def _scatter_panel(axis, X, colours, title):
    """Scatter column 0 against column 1 of ``X`` on ``axis``, coloured by ``colours``."""
    # The colormap is passed by name: plt.cm.get_cmap was deprecated in
    # Matplotlib 3.7 and removed in 3.9, while a name works everywhere.
    axis.scatter(x=X[:,0], y=X[:,1], s=150, c=colours.reshape(-1), alpha=0.4, cmap='RdYlBu')
    axis.set_xlabel('$x_1$',fontsize=10)
    axis.set_ylabel('$x_2$',fontsize=10)
    axis.set_title(title)


f,ax = plt.subplots(2,2,figsize=(10,10))

# Top row: training split, ground truth (left) vs. model output (right).
y_pred=model.predict(X_train)
_scatter_panel(ax[0,0], X_train, y_train, 'Target y')
_scatter_panel(ax[0,1], X_train, y_pred, 'Regression output ')

# Bottom row: the same comparison on the held-out test split.
y_pred_test=model.predict(X_test)
_scatter_panel(ax[1,0], X_test, y_test, 'Target y test')
_scatter_panel(ax[1,1], X_test, y_pred_test, 'Regression output test')

# NOTE(review): pyplot-level call, so it presumably affects only the current
# (last created) axes -- kept as in the original.
plt.tick_params(labelsize=5)

In [None]:
# Mean absolute error on the training and test splits. Targets and predictions
# are flattened first so that a 1-D target and an (n, 1) prediction are
# subtracted element-wise instead of being broadcast to an (n, n) matrix.
(np.abs(y_train.reshape(-1) - y_pred.reshape(-1)).mean(),
 np.abs(y_test.reshape(-1) - y_pred_test.reshape(-1)).mean())

## Model saving / loading

In [None]:
# model.save_checkpoint('trained_model.pt')

In [None]:
# model_tmp = STG(task_type='regression',input_dim=X_train.shape[1], output_dim=1, hidden_dims=[500, 50, 10], activation='tanh',
#     optimizer='SGD', learning_rate=0.1, batch_size=X_train.shape[0], feature_selection=feature_selection, sigma=0.5, lam=0.1, random_state=1, device=device) 

In [None]:
# model_tmp.load_checkpoint('trained_model.pt')