### Imports

In [1]:
import numpy as np
from sklearn.linear_model import Lasso 

### Data loading

In [2]:
# Paths are named once so the loader and the header reader cannot drift apart.
STATE_A_PATH = '../data/ala2/ala2_stateA.dat'
STATE_B_PATH = '../data/ala2/ala2_stateB.dat'

# np.loadtxt treats lines starting with '#' as comments, so the header line
# is skipped here and has to be read separately below.
A_data = np.loadtxt(STATE_A_PATH)
B_data = np.loadtxt(STATE_B_PATH)

# Only state A's header is read; state B is assumed to use the same column
# layout -- TODO confirm against the data files.
with open(STATE_A_PATH, 'r') as f:
    header = f.readline()

# Drop the leading comment marker and split on any run of whitespace.
# Unlike header[2:-1].split(' '), this does not depend on an exact "# " prefix,
# a trailing newline, or single-space separators.
columns = header.lstrip('#').split()

### Dataset preparation

In [3]:
SEED = 0  # fixed seed: the shuffle is identical on every Restart & Run All

# Stack both states into a single sample matrix and build the matching
# label vector: +1 for every state-A row, -1 for every state-B row.
X_data = np.vstack([A_data, B_data])
Y_data = np.hstack([
    np.ones(A_data.shape[0]),
    -np.ones(B_data.shape[0])
])

# Shuffle samples and labels with the same permutation so they stay paired.
perm = np.random.default_rng(SEED).permutation(Y_data.shape[0])
X_data = X_data[perm, 1:]  # drop column 0 (time); only the features remain
Y_data = Y_data[perm]

### Model training

In [19]:
# Regularisation strength of the Lasso penalty. It controls how many features
# get selected: low α = high number of features.
alpha = 0.1
model = Lasso(alpha=alpha)

In [20]:
# Fit on the shuffled features against the +1 / -1 state labels.
model.fit(X=X_data, y=Y_data)

Lasso(alpha=0.1)

In [21]:
# Dense coefficient vector, one entry per feature column the model was fit on.
coefs = np.ravel(model.coef_)

# The model was fit on the data with its leading time column removed, so a
# coefficient index is not necessarily an index into `columns`. Align the names
# from the right: if the header also names the dropped leading column(s), the
# offset skips them; if it names only the features, the offset is 0.
name_offset = len(columns) - coefs.size
assert name_offset >= 0, "header names fewer columns than the model has features"
feature_names = columns[name_offset:]

# Features kept by the L1 penalty, ordered by decreasing squared coefficient.
selected = np.flatnonzero(coefs)
sq_coefs = coefs[selected] ** 2
sq_norm = sq_coefs.sum()
ranked = selected[np.argsort(-sq_coefs, kind="stable")]

print("The relevant features are (in order of importance):")
for idx in ranked:
    # Round only for display; the weight is computed from the unrounded value.
    coeff = np.around(coefs[idx], decimals=3)
    weight = np.around(coefs[idx] ** 2 / sq_norm, decimals=3)
    print(f"{feature_names[idx]} with coefficient {coeff} and relative weight {weight}")

The relevant features are (in order of importance):
dih_cos1 with coefficient -0.623 and relative weight 0.675
dih_cos5 with coefficient 0.432 and relative weight 0.325
