# Features Engineering
Moving averages and moving measures of volatility (i.e., standard deviations) will be generated and used as features for a principal component analysis (PCA), with the goal of reducing the number of input variables.

In [5]:
### Defining function to perform features engineering tasks

def features_engineering(df,verbose=True):
    
    # Importing required libraries
    import numpy as np
    import pandas as pd
    from sklearn.decomposition import PCA
    from sklearn.preprocessing import StandardScaler
    from sklearn.model_selection import train_test_split
    
    
    # Variables
    global X_train
    global X_test
    global X_val
    global y_train
    global y_test
    global y_val
    global X_train_scaled
    global X_test_scaled
    global X_val_scaled
    global X_train_pca
    global X_test_pca
    global X_val_pca
    global X_test_val
    global y_test_val
    
    
    # Calling create_lagged_series function to calculate returns, lagged returns, volume, and direction
    %run create_lagged_series.ipynb
    ret=create_lagged_series(df)
    
    # Creating training, test, and validation sets
    y=ret['today']
    X=ret
    X.drop('today', axis=1, inplace=True)
     
    
    # Using 33% for training and 66% for testing 
    (X_train, X_test_val, y_train, y_test_val) =train_test_split(X,y,test_size=0.75,random_state=1,shuffle=False)
    (X_test, X_val, y_test, y_val) =train_test_split(X_test_val,y_test_val,test_size=0.8,random_state=1,shuffle=False)
        
    # Normalizing data
    # Using the `StandardScaler()` module from scikit-learn to normalize the data
    X_train_scaled = StandardScaler().fit_transform(X_train)
    X_test_scaled = StandardScaler().fit_transform(X_test)
    X_val_scaled = StandardScaler().fit_transform(X_val)
    
    # Performing Principal Component Analysis (PCA), reducing to 3 components
    pca_model=PCA(n_components=3)

    X_train_pca= pca_model.fit_transform(X_train_scaled)
    X_test_pca= pca_model.fit_transform(X_test_scaled)
    X_val_pca = pca_model.fit_transform(X_val_scaled)
        
    if verbose == True:
        print('Explained total variation of 3 components: {}'.format(pca_model.explained_variance_ratio_.sum())) 
    
    