In [1]:
# Imports
import pandas as pd
from pathlib import Path
import tensorflow as tf
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder


In [2]:
#Import data for nn_continuous value prediction
def import_data(file_name, data_dir='Data_Prep_Output'):
    """Load one prepared CSV file and remove the non-feature columns.

    Parameters
    ----------
    file_name : str
        Base name of the CSV file, without the ``.csv`` extension
        (for example a sector name).
    data_dir : str or pathlib.Path, default 'Data_Prep_Output'
        Directory containing the CSV files. The default matches the
        location that was previously hard-coded, so existing calls behave
        the same.

    Returns
    -------
    pandas.DataFrame
        The CSV contents without the 'Unnamed: 0', 'reportedCurrency',
        'ticker', 'Sector' and 'date_x' columns.

    Raises
    ------
    FileNotFoundError
        If ``<data_dir>/<file_name>.csv`` does not exist.
    KeyError
        If any of the columns to drop is missing from the file.
    """
    #EH:  Create DataFrame from csv
    csv_path = Path(data_dir) / f'{file_name}.csv'
    sector_data_df = pd.read_csv(csv_path)

    #EH:  Drop unnamed index, currency, ticker, sector and date columns.
    # 'date_x' is removed as well, so no date handling is needed afterwards.
    columns_to_drop = ['Unnamed: 0', 'reportedCurrency', 'ticker', 'Sector', 'date_x']
    return sector_data_df.drop(columns=columns_to_drop)

In [3]:
def data_prep(dataframe):
    """One-hot encode the ``datetime64[ns]`` columns of ``dataframe``.

    Only columns whose dtype is ``datetime64[ns]`` are treated as
    categorical here; every other column is passed through unchanged.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Input data.

    Returns
    -------
    pandas.DataFrame
        A new frame holding the non-datetime columns followed by one
        indicator column per distinct value of each datetime column. The
        row index of ``dataframe`` is preserved. When there are no datetime
        columns a plain copy of ``dataframe`` is returned.
    """
    #Isolating the categorical variables (datetime64[ns] columns only)
    categorical_variables = list(dataframe.dtypes[dataframe.dtypes == "datetime64[ns]"].index)

    # Nothing to encode: the encoder would raise on a zero-column input,
    # so hand back an untouched copy instead.
    if not categorical_variables:
        return dataframe.copy()

    #Calling an instance of OneHotEncoder
    # `sparse` was renamed to `sparse_output` in newer scikit-learn releases
    # and later removed; fall back to the old keyword on older installs.
    try:
        enc = OneHotEncoder(sparse_output=False)
    except TypeError:
        enc = OneHotEncoder(sparse=False)

    #Encoding the categorical variables
    encoded_data = enc.fit_transform(dataframe[categorical_variables])

    #Creating a new dataframe of the categorical variables
    # Reuse the caller's index so the column-wise concat below lines rows up
    # even when the index is not the default 0..n-1 range.
    encoded_df = pd.DataFrame(
        encoded_data,
        columns=enc.get_feature_names_out(categorical_variables),
        index=dataframe.index,
    )

    #Combining the newly encoded categorical variables with the remaining columns
    remaining_df = dataframe.drop(columns=categorical_variables)
    return pd.concat([remaining_df, encoded_df], axis=1)

In [4]:
def data_separation(df, dependent_variable):
    """Split ``df`` into train/test sets and standardise the features.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing the target column and all feature columns.
    dependent_variable : str
        Name of the target column.

    Returns
    -------
    tuple
        ``(y_train, y_test, X_train_scaled, X_test_scaled)`` in that order.
        The targets are left unscaled; the feature sets are the outputs of
        ``StandardScaler.transform``.
    """
    # Target on one side, every other column on the other
    target = df[dependent_variable]
    features = df.drop(columns=[dependent_variable])

    # Fixed random_state keeps the split reproducible between runs
    features_train, features_test, y_train, y_test = train_test_split(
        features, target, random_state=1
    )

    # Fit the scaler on the training features only, then apply the same
    # transformation to both the training and the testing features
    fitted_scaler = StandardScaler().fit(features_train)
    X_train_scaled = fitted_scaler.transform(features_train)
    X_test_scaled = fitted_scaler.transform(features_test)

    return y_train, y_test, X_train_scaled, X_test_scaled

In [5]:
#Selecting the number of input features
#number_input_features = len(X_train_scaled.iloc[0])

def neural_network(input_features, output, layer_1, layer_2, layer_3, X_train_scaled,y_train,epochs,save_path):
    """Build, train and save a three-hidden-layer regression network.

    Parameters
    ----------
    input_features : int
        Number of input features (columns of ``X_train_scaled``).
    output : int
        Number of output neurons.
    layer_1, layer_2, layer_3 : int
        Number of nodes in the first, second and third hidden layers.
    X_train_scaled : array-like
        Training features passed to ``fit``.
    y_train : array-like
        Training targets passed to ``fit``.
    epochs : int
        Number of training epochs.
    save_path : str or pathlib.Path
        File the trained model is written to.

    Returns
    -------
    The object returned by ``Sequential.fit`` (the training history).
    """
    # Create the Sequential model instance
    nn = Sequential()

    # Three ReLU hidden layers; the first one also declares the input size
    nn.add(Dense(units=layer_1, input_dim=input_features, activation="relu"))
    nn.add(Dense(units=layer_2, activation="relu"))
    nn.add(Dense(units=layer_3, activation="relu"))

    # Linear output layer for continuous-value prediction
    nn.add(Dense(units=output, activation="linear"))

    # Compile the Sequential model
    nn.compile(loss="mean_squared_error", optimizer="adam", metrics=["mse"])

    # Fit first: saving before training would persist the randomly
    # initialised weights instead of the trained model.
    history = nn.fit(X_train_scaled, y_train, epochs=epochs)

    # Make sure the target directory exists, then save the trained model
    Path(save_path).parent.mkdir(parents=True, exist_ok=True)
    nn.save(save_path)

    return history

In [6]:
#EH:  Sector list

# Sectors for which a neural-network model is created, one CSV file per sector
sector_list_nn = [
    'Consumer Non-Durables',
    'Electronic Technology',
    'Finance',
    'Health Technology',
    'Process Industries',
    'Producer Manufacturing',
    'Technology Services',
    'Utilities',
]

In [7]:

#EH: Loop through each sector, to create and save nn models

for each in sector_list_nn:
    # Load the sector's data; the data_prep() encoding step is currently disabled
    sector_data_df = import_data(each)

    # Split and scale, using 'q_roi' as the target column
    y_train, y_test, X_train_scaled, X_test_scaled = data_separation(sector_data_df, 'q_roi')

    # The feature count sets the input size and the first hidden layer width
    shape = X_train_scaled.shape[1]
    print(f'{each} shape: {shape}')

    # One model file per sector
    save_path = Path(f'model/nn_{each}.h5')

    # Create the model: hidden layers of (shape // 2 + 1), 10 and 4 nodes,
    # a single output neuron and 170 epochs
    neural_network(
        input_features=int(shape),
        output=1,
        layer_1=shape // 2 + 1,
        layer_2=10,
        layer_3=4,
        X_train_scaled=X_train_scaled,
        y_train=y_train,
        epochs=170,
        save_path=save_path,
    )
    print("*"*100)

    

Consumer Non-Durables shape: 34
Epoch 1/170
Epoch 2/170
Epoch 3/170
Epoch 4/170
Epoch 5/170
Epoch 6/170
Epoch 7/170
Epoch 8/170
Epoch 9/170
Epoch 10/170
Epoch 11/170
Epoch 12/170
Epoch 13/170
Epoch 14/170
Epoch 15/170
Epoch 16/170
Epoch 17/170
Epoch 18/170
Epoch 19/170
Epoch 20/170
Epoch 21/170
Epoch 22/170
Epoch 23/170
Epoch 24/170
Epoch 25/170
Epoch 26/170
Epoch 27/170
Epoch 28/170
Epoch 29/170
Epoch 30/170
Epoch 31/170
Epoch 32/170
Epoch 33/170
Epoch 34/170
Epoch 35/170
Epoch 36/170
Epoch 37/170
Epoch 38/170
Epoch 39/170
Epoch 40/170
Epoch 41/170
Epoch 42/170
Epoch 43/170
Epoch 44/170
Epoch 45/170
Epoch 46/170
Epoch 47/170
Epoch 48/170
Epoch 49/170
Epoch 50/170
Epoch 51/170
Epoch 52/170
Epoch 53/170
Epoch 54/170
Epoch 55/170
Epoch 56/170
Epoch 57/170
Epoch 58/170
Epoch 59/170
Epoch 60/170
Epoch 61/170
Epoch 62/170
Epoch 63/170
Epoch 64/170
Epoch 65/170
Epoch 66/170
Epoch 67/170
Epoch 68/170
Epoch 69/170
Epoch 70/170
Epoch 71/170
Epoch 72/170
Epoch 73/170
Epoch 74/170
Epoch 75/170
Ep