In [2]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.metrics import accuracy_score
import pandas as pd
import tensorflow as tf
import numpy as np
from sqlalchemy import create_engine
import config

In [3]:
# Connecting pandas to the SQL database
engine = create_engine(f'{config.dialect}://{config.user}:{config.password}@{config.host}:{config.port}/{config.database}') 

In [4]:
#Reading the postgres Database
df = pd.read_sql("SELECT * FROM final_table", engine)
df

Unnamed: 0,years,gics_sector,ticker,avg_daily_close_change,avg_adj_close,ann_real_gdp_gwth_pct,net_savings_rate_household_disp_inc_pct,tax_on_corp_profits_pctofgdp,tax_on_goods_and_services_pctofgdp,imports_of_goods_and_services_pctofgdp,exports_of_goods_and_services_pctofgdp,infl_rate_all_items_ann_grwth_pct,lt_int_rate_pct,ppi_usd_per_usd,teritary_attainment_aged_25_to_64_pct,unemp_rate_total_pct,labor_comp_per_unit_labor_input_ann_growth_pct,pop_grwth_rate_pct,life_expectancy_yrs
0,2013,Communication Services,DIS,0.001813,57.213066,1.84,6.32,2.09,4.40,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.70,78.8
1,2013,Communication Services,DISH,0.001965,42.985992,1.84,6.32,2.09,4.40,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.70,78.8
2,2013,Communication Services,EA,0.002103,21.654790,1.84,6.32,2.09,4.40,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.70,78.8
3,2013,Communication Services,GOOGL,0.001919,442.563933,1.84,6.32,2.09,4.40,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.70,78.8
4,2013,Communication Services,LUMN,-0.000377,16.635402,1.84,6.32,2.09,4.40,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.70,78.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1051,2020,Utilities,EXC,0.000375,27.024073,-3.40,17.05,1.32,4.32,13.28,10.16,1.23,0.89,1.0,50.06,8.09,6.80,0.35,77.3
1052,2020,Utilities,NEE,0.001381,64.396236,-3.40,17.05,1.32,4.32,13.28,10.16,1.23,0.89,1.0,50.06,8.09,6.80,0.35,77.3
1053,2020,Utilities,PEG,0.000416,50.990339,-3.40,17.05,1.32,4.32,13.28,10.16,1.23,0.89,1.0,50.06,8.09,6.80,0.35,77.3
1054,2020,Utilities,SO,0.000438,54.000639,-3.40,17.05,1.32,4.32,13.28,10.16,1.23,0.89,1.0,50.06,8.09,6.80,0.35,77.3


In [5]:
#Looking at the different data types
df.dtypes

years                                               int64
gics_sector                                        object
ticker                                             object
avg_daily_close_change                            float64
avg_adj_close                                     float64
ann_real_gdp_gwth_pct                             float64
net_savings_rate_household_disp_inc_pct           float64
tax_on_corp_profits_pctofgdp                      float64
tax_on_goods_and_services_pctofgdp                float64
imports_of_goods_and_services_pctofgdp            float64
exports_of_goods_and_services_pctofgdp            float64
infl_rate_all_items_ann_grwth_pct                 float64
lt_int_rate_pct                                   float64
ppi_usd_per_usd                                   float64
teritary_attainment_aged_25_to_64_pct             float64
unemp_rate_total_pct                              float64
labor_comp_per_unit_labor_input_ann_growth_pct    float64
pop_grwth_rate

In [6]:
#Cleaning the data for the Machine Learning Model
clean_df = df.drop(columns=["years", "ticker"])
clean_df

Unnamed: 0,gics_sector,avg_daily_close_change,avg_adj_close,ann_real_gdp_gwth_pct,net_savings_rate_household_disp_inc_pct,tax_on_corp_profits_pctofgdp,tax_on_goods_and_services_pctofgdp,imports_of_goods_and_services_pctofgdp,exports_of_goods_and_services_pctofgdp,infl_rate_all_items_ann_grwth_pct,lt_int_rate_pct,ppi_usd_per_usd,teritary_attainment_aged_25_to_64_pct,unemp_rate_total_pct,labor_comp_per_unit_labor_input_ann_growth_pct,pop_grwth_rate_pct,life_expectancy_yrs
0,Communication Services,0.001813,57.213066,1.84,6.32,2.09,4.40,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.70,78.8
1,Communication Services,0.001965,42.985992,1.84,6.32,2.09,4.40,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.70,78.8
2,Communication Services,0.002103,21.654790,1.84,6.32,2.09,4.40,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.70,78.8
3,Communication Services,0.001919,442.563933,1.84,6.32,2.09,4.40,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.70,78.8
4,Communication Services,-0.000377,16.635402,1.84,6.32,2.09,4.40,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.70,78.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1051,Utilities,0.000375,27.024073,-3.40,17.05,1.32,4.32,13.28,10.16,1.23,0.89,1.0,50.06,8.09,6.80,0.35,77.3
1052,Utilities,0.001381,64.396236,-3.40,17.05,1.32,4.32,13.28,10.16,1.23,0.89,1.0,50.06,8.09,6.80,0.35,77.3
1053,Utilities,0.000416,50.990339,-3.40,17.05,1.32,4.32,13.28,10.16,1.23,0.89,1.0,50.06,8.09,6.80,0.35,77.3
1054,Utilities,0.000438,54.000639,-3.40,17.05,1.32,4.32,13.28,10.16,1.23,0.89,1.0,50.06,8.09,6.80,0.35,77.3


In [7]:
#Creating a column that defines a stable stock
clean_df['is_stable'] = np.where(clean_df['avg_daily_close_change'] < abs(0.001), 1, 0)
clean_df.head()

Unnamed: 0,gics_sector,avg_daily_close_change,avg_adj_close,ann_real_gdp_gwth_pct,net_savings_rate_household_disp_inc_pct,tax_on_corp_profits_pctofgdp,tax_on_goods_and_services_pctofgdp,imports_of_goods_and_services_pctofgdp,exports_of_goods_and_services_pctofgdp,infl_rate_all_items_ann_grwth_pct,lt_int_rate_pct,ppi_usd_per_usd,teritary_attainment_aged_25_to_64_pct,unemp_rate_total_pct,labor_comp_per_unit_labor_input_ann_growth_pct,pop_grwth_rate_pct,life_expectancy_yrs,is_stable
0,Communication Services,0.001813,57.213066,1.84,6.32,2.09,4.4,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.7,78.8,0
1,Communication Services,0.001965,42.985992,1.84,6.32,2.09,4.4,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.7,78.8,0
2,Communication Services,0.002103,21.65479,1.84,6.32,2.09,4.4,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.7,78.8,0
3,Communication Services,0.001919,442.563933,1.84,6.32,2.09,4.4,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.7,78.8,0
4,Communication Services,-0.000377,16.635402,1.84,6.32,2.09,4.4,16.42,13.58,1.46,2.35,1.0,43.91,7.38,1.49,0.7,78.8,1


In [8]:
# Generate our categorical variable list
clean_cat = clean_df.dtypes[clean_df.dtypes == "object"].index.tolist()


# Check the number of unique values in each column
clean_df[clean_cat].nunique()

gics_sector    11
dtype: int64

In [9]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(clean_df[clean_cat]))

# Add the encoded variable names to the dataframe
encode_df.columns = enc.get_feature_names(clean_cat)
encode_df.head()

Unnamed: 0,gics_sector_Communication Services,gics_sector_Consumer Discretionary,gics_sector_Consumer Staples,gics_sector_Energy,gics_sector_Financials,gics_sector_Health Care,gics_sector_Industrials,gics_sector_Information Technology,gics_sector_Materials,gics_sector_Real Estate,gics_sector_Utilities
0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Merge one-hot encoded features and drop the originals
clean_df = clean_df.merge(encode_df,left_index=True, right_index=True)
clean_df = clean_df.drop(clean_cat,1)
clean_df.head()

  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,avg_daily_close_change,avg_adj_close,ann_real_gdp_gwth_pct,net_savings_rate_household_disp_inc_pct,tax_on_corp_profits_pctofgdp,tax_on_goods_and_services_pctofgdp,imports_of_goods_and_services_pctofgdp,exports_of_goods_and_services_pctofgdp,infl_rate_all_items_ann_grwth_pct,lt_int_rate_pct,...,gics_sector_Consumer Discretionary,gics_sector_Consumer Staples,gics_sector_Energy,gics_sector_Financials,gics_sector_Health Care,gics_sector_Industrials,gics_sector_Information Technology,gics_sector_Materials,gics_sector_Real Estate,gics_sector_Utilities
0,0.001813,57.213066,1.84,6.32,2.09,4.4,16.42,13.58,1.46,2.35,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.001965,42.985992,1.84,6.32,2.09,4.4,16.42,13.58,1.46,2.35,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.002103,21.65479,1.84,6.32,2.09,4.4,16.42,13.58,1.46,2.35,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.001919,442.563933,1.84,6.32,2.09,4.4,16.42,13.58,1.46,2.35,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,-0.000377,16.635402,1.84,6.32,2.09,4.4,16.42,13.58,1.46,2.35,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# Split our preprocessed data into our features and target arrays
y = clean_df.is_stable.values
X = clean_df.drop(columns=["is_stable"]).values

# Split the preprocessed data into a training and testing dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=78)

In [12]:
# Create a StandardScaler instances
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)


In [13]:
# Define the basic neural network model
nn_model = tf.keras.models.Sequential()
nn_model.add(tf.keras.layers.Dense(units=16, activation="relu", input_dim=27))
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn_model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn_model.fit(X_train_scaled, y_train, epochs=50)

# Evaluate the model using the test data
model_loss, model_accuracy = nn_model.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
9/9 - 0s - loss: 0.1217 - accuracy: 0.9735 - 124ms/epoch - 14ms/step
Loss: 0.12166283279657364, Accuracy: 0.9734848737716675
