In [3]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import tensorflow as tf

# Import our input dataset
tsla_df = pd.read_csv('Resources/daily.txt')
tsla_df.head(10)

Unnamed: 0,ticker,Date,Open,High,Low,Close,Adj Close,Volume
0,TSLA,2010-06-29,3.8,5.0,3.508,4.778,4.778,93831500
1,TSLA,2010-06-30,5.158,6.084,4.66,4.766,4.766,85935500
2,TSLA,2010-07-01,5.0,5.184,4.054,4.392,4.392,41094000
3,TSLA,2010-07-02,4.6,4.62,3.742,3.84,3.84,25699000
4,TSLA,2010-07-06,4.0,4.0,3.166,3.222,3.222,34334500
5,TSLA,2010-07-07,3.28,3.326,2.996,3.16,3.16,34608500
6,TSLA,2010-07-08,3.228,3.504,3.114,3.492,3.492,38557000
7,TSLA,2010-07-09,3.516,3.58,3.31,3.48,3.48,20253000
8,TSLA,2010-07-12,3.59,3.614,3.4,3.41,3.41,11012500
9,TSLA,2010-07-13,3.478,3.728,3.38,3.628,3.628,13400500


In [4]:
# Generate our categorical variable list
tsla_cat = tsla_df.dtypes[tsla_df.dtypes == "object"].index.tolist()

# Check the number of unique values in each column
tsla_df[tsla_cat].nunique()

ticker       1
Date      2821
dtype: int64

In [5]:
# Create a OneHotEncoder instance
enc = OneHotEncoder(sparse=False)

# Fit and transform the OneHotEncoder using the categorical variable list
encode_df = pd.DataFrame(enc.fit_transform(tsla_df[tsla_cat]))

# Add the encoded variable names to the DataFrame
encode_df.columns = enc.get_feature_names(tsla_cat)
encode_df.head()

Unnamed: 0,ticker_TSLA,Date_2010-06-29,Date_2010-06-30,Date_2010-07-01,Date_2010-07-02,Date_2010-07-06,Date_2010-07-07,Date_2010-07-08,Date_2010-07-09,Date_2010-07-12,...,Date_2021-08-27,Date_2021-08-30,Date_2021-08-31,Date_2021-09-01,Date_2021-09-02,Date_2021-09-03,Date_2021-09-07,Date_2021-09-08,Date_2021-09-09,Date_2021-09-10
0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Merge one-hot encoded features and drop the originals
tsla_df = tsla_df.merge(encode_df,left_index=True, right_index=True)
tsla_df = tsla_df.drop(tsla_cat,1)
tsla_df.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,ticker_TSLA,Date_2010-06-29,Date_2010-06-30,Date_2010-07-01,...,Date_2021-08-27,Date_2021-08-30,Date_2021-08-31,Date_2021-09-01,Date_2021-09-02,Date_2021-09-03,Date_2021-09-07,Date_2021-09-08,Date_2021-09-09,Date_2021-09-10
0,3.8,5.0,3.508,4.778,4.778,93831500,1.0,1.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5.158,6.084,4.66,4.766,4.766,85935500,1.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,5.0,5.184,4.054,4.392,4.392,41094000,1.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,4.6,4.62,3.742,3.84,3.84,25699000,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,4.0,4.0,3.166,3.222,3.222,34334500,1.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [11]:
# set x and y
y = tsla_df
X = tsla_df.drop(columns=["Adj Close"])

# Split training/test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, stratify=y)

# Create a StandardScaler instance
scaler = StandardScaler()

# Fit the StandardScaler
X_scaler = scaler.fit(X_train)

# Scale the data
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

ValueError: The least populated class in y has only 1 member, which is too few. The minimum number of groups for any class cannot be less than 2.

In [12]:
# Create a random forest classifier.
rf_model = RandomForestClassifier(n_estimators=128, random_state=78)

# Fitting the model
rf_model = rf_model.fit(X_train_scaled, y_train)

# Evaluate the model
y_pred = rf_model.predict(X_test_scaled)
print(f" Random forest predictive accuracy: {accuracy_score(y_test,y_pred):.3f}")

NameError: name 'X_train_scaled' is not defined

In [13]:
# Define the model - deep neural net
number_input_features = len(X_train_scaled[0])
hidden_nodes_layer1 =  24
hidden_nodes_layer2 = 12

nn = tf.keras.models.Sequential()

# First hidden layer
nn.add(
    tf.keras.layers.Dense(units=hidden_nodes_layer1, input_dim=number_input_features, activation="relu")
)

# Second hidden layer
nn.add(tf.keras.layers.Dense(units=hidden_nodes_layer2, activation="relu"))


# Output layer
nn.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Compile the Sequential model together and customize metrics
nn.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train the model
fit_model = nn.fit(X_train_scaled, y_train, epochs=50)

# Evaluate the model using the test data
model_loss, model_accuracy = nn.evaluate(X_test_scaled,y_test,verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

NameError: name 'X_train_scaled' is not defined