In [1]:
import pandas as pd
import numpy as np 
import pickle
import matplotlib.pyplot as plt
from sklearn.linear_model import SGDClassifier, RidgeClassifier, LogisticRegression 
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.svm import SVC,LinearSVC
from sklearn.model_selection import RepeatedStratifiedKFold, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.metrics import classification_report, plot_confusion_matrix, confusion_matrix
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, StackingClassifier, BaggingClassifier, AdaBoostClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from PyFiles import Functions as func
from PyFiles import Preprocessing as process

In [2]:
# Read the headlines CSV into a DataFrame (path is relative to the working directory).
df = pd.read_csv(filepath_or_buffer = 'FData/Headlines/New/SPYHeadlinesGrouped.csv')

### Model 1

In [3]:
# Settings forwarded to process.preprocess_tts below.
pre_type = 'stem'
ngram = (1,1)
max_feat = 5000
min_df = 1
max_df = 1.0

# The helper returns six values, unpacked here in order; the printed output
# of this cell reports the class counts of the train and test targets.
(new_df, x_train, x_test, y_train, y_test,
 preprocessing_dict) = process.preprocess_tts(
    df,
    pre_type = pre_type,
    ngram = ngram,
    max_features = max_feat,
    min_df = min_df,
    max_df = max_df,
)

Getting Preprocessing Objects and Transforming Data: 100%|██████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:48<00:00, 32.55s/it]

Train:	1873
1    1033
0     840
Name: Target, dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Test:	804
1    444
0    360
Name: Target, dtype: int64
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~


In [4]:
# Base estimators for Model 1, keyed by a short label and passed to func.stacked_model.
models = {
    'Log': LogisticRegression(max_iter = 2500),
    'Knn': KNeighborsClassifier(),
    'DT': DecisionTreeClassifier(random_state = 10),
    'Gaussian': GaussianNB(),
    'LDA': LinearDiscriminantAnalysis(),
    'LinearSVC': LinearSVC(max_iter = 2500, random_state = 10),
    'SDGSVC': SGDClassifier(random_state = 10),
    'ADA': AdaBoostClassifier(random_state = 10),
    'Bagging': BaggingClassifier(random_state = 10),
    'Ridge': RidgeClassifier(random_state = 10),
    'RF': RandomForestClassifier(random_state = 10),
}

# The returned mapping is indexed by 'Stacked' to get the stacked estimator;
# new_models itself is reused by the results cell that follows.
new_models = func.stacked_model(models)
stacked = new_models['Stacked']

# Fit on the training split; the cell's displayed value is the test-split score.
stacked.fit(x_train, y_train)
stacked.score(x_test, y_test)

  return f(**kwargs)


KeyboardInterrupt: 

In [None]:
# Run func.test_models over new_models with the training split (n_jobs = 12),
# then hand its return value to the plotting helper.
result_dict = func.test_models(
    x_train,
    y_train,
    new_models,
    n_jobs = 12,
)
func.plot_model_results(result_dict)

In [None]:
# Always raises AssertionError; presumably a deliberate stop so that a full run
# does not continue into the Model 2 / Model 3 cells below.
assert False

### Model 2

In [None]:
# Base estimators for Model 2 (the Model 1 set without the 'Knn' entry).
models = {
    'Log': LogisticRegression(max_iter = 2500),
    'DT': DecisionTreeClassifier(random_state = 10),
    'Gaussian': GaussianNB(),
    'LDA': LinearDiscriminantAnalysis(),
    'LinearSVC': LinearSVC(max_iter = 2500, random_state = 10),
    'SDGSVC': SGDClassifier(random_state = 10),
    'ADA': AdaBoostClassifier(random_state = 10),
    'Bagging': BaggingClassifier(random_state = 10),
    'Ridge': RidgeClassifier(random_state = 10),
    'RF': RandomForestClassifier(random_state = 10),
}

# Build the stacked model; new_models is reused by the results cell that follows.
new_models = func.stacked_model(models)
stacked = new_models['Stacked']

# Fit on the training split; the cell's displayed value is the test-split score.
stacked.fit(x_train, y_train)
stacked.score(x_test, y_test)

In [None]:
# getting results and model

# `model_version` is not assigned anywhere earlier in this notebook, so on a
# fresh kernel the f-string below raised NameError. Keep any tag that is
# already defined; otherwise fall back to one built from the preprocessing
# settings (e.g. 'stem_5000'). Resolved before the model run so that a
# missing name cannot fail the cell after the expensive step.
if 'model_version' not in globals():
    model_version = f'{pre_type}_{max_feat}'

result_dict = func.test_models(x_train, y_train, new_models, n_jobs = 12)
func.plot_model_results(result_dict, filepath = f'Images/Sklearn/{model_version}_2.png')

### Model 3

In [None]:
# Base estimators for Model 3: the decision tree plus the three ensemble classifiers.
models = {
    'DT': DecisionTreeClassifier(random_state = 10),
    'ADA': AdaBoostClassifier(random_state = 10),
    'Bagging': BaggingClassifier(random_state = 10),
    'RF': RandomForestClassifier(random_state = 10),
}

# Build the stacked model; new_models is reused by the results cell that follows.
new_models = func.stacked_model(models)
stacked = new_models['Stacked']

# Fit on the training split; the cell's displayed value is the test-split score.
stacked.fit(x_train, y_train)
stacked.score(x_test, y_test)

In [None]:
# getting results and model

# `model_version` is not assigned anywhere earlier in this notebook, so on a
# fresh kernel the f-string below raised NameError. Keep any tag that is
# already defined; otherwise fall back to one built from the preprocessing
# settings (e.g. 'stem_5000'). Resolved before the model run so that a
# missing name cannot fail the cell after the expensive step.
if 'model_version' not in globals():
    model_version = f'{pre_type}_{max_feat}'

result_dict = func.test_models(x_train, y_train, new_models, n_jobs = 12)
func.plot_model_results(result_dict, filepath = f'Images/Sklearn/{model_version}_3.png')

In [None]:
# Always raises AssertionError; presumably a deliberate stop so that a full run
# does not continue into the Keras section below.
assert False

## Keras

In [None]:
from keras.models import Sequential 
from keras.layers import Dense
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

In [None]:
def get_nn_model(dim, act):
    """Build and compile a fully connected binary classifier.

    Parameters
    ----------
    dim : sequence
        Only ``dim[1]`` is read; it is used as ``input_dim`` of the first layer.
    act : str or callable
        Activation applied to every hidden Dense layer.

    Returns
    -------
    Sequential
        Hidden Dense layers of width 256, 128, 64, 32 and 8 followed by a
        single sigmoid unit, compiled with binary cross-entropy loss, the
        'adam' optimizer and the 'accuracy' metric.
    """
    first_width, *later_widths = (256, 128, 64, 32, 8)

    net = Sequential()
    # Only the first layer carries the input size and an explicit name.
    net.add(Dense(first_width, activation = act, input_dim = dim[1], name = 'Input'))
    for width in later_widths:
        net.add(Dense(width, activation = act))

    net.add(Dense(1, activation = 'sigmoid', name = 'Output'))
    net.compile(loss = 'binary_crossentropy', optimizer = 'adam', metrics = ['accuracy'])

    return net

In [None]:
# Callbacks.
# get_nn_model compiles with metrics = ['accuracy'], and Keras 2.3+ logs
# validation metrics under the compiled name, i.e. 'val_accuracy'. Monitoring
# 'val_acc' leaves EarlyStopping and ModelCheckpoint without a metric to read
# (they only warn), so training never stops early and no best weights are
# saved. The monitor key is therefore spelled out in full, with an explicit
# mode because higher accuracy is better.
early_stopping = EarlyStopping(monitor = 'val_accuracy', mode = 'max', verbose = 1,
                               patience = 10, min_delta = .00075)
model_checkpoint = ModelCheckpoint('ModelWeights/VanillaNN.h5', verbose = 1, save_best_only = True,
                                   monitor = 'val_accuracy', mode = 'max')
# No monitor is passed here, so the callback's default (val_loss) is used;
# mode = 'min' matches a loss.
lr_plat = ReduceLROnPlateau(patience = 5, mode = 'min')


# Only dim[1] (the number of feature columns) is read by get_nn_model.
dim = (1, x_train.shape[1], 1)
epochs = 1000
cb = [early_stopping, model_checkpoint, lr_plat]
bs = 32


# NOTE(review): the test split doubles as validation data, so early stopping
# and checkpoint selection are tuned on the same data used for the final score.
nn_model = get_nn_model(dim, 'relu')
nn_model.fit(x_train, y_train, epochs = epochs, batch_size = bs, validation_data = (x_test, y_test), callbacks = cb)