## 461 Project

In [None]:
pip install yfinance

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures, StandardScaler
from sklearn.linear_model import LinearRegression, Ridge, Lasso, SGDRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import datetime

# Ticker symbols to analyse, grouped by market sector
stocks = dict(
    Banking=['JPM', 'BAC', 'WFC'],
    Communication=['T', 'VZ', 'CMCSA'],
    Manufacturing=['GE', 'MMM', 'HON'],
    Technology=['AAPL', 'MSFT', 'GOOGL'],
)

# Function to fetch historical data
def fetch_data(stocks, start_date, end_date):
    """Download price history for every symbol, grouped by sector.

    Args:
        stocks: Mapping of sector name to an iterable of ticker symbols.
        start_date: Forwarded to ``Ticker.history`` as ``start``.
        end_date: Forwarded to ``Ticker.history`` as ``end``.

    Returns:
        dict: ``{sector: {symbol: history}}`` where ``history`` is whatever
        ``yf.Ticker(symbol).history(...)`` returns for that symbol.
    """
    return {
        sector: {
            ticker: yf.Ticker(ticker).history(start=start_date, end=end_date)
            for ticker in tickers
        }
        for sector, tickers in stocks.items()
    }

# Date range requested from the data source (start, end)
start_date, end_date = '2010-01-01', '2023-11-30'

# Download the history for every configured symbol
data = fetch_data(stocks, start_date=start_date, end_date=end_date)

# Model Functions
def apply_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        Root-mean-squared error of the model's predictions on ``X_test``.
    """
    model.fit(X_train, y_train)
    test_mse = mean_squared_error(y_test, model.predict(X_test))
    return np.sqrt(test_mse)

def prepare_data(stock_data):
    """Build next-day-close regression data and split it into train/test sets.

    The target of each row is the following row's ``Close``. The final row has
    no following row, so it is dropped together with any other row that
    contains a NaN.

    The caller's frame is never modified: the target column is added to a
    derived copy, so calling this repeatedly on the same frame always yields
    the same rows.

    Args:
        stock_data: DataFrame with at least ``Open``, ``High``, ``Low``,
            ``Close`` and ``Volume`` columns.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split`` with a 20% test share and ``random_state=42``.
    """
    # assign() returns a new frame, leaving the caller's data untouched.
    prepared = stock_data.assign(Target=stock_data['Close'].shift(-1)).dropna()
    X = prepared[['Open', 'High', 'Low', 'Close', 'Volume']]
    y = prepared['Target']
    # NOTE(review): train_test_split shuffles by default, so test rows are
    # interleaved in time with training rows; pass shuffle=False if a
    # chronological hold-out is what the evaluation should measure.
    return train_test_split(X, y, test_size=0.2, random_state=42)


def polynomial_regression(X_train, X_test, y_train, y_test, degree):
    """Fit a linear model on polynomial features and return its test RMSE.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.
        degree: Degree passed to ``PolynomialFeatures``.

    Returns:
        Root-mean-squared error of the predictions on ``X_test``.
    """
    # The expansion is fitted on the training split and reused for the test split.
    expander = PolynomialFeatures(degree=degree)
    train_features = expander.fit_transform(X_train)
    test_features = expander.transform(X_test)

    regressor = LinearRegression()
    regressor.fit(train_features, y_train)
    test_mse = mean_squared_error(y_test, regressor.predict(test_features))
    return np.sqrt(test_mse)

# Iterate over each sector and stock
results = {}
# The per-sector mapping is bound to `sector_stocks`, not `stocks`, so the
# module-level sector -> symbols dict is not overwritten by this loop.
for sector, sector_stocks in data.items():
    sector_results = {}
    for stock_symbol, stock_data in sector_stocks.items():
        X_train, X_test, y_train, y_test = prepare_data(stock_data)

        # Scale Data for Some Models (the scaler is fitted on the training split only)
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Apply Models: every value is the test-set RMSE returned by apply_model
        stock_results = {
            'Linear': apply_model(LinearRegression(), X_train, X_test, y_train, y_test),
            'Ridge': apply_model(Ridge(), X_train_scaled, X_test_scaled, y_train, y_test),
            'Lasso': apply_model(Lasso(), X_train_scaled, X_test_scaled, y_train, y_test),
            'SGD': apply_model(SGDRegressor(), X_train_scaled, X_test_scaled, y_train, y_test),
            'DecisionTree': apply_model(DecisionTreeRegressor(), X_train, X_test, y_train, y_test),
            'RandomForest': apply_model(RandomForestRegressor(), X_train, X_test, y_train, y_test),
            # Add more models as needed
        }

        # DNN Model: three hidden ReLU layers and a single linear output unit
        dnn_model = Sequential([Dense(128, activation='relu', input_shape=(X_train.shape[1],)),
                                Dense(64, activation='relu'),
                                Dense(32, activation='relu'),
                                Dense(1)])
        dnn_model.compile(optimizer='adam', loss='mean_squared_error')
        dnn_model.fit(X_train_scaled, y_train, epochs=50, batch_size=32)
        dnn_predictions = dnn_model.predict(X_test_scaled)
        stock_results['DNN'] = np.sqrt(mean_squared_error(y_test, dnn_predictions))

        sector_results[stock_symbol] = stock_results
    results[sector] = sector_results

# Print every model's RMSE, grouped by sector and then by stock
for sector_name, per_stock in results.items():
    print(f"Sector: {sector_name}")
    for symbol, scores in per_stock.items():
        print(f"  Stock: {symbol}")
        for model_name, score in scores.items():
            print(f"    Model: {model_name}, RMSE: {score}")
    print()


# New Section

In [None]:

# Function for Polynomial Regression
def polynomial_regression(X_train, X_test, y_train, y_test, degree):
    """Return the test RMSE of a linear regression on polynomial features.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.
        degree: Degree passed to ``PolynomialFeatures``.

    Returns:
        Root-mean-squared error of the predictions on ``X_test``.
    """
    # Fit the feature expansion on the training split, then reuse it for test.
    feature_map = PolynomialFeatures(degree=degree)
    expanded_train = feature_map.fit_transform(X_train)
    expanded_test = feature_map.transform(X_test)

    linear = LinearRegression()
    linear.fit(expanded_train, y_train)
    mse = mean_squared_error(y_test, linear.predict(expanded_test))
    return np.sqrt(mse)

# Function to prepare data
def prepare_data(stock_data):
    """Build next-day-close regression data and split it into train/test sets.

    The target of each row is the following row's ``Close``. The final row has
    no following row, so it is dropped together with any other row that
    contains a NaN.

    The caller's frame is never modified. Dropping rows in place would remove
    one more trailing row from the shared data on every call, so repeated
    calls would silently train on different data.

    Args:
        stock_data: DataFrame with at least ``Open``, ``High``, ``Low``,
            ``Close`` and ``Volume`` columns.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split`` with a 20% test share and ``random_state=42``.
    """
    # assign() returns a new frame, leaving the caller's data untouched.
    prepared = stock_data.assign(Target=stock_data['Close'].shift(-1)).dropna()
    X = prepared[['Open', 'High', 'Low', 'Close', 'Volume']]
    y = prepared['Target']
    return train_test_split(X, y, test_size=0.2, random_state=42)

# Iterate over each sector and stock
results = {}
# The per-sector mapping is bound to `sector_stocks`, not `stocks`, so the
# module-level sector -> symbols dict is not overwritten by this loop.
for sector, sector_stocks in data.items():
    sector_results = {}
    for stock_symbol, stock_data in sector_stocks.items():
        X_train, X_test, y_train, y_test = prepare_data(stock_data)

        # Find the best degree for Polynomial Regression.
        # Degrees 1 through 9 are tried; on a tie the lowest degree wins
        # because only a strictly smaller RMSE replaces the current best.
        # NOTE(review): the degree is chosen by test-set RMSE, so the reported
        # RMSE is an optimistic estimate; use a validation split to select.
        best_degree = None
        lowest_rmse = float('inf')
        for degree in range(1, 10):
            rmse = polynomial_regression(X_train, X_test, y_train, y_test, degree)
            if rmse < lowest_rmse:
                best_degree = degree
                lowest_rmse = rmse

        sector_results[stock_symbol] = {'Best Degree': best_degree, 'RMSE': lowest_rmse}
    results[sector] = sector_results

# Print the selected polynomial degree and its RMSE for every stock
for sector_name, per_stock in results.items():
    print(f"Sector: {sector_name}")
    for symbol, best in per_stock.items():
        degree_found, score = best['Best Degree'], best['RMSE']
        print(f"  Stock: {symbol}, Best Degree: {degree_found}, RMSE: {score}")
    print()


In [None]:
# Bare expression: when run as a notebook cell this displays the current value of `stocks`.
stocks

In [None]:
from sklearn.linear_model import ElasticNet

# Ticker symbols to analyse, grouped by market sector
stocks = dict(
    Banking=['JPM', 'BAC', 'WFC'],
    Communication=['T', 'VZ', 'CMCSA'],
    Manufacturing=['GE', 'MMM', 'HON'],
    Technology=['AAPL', 'MSFT', 'GOOGL'],
)

# Function to fetch historical data
def fetch_data(stocks, start_date, end_date):
    """Download price history for every symbol, grouped by sector.

    Args:
        stocks: Mapping of sector name to an iterable of ticker symbols.
        start_date: Forwarded to ``Ticker.history`` as ``start``.
        end_date: Forwarded to ``Ticker.history`` as ``end``.

    Returns:
        dict: ``{sector: {symbol: history}}`` where ``history`` is whatever
        ``yf.Ticker(symbol).history(...)`` returns for that symbol.
    """
    return {
        sector: {
            ticker: yf.Ticker(ticker).history(start=start_date, end=end_date)
            for ticker in tickers
        }
        for sector, tickers in stocks.items()
    }

# Date range requested from the data source (start, end)
start_date, end_date = '2010-01-01', '2023-11-30'

# Download the history for every configured symbol
data = fetch_data(stocks, start_date=start_date, end_date=end_date)
def elasticnet_regression(X_train, X_test, y_train, y_test, alpha=1.0, l1_ratio=0.5):
    """Fit an ElasticNet model and return its RMSE on the test split.

    Args:
        X_train: Training features.
        X_test: Test features.
        y_train: Training targets.
        y_test: Test targets.
        alpha: Forwarded to ``ElasticNet`` as ``alpha``.
        l1_ratio: Forwarded to ``ElasticNet`` as ``l1_ratio``.

    Returns:
        Root-mean-squared error of the predictions on ``X_test``.
    """
    regressor = ElasticNet(alpha=alpha, l1_ratio=l1_ratio)
    regressor.fit(X_train, y_train)
    test_mse = mean_squared_error(y_test, regressor.predict(X_test))
    return np.sqrt(test_mse)

# Per-symbol mapping of every tried alpha to its test RMSE
elasticnet_details = {}
results = {}  # Ensure this dictionary is initialized
# ElasticNet with different alpha values.
# The per-sector mapping is bound to `sector_stocks`, not `stocks`, so the
# module-level sector -> symbols dict is not overwritten by this loop.
for sector, sector_stocks in data.items():
    sector_results = {}
    for stock_symbol, stock_data in sector_stocks.items():
        X_train, X_test, y_train, y_test = prepare_data(stock_data)

        # Scale Data for Some Models (the scaler is fitted on the training split only)
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # ElasticNet with different alpha values; on a tie the first alpha
        # tried wins because only a strictly smaller RMSE replaces the best.
        elasticnet_results = {}
        best_alpha = None
        lowest_rmse = float('inf')
        # NOTE(review): the trailing 15 breaks the 0.1-step pattern and may be
        # a typo (e.g. for 1.0 or 1.5) -- confirm the intended value.
        for alpha in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 15]:
            rmse = elasticnet_regression(X_train_scaled, X_test_scaled, y_train, y_test, alpha=alpha)
            elasticnet_results[alpha] = rmse
            if rmse < lowest_rmse:
                best_alpha = alpha
                lowest_rmse = rmse
        elasticnet_best = {'Best Alpha': best_alpha, 'RMSE': lowest_rmse}
        elasticnet_details[stock_symbol] = elasticnet_results

        # Combine results
        stock_results = {'ElasticNet': elasticnet_best}  # Add other models here as well
        sector_results[stock_symbol] = stock_results
    results[sector] = sector_results

# Print the best alpha and its RMSE for each model of every stock
for sector_name, per_stock in results.items():
    print(f"Sector: {sector_name}")
    for symbol, per_model in per_stock.items():
        print(f"  Stock: {symbol}")
        for model_name, summary in per_model.items():
            alpha_found, score = summary['Best Alpha'], summary['RMSE']
            print(f"    Model: {model_name}, Best Alpha: {alpha_found}, RMSE: {score}")
    print()


## DNN

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV


# Fetch data: this cell's own fetch_data, which requests a fixed "5y" period per symbol
def fetch_data(stocks):
    """Download the ``"5y"`` price history for every symbol, grouped by sector.

    Args:
        stocks: Mapping of sector name to an iterable of ticker symbols.

    Returns:
        dict: ``{sector: {symbol: history}}`` where ``history`` is whatever
        ``yf.Ticker(symbol).history(period="5y")`` returns for that symbol.
    """
    return {
        sector: {ticker: yf.Ticker(ticker).history(period="5y") for ticker in tickers}
        for sector, tickers in stocks.items()
    }

# Data preparation: use the next row's close as the target and split into train/test sets
def prepare_data(stock_data):
    """Build next-day-close regression data and split it into train/test sets.

    The target of each row is the following row's ``Close``. The final row has
    no following row, so it is dropped together with any other row that
    contains a NaN.

    The caller's frame is never modified. Dropping rows in place would remove
    one more trailing row from the shared data on every call, so repeated
    calls would silently train on different data.

    Args:
        stock_data: DataFrame with at least ``Open``, ``High``, ``Low``,
            ``Close`` and ``Volume`` columns.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split`` with a 20% test share and ``random_state=42``.
    """
    # assign() returns a new frame, leaving the caller's data untouched.
    prepared = stock_data.assign(Target=stock_data['Close'].shift(-1)).dropna()
    X = prepared[['Open', 'High', 'Low', 'Close', 'Volume']]
    y = prepared['Target']
    return train_test_split(X, y, test_size=0.2, random_state=42)

# DNN Model Building
def build_dnn_model(input_shape, dense_layer_sizes, dropout_rate, reg_rate, learning_rate=0.001, activation='relu'):
    """Build and compile a fully connected regression network.

    Each entry of ``dense_layer_sizes`` adds a Dense layer (L2-regularised with
    ``reg_rate``) followed by BatchNormalization and Dropout. A single-unit
    Dense layer with no activation forms the output.

    Args:
        input_shape: Number of input features; only the first Dense layer
            receives it, as ``(input_shape,)``.
        dense_layer_sizes: Iterable of hidden-layer widths, in order.
        dropout_rate: Rate passed to every Dropout layer.
        reg_rate: Factor passed to ``l2`` for every hidden Dense layer.
        learning_rate: Learning rate of the Adam optimizer.
        activation: Activation used by every hidden Dense layer.

    Returns:
        The compiled ``Sequential`` model (loss: mean squared error).
    """
    network = Sequential()
    for position, units in enumerate(dense_layer_sizes):
        dense_kwargs = {'activation': activation, 'kernel_regularizer': l2(reg_rate)}
        if position == 0:
            # Only the first layer declares the input dimensionality.
            dense_kwargs['input_shape'] = (input_shape,)
        network.add(Dense(units, **dense_kwargs))
        network.add(BatchNormalization())
        network.add(Dropout(dropout_rate))
    network.add(Dense(1))
    network.compile(optimizer=Adam(learning_rate=learning_rate), loss='mean_squared_error')
    return network


# Ticker symbols to analyse, grouped by market sector
stocks = dict(
    Banking=['JPM', 'BAC', 'WFC'],
    Communication=['T', 'VZ', 'CMCSA'],
    Manufacturing=['GE', 'MMM', 'HON'],
    Technology=['AAPL', 'MSFT', 'GOOGL'],
)

# Download the history and start with an empty results table
data, results = fetch_data(stocks), {}



# Training and Evaluating the Model
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from sklearn.model_selection import RandomizedSearchCV

def train_and_evaluate(X_train, y_train, X_test, y_test, input_shape, epochs=100, batch_size=32, n_iter=10, cv=3):
    """Tune a DNN regressor with a randomized search and score it on the test split.

    A ``KerasRegressor`` wrapping ``build_dnn_model`` is searched with
    ``RandomizedSearchCV``. The Keras model of the search's best estimator is
    then fitted again on the training data with a 20% validation split, and
    that model's test-set RMSE is reported.

    Args:
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.
        input_shape: Number of input features, forwarded to ``build_dnn_model``.
        epochs: Epochs used by the wrapped regressor and by the final fit.
        batch_size: Batch size used by the wrapped regressor and the final fit.
        n_iter: Number of parameter settings sampled by the search.
        cv: Cross-validation setting passed to the search.

    Returns:
        tuple: ``(rmse, best_params, history)`` -- the test RMSE, the search's
        ``best_params_`` and the object returned by the final ``fit`` call.
    """
    def _build(dense_layer_sizes, dropout_rate, reg_rate, learning_rate, activation):
        # input_shape is fixed by the enclosing call; the other arguments are
        # the hyperparameters listed in the search space below.
        return build_dnn_model(input_shape, dense_layer_sizes, dropout_rate, reg_rate, learning_rate, activation)

    regressor = KerasRegressor(build_fn=_build, epochs=epochs, batch_size=batch_size, verbose=0)

    search_space = {
        'dense_layer_sizes': [(128, 64, 32), (64, 32), (128, 64), (256, 128, 64)],
        'dropout_rate': [0.2, 0.3, 0.4, 0.5],
        'reg_rate': [0.001, 0.01, 0.05, 0.1],
        'learning_rate': [0.001, 0.0005, 0.0001],
        'activation': ['relu', 'tanh']
    }

    search = RandomizedSearchCV(estimator=regressor, param_distributions=search_space, n_iter=n_iter, cv=cv)
    fitted_search = search.fit(X_train, y_train)
    tuned_network = fitted_search.best_estimator_.model

    # Fit the selected network again, this time recording validation loss.
    history = tuned_network.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, verbose=0, validation_split=0.2)

    test_rmse = np.sqrt(mean_squared_error(y_test, tuned_network.predict(X_test)))
    return test_rmse, fitted_search.best_params_, history




# Tune and evaluate the DNN for every stock, collecting RMSE and best parameters
for sector, sector_stocks in data.items():
    for stock_symbol, stock_data in sector_stocks.items():
        X_train, X_test, y_train, y_test = prepare_data(stock_data)

        # Scale the data (the scaler is fitted on the training split only)
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Train the model and perform hyperparameter tuning.
        # train_and_evaluate returns (rmse, best_params, history), so all
        # three values must be unpacked; the history is not needed here.
        rmse, best_params, _history = train_and_evaluate(X_train_scaled, y_train, X_test_scaled, y_test, input_shape=X_train_scaled.shape[1])

        # Store results
        results.setdefault(sector, {})[stock_symbol] = {'RMSE': rmse, 'Best Parameters': best_params}

# Print the RMSE and chosen hyperparameters of every stock, grouped by sector
for sector_name, per_stock in results.items():
    print(f"Sector: {sector_name}")
    for symbol, summary in per_stock.items():
        score, chosen = summary['RMSE'], summary['Best Parameters']
        print(f"  Stock: {symbol}, RMSE: {score}, Best Parameters: {chosen}")


In [None]:
import matplotlib.pyplot as plt

def plot_loss_and_accuracy(history, stock_symbol):
    """Plot training/validation loss and, when recorded, accuracy per epoch.

    Args:
        history: Object whose ``history`` attribute maps metric names to
            per-epoch values; ``loss`` and ``val_loss`` are required,
            ``accuracy`` and ``val_accuracy`` are plotted only if
            ``accuracy`` is present.
        stock_symbol: Label used as the prefix of each subplot title.
    """
    curves = history.history
    plt.figure(figsize=(12, 5))

    def _draw(slot, train_key, val_key, train_label, val_label, title, y_label):
        # One panel of the 1x2 grid: training curve plus validation curve.
        plt.subplot(1, 2, slot)
        plt.plot(curves[train_key], label=train_label)
        plt.plot(curves[val_key], label=val_label)
        plt.title(title)
        plt.xlabel('Epochs')
        plt.ylabel(y_label)
        plt.legend()

    _draw(1, 'loss', 'val_loss', 'Train Loss', 'Validation Loss',
          f'{stock_symbol} - Loss over Epochs', 'Loss')

    # The right-hand panel is only drawn when accuracy was tracked.
    if 'accuracy' in curves:
        _draw(2, 'accuracy', 'val_accuracy', 'Train Accuracy', 'Validation Accuracy',
              f'{stock_symbol} - Accuracy over Epochs', 'Accuracy')

    plt.tight_layout()
    plt.show()



# Tune and evaluate the DNN for every stock and plot its learning curves
for sector, sector_stocks in data.items():
    for stock_symbol, stock_data in sector_stocks.items():
        X_train, X_test, y_train, y_test = prepare_data(stock_data)

        # Scale the data (the scaler is fitted on the training split only)
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Train the model and perform hyperparameter tuning.
        # train_and_evaluate returns (rmse, best_params, history), so all
        # three values must be unpacked.
        rmse, best_params, history = train_and_evaluate(X_train_scaled, y_train, X_test_scaled, y_test, input_shape=X_train_scaled.shape[1])

        # Show the curves recorded during the final fit of the tuned model
        plot_loss_and_accuracy(history, stock_symbol)

        # Store results
        results.setdefault(sector, {})[stock_symbol] = {'RMSE': rmse, 'Best Parameters': best_params}

# Print the RMSE and chosen hyperparameters of every stock, grouped by sector
for sector_name, per_stock in results.items():
    print(f"Sector: {sector_name}")
    for symbol, summary in per_stock.items():
        score, chosen = summary['RMSE'], summary['Best Parameters']
        print(f"  Stock: {symbol}, RMSE: {score}, Best Parameters: {chosen}")

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2

# Function to fetch historical data
def fetch_data(stocks, start_date, end_date):
    """Download price history for every symbol, grouped by sector.

    Args:
        stocks: Mapping of sector name to an iterable of ticker symbols.
        start_date: Forwarded to ``Ticker.history`` as ``start``.
        end_date: Forwarded to ``Ticker.history`` as ``end``.

    Returns:
        dict: ``{sector: {symbol: history}}`` where ``history`` is whatever
        ``yf.Ticker(symbol).history(...)`` returns for that symbol.
    """
    return {
        sector: {
            ticker: yf.Ticker(ticker).history(start=start_date, end=end_date)
            for ticker in tickers
        }
        for sector, tickers in stocks.items()
    }

# Data preparation for classification


def prepare_data(stock_data):
    """Build up/down classification data and split it into train/test sets.

    ``Price_Up`` is 1 when the next row's ``Close`` is strictly higher than
    the current row's, otherwise 0. Rows whose next close is unknown (the
    final row) are excluded: comparing against the missing value would
    otherwise label them 0 even though the outcome is not known.

    The caller's frame is never modified; the label is added to a copy.

    Args:
        stock_data: DataFrame with at least ``Open``, ``High``, ``Low``,
            ``Close`` and ``Volume`` columns.

    Returns:
        ``(X_train, X_test, y_train, y_test)`` as produced by
        ``train_test_split`` with a 20% test share and ``random_state=42``.
    """
    next_close = stock_data['Close'].shift(-1)
    # assign() returns a new frame, leaving the caller's data untouched.
    labelled = stock_data.assign(
        Price_Up=(stock_data['Close'] < next_close).astype(int)
    )
    # Keep only rows with a known next-day close, then drop any remaining NaN rows.
    labelled = labelled[next_close.notna()].dropna()
    X = labelled[['Open', 'High', 'Low', 'Close', 'Volume']]
    y = labelled['Price_Up']
    return train_test_split(X, y, test_size=0.2, random_state=42)


# DNN Classifier
def build_dnn_classifier(input_shape, learning_rate=0.001, units_per_layer=(128, 64, 32)):
    """Build and compile a fully connected two-class classifier.

    Every hidden Dense layer uses ReLU with L2 regularisation (0.001) and is
    followed by BatchNormalization and Dropout(0.3). The output layer has two
    softmax units and the model is compiled with sparse categorical
    cross-entropy, tracking accuracy.

    Args:
        input_shape: Number of input features; only the first Dense layer
            receives it, as ``(input_shape,)``.
        learning_rate: Learning rate of the Adam optimizer.
        units_per_layer: Non-empty sequence of hidden-layer widths. The
            default is a tuple so it cannot be mutated between calls.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(Dense(units_per_layer[0], activation='relu', input_shape=(input_shape,), kernel_regularizer=l2(0.001)))
    model.add(BatchNormalization())
    model.add(Dropout(0.3))
    for units in units_per_layer[1:]:
        model.add(Dense(units, activation='relu', kernel_regularizer=l2(0.001)))
        model.add(BatchNormalization())
        model.add(Dropout(0.3))
    model.add(Dense(2, activation='softmax'))  # Binary classification output
    model.compile(optimizer=Adam(learning_rate=learning_rate), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Fine-tuning DNN
def build_and_tune_dnn(X_train, y_train, X_val, y_val):
    """Train one DNN classifier per configuration and keep the most accurate.

    Every combination of two layer layouts and two learning rates is trained
    for 50 epochs and scored on the validation data. On a tie the first
    configuration tried is kept.

    Args:
        X_train: Training features; ``X_train.shape[1]`` sets the input width.
        y_train: Training labels.
        X_val: Validation features.
        y_val: Validation labels.

    Returns:
        The trained model with the highest validation accuracy.
    """
    # Start below any reachable accuracy so the first candidate is always
    # accepted; starting at 0 would return None if every model scored 0.
    best_accuracy = float('-inf')
    best_model = None
    for units in [(128, 64, 32), (100, 50)]:
        for lr in [0.001, 0.0001]:
            model = build_dnn_classifier(X_train.shape[1], learning_rate=lr, units_per_layer=units)
            model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=1, validation_data=(X_val, y_val))
            # evaluate() returns [loss, accuracy] for the metrics compiled into the model
            accuracy = model.evaluate(X_val, y_val, verbose=0)[1]
            if accuracy > best_accuracy:
                best_accuracy = accuracy
                best_model = model
    return best_model

# KNN Classifier with Grid Search
def build_and_tune_knn(X_train, y_train):
    """Grid-search a k-nearest-neighbours classifier with 3-fold CV.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The search's ``best_estimator_``.
    """
    search = GridSearchCV(
        KNeighborsClassifier(),
        {
            'n_neighbors': [3, 5, 7],
            'weights': ['uniform', 'distance'],
            'metric': ['euclidean', 'manhattan'],
        },
        cv=3,
    )
    search.fit(X_train, y_train)
    return search.best_estimator_

# Decision Tree Classifier with Grid Search
def build_and_tune_decision_tree(X_train, y_train):
    """Grid-search a decision-tree classifier with 3-fold CV.

    Args:
        X_train: Training features.
        y_train: Training labels.

    Returns:
        The search's ``best_estimator_``.
    """
    search = GridSearchCV(
        DecisionTreeClassifier(),
        {
            'max_depth': [None, 10, 20, 30],
            'min_samples_split': [2, 5, 10],
            'criterion': ['gini', 'entropy'],
        },
        cv=3,
    )
    search.fit(X_train, y_train)
    return search.best_estimator_

# Ticker symbols to analyse, grouped by market sector
stocks = dict(
    Banking=['JPM', 'BAC', 'WFC'],
    Communication=['T', 'VZ', 'CMCSA'],
    Manufacturing=['GE', 'MMM', 'HON'],
    Technology=['AAPL', 'MSFT', 'GOOGL'],
)

# Date range requested from the data source (start, end)
start_date, end_date = '2010-01-01', '2023-11-30'

# Download the history for every configured symbol
data = fetch_data(stocks, start_date=start_date, end_date=end_date)

# Process data and train models
results = {}
for sector, sector_stocks in data.items():
    for stock_symbol, stock_data in sector_stocks.items():
        X_train, X_test, y_train, y_test = prepare_data(stock_data)

        # Scale the features (the scaler is fitted on the training split only)
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Hold out 10% of the training rows as the DNN's validation set
        X_train_scaled, X_val_scaled, y_train, y_val = train_test_split(X_train_scaled, y_train, test_size=0.1, random_state=42)

        # DNN: the model's two softmax outputs are reduced to a class index
        dnn_model = build_and_tune_dnn(X_train_scaled, y_train, X_val_scaled, y_val)
        dnn_predictions = np.argmax(dnn_model.predict(X_test_scaled), axis=-1)
        dnn_accuracy = accuracy_score(y_test, dnn_predictions)

        # KNN
        knn_model = build_and_tune_knn(X_train_scaled, y_train)
        knn_accuracy = knn_model.score(X_test_scaled, y_test)

        # Decision Tree: tuned once, on the same training rows as the other
        # models and scored on the same test set, so the three accuracies
        # stored below are directly comparable.
        tree_model = build_and_tune_decision_tree(X_train_scaled, y_train)
        tree_accuracy = tree_model.score(X_test_scaled, y_test)

        # Store results
        results.setdefault(sector, {})[stock_symbol] = {
            'DNN Accuracy': dnn_accuracy,
            'KNN Accuracy': knn_accuracy,
            'Decision Tree Accuracy': tree_accuracy
        }

# Print the accuracy summary of every stock, grouped by sector
for sector_name, per_stock in results.items():
    print(f"Sector: {sector_name}")
    for symbol, scores in per_stock.items():
        print(f"  Stock: {symbol}, Accuracies: {scores}")
