# Stock Market Trend Prediction

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.compat.v1 as tf

tf.disable_v2_behavior()
tf.compat.v1.disable_eager_execution()

# List of all csv filenames
GAS_TRAIN_DATA = 'CSV_Files/Gas Data Last Year.csv'
GAS_TEST_DATA = 'CSV_Files/Gas Data Last Month.csv'
GOLD_TRAIN_DATA = 'CSV_Files/Gold Data Last Year.csv'
GOLD_TEST_DATA = 'CSV_Files/Gold Data Last Month.csv'
OIL_TRAIN_DATA = 'CSV_Files/Oil Data Last Year.csv'
OIL_TEST_DATA = 'CSV_Files/Oil Data Last Month.csv'
SILVER_TRAIN_DATA = 'CSV_Files/Silver Data Last Year.csv'
SILVER_TEST_DATA = 'CSV_Files/Silver Data Last Month.csv'
COAL_DATA = '/Users/armanbhullar55gmail.com/Downloads/Finance_Data - Sheet2.csv'


# Data sets for stock we are currently assessing
current_train_data = GOLD_TRAIN_DATA
current_test_data = GOLD_TEST_DATA

# Number of data points to retrieve from csv files (varies with each stock we assess)
NUM_TRAIN_DATA_POINTS = 266
NUM_TEST_DATA_POINTS = 22

LEARNING_RATE = 0.1
NUM_EPOCHS = 100


# Function to load data that we want from csv files and return final and opening prices and volume for each day
def load_stock_data(stock_name, num_data_points):
    data = pd.read_csv(stock_name,
                       skiprows=0,
                       nrows=num_data_points,
                       usecols=['Price', 'Open', 'Vol.'])
    # Prices of stock at the end of each day
    final_prices = data['Price'].astype(str).str.replace(',','').astype(np.float64)
    # Prices of stock at the beginning of each day
    opening_prices = data['Open'].astype(str).str.replace(',', '').astype(np.float64)
    # Volume of stock exchanged throughout the day
    volumes = data['Volume'].str.strip(' M').astype(np.float64)
    return final_prices, opening_prices, volumes


# Function to calculate differences between opening price of the next day and final price of the current day
def calculate_price_differences(final_prices, opening_prices):
    price_differences = []
    for d_i in range(len(final_prices) - 1):
        price_difference = opening_prices[d_i + 1] - final_prices[d_i]
        price_differences.append(price_difference)
    return price_differences


def calculate_accuracy(expected_values, actual_values):
    num_correct = 0
    for a_i in range(len(actual_values)):
        if actual_values[a_i] < 0 < expected_values[a_i]:
            num_correct += 1
        elif actual_values[a_i] > 0 > expected_values[a_i]:
            num_correct += 1
    return (num_correct / len(actual_values)) * 100


# Training data sets
train_final_prices, train_opening_prices, train_volumes = load_stock_data(current_train_data, NUM_TRAIN_DATA_POINTS)
train_price_differences = calculate_price_differences(train_final_prices, train_opening_prices)
train_volumes = train_volumes[:-1]

# Testing data sets
test_final_prices, test_opening_prices, test_volumes = load_stock_data(current_test_data, NUM_TEST_DATA_POINTS)
test_price_differences = calculate_price_differences(test_final_prices, test_opening_prices)
test_volumes = test_volumes[:-1]

# Building computational graph after y = Wx + b

# Used to input volumes
x = tf.placeholder(tf.float32, name='x')
# Variables that our model will change to get actual output as close to expected output as possible
W = tf.Variable([.1], name='W')
b = tf.Variable([.1], name='b')
# How our model outputs the actual values
y = W * x + b
# Used to input expected values for training purposes (shows the model what a "good" outcome is)
y_predicted = tf.placeholder(tf.float32, name='y_predicted')

# Loss function based on the difference between actual and expected outputs
loss = tf.reduce_sum(tf.square(y - y_predicted))
# Optimizer aimed at minimizing loss by changing W and b
optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss)

# Session is used to actually run the nodes
session = tf.Session()
# Need to initialize global variables
session.run(tf.global_variables_initializer())
for _ in range(NUM_EPOCHS):
    # Run the optimizer which will allow it to change the values of W and b to minimize loss
    session.run(optimizer, feed_dict={x: train_volumes, y_predicted: train_price_differences})

results = session.run(y, feed_dict={x: test_volumes})
accuracy = calculate_accuracy(test_price_differences, results)
print("Accuracy of model: {0:.2f}%".format(accuracy))

# Plotting purposes only, not necessary
import plotly.express as px
# fig = px.scatter()
df = [train_price_differences, train_volumes]
import numpy as np
df = np.transpose(df)
df = pd.DataFrame(df, columns=['Price Differences', 'Volumns'])
px.scatter(df, x='Volumns', y='Price Differences')


Instructions for updating:
non-resource variables are not supported in the long term
Accuracy of model: 61.90%


2023-07-08 11:51:39.902912: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:357] MLIR V1 optimization pass is not enabled
2023-07-08 11:51:39.909118: W tensorflow/tsl/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [12]:
COAL_DATA = '/Users/armanbhullar55gmail.com/Downloads/Finance_Data - Sheet2.csv'

0       4.2
1       3.6
2       9.6
3       8.8
4      12.2
       ... 
121     7.9
122     3.7
123     4.9
124     7.3
125     6.4
Name: Volume, Length: 126, dtype: float64

In [9]:

gf = pd.read_csv(current_test_data)
gf

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,"Oct 25, 2017",1279.00,1278.00,1281.30,1272.00,379.40K,0.05
1,"Oct 24, 2017",1278.30,1283.90,1285.30,1274.80,268.91K,-0.2
2,"Oct 23, 2017",1280.90,1281.50,1284.70,1273.60,284.48K,0.03
3,"Oct 20, 2017",1280.50,1292.00,1292.90,1279.30,314.81K,-0.74
4,"Oct 19, 2017",1290.00,1282.80,1292.80,1277.60,355.71K,0.55
5,"Oct 18, 2017",1283.00,1286.90,1290.80,1278.60,258.54K,-0.25
6,"Oct 17, 2017",1286.20,1297.20,1298.40,1283.20,342.41K,-1.29
7,"Oct 16, 2017",1303.00,1306.00,1308.40,1292.50,271.28K,-0.12
8,"Oct 13, 2017",1304.60,1296.00,1306.40,1292.90,297.57K,0.62
9,"Oct 12, 2017",1296.50,1294.60,1299.80,1291.80,246.08K,0.59


In [4]:
GAS_TRAIN_DATA = 'CSV_Files/Gas Data Last Year.csv'
GAS_TRAIN_DATA
import pandas as pd

In [5]:
data = pd.read_csv('CSV_Files/Gas Data Last Year.csv')

In [6]:
data

Unnamed: 0,Date,Price,Open,High,Low,Vol.,Change %
0,"Sep 27, 2017",2.974,2.917,2.989,2.912,15.71K,1.92
1,"Sep 26, 2017",2.918,2.921,2.955,2.898,54.99K,-0.03
2,"Sep 25, 2017",2.919,2.932,2.986,2.911,73.23K,-1.35
3,"Sep 22, 2017",2.959,2.957,2.978,2.945,109.53K,0.44
4,"Sep 21, 2017",2.946,3.081,3.091,2.942,271.68K,-4.78
...,...,...,...,...,...,...,...
262,"Sep 29, 2016",2.959,3.010,3.032,2.956,131.81K,0.24
263,"Sep 28, 2016",2.952,2.994,2.994,2.905,8.88K,-1.47
264,"Sep 27, 2016",2.996,3.011,3.023,2.977,45.16K,-0.03
265,,,,,,,
