In [75]:
import joblib
import pandas as pd
import csv
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import accuracy_score, classification_report

In [82]:
# Ticker symbols handled by this notebook; the order here is the order in
# which the per-ticker models are loaded and bound below.
tickers = ['AAPL', 'AMZN', 'KO', 'MSFT']

# Directory prefix holding one "<TICKER>_model.pkl" file per ticker.
model_path = "stock_notebooks/models/"

# joblib.load unpickles the file, so only load model files you trust.
aapl_model, amzn_model, ko_model, msft_model = (
    joblib.load(model_path + f"{symbol}_model.pkl") for symbol in tickers
)

In [50]:
# Directory prefix holding one "<TICKER>_price_data.csv" file per ticker.
data_path = "stock_notebooks/stock_data/"

# Read each ticker's price/indicator table into its own DataFrame.
aapl_data, amzn_data, ko_data, msft_data = (
    pd.read_csv(f"{data_path}{symbol}_price_data.csv")
    for symbol in ("AAPL", "AMZN", "KO", "MSFT")
)

# Bare expression so the notebook renders the AAPL frame as the cell output.
aapl_data

Unnamed: 0,symbol,datetime,close,high,low,open,volume,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,r_percent,MACD,MACD_EMA,Price_Rate_Of_Change,On Balance Volume,Prediction
0,AAPL,2020-01-22,79.425003,79.997498,79.327499,79.644997,101832400.0,0.282501,0.000000,0.282501,70.313819,73.187500,79.997498,91.593323,-8.406677,0.501322,0.370618,0.047858,3.813136e+08,1.0
1,AAPL,2020-01-23,79.807503,79.889999,78.912498,79.480003,104472000.0,0.382500,0.000000,0.382500,72.861701,73.187500,79.997498,97.210060,-2.789940,0.536085,0.404918,0.031005,4.857856e+08,1.0
2,AAPL,2020-01-24,79.577499,80.832497,79.379997,80.062500,146537600.0,-0.230003,0.230003,0.000000,68.766699,73.187500,80.832497,83.584071,-16.415929,0.541775,0.433082,0.025715,3.392480e+08,-1.0
3,AAPL,2020-01-27,77.237503,77.942497,76.220001,77.514999,161940000.0,-2.339996,2.339996,0.000000,41.431798,74.290001,80.832497,45.051648,-54.948352,0.394581,0.425204,-0.025271,1.773080e+08,-1.0
4,AAPL,2020-01-28,79.422501,79.599998,78.047501,78.150002,162234000.0,2.184998,0.000000,2.184998,58.993758,74.290001,80.832497,78.448652,-21.551348,0.414774,0.423080,0.016023,3.395420e+08,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1156,AAPL,2024-08-26,227.179993,227.279999,223.889999,226.759995,30602200.0,0.339996,0.000000,0.339996,67.455746,206.389999,228.339996,94.715245,-5.284755,2.382962,1.615274,0.026709,2.420604e+09,1.0
1157,AAPL,2024-08-27,228.029999,228.850006,224.889999,226.000000,35934600.0,0.850006,0.000000,0.850006,69.871562,208.830002,228.850006,95.904060,-4.095940,2.533158,1.798850,0.028459,2.456539e+09,1.0
1158,AAPL,2024-08-28,226.490005,229.860001,225.679993,227.919998,38052200.0,-1.539993,1.539993,0.000000,60.485458,211.970001,229.860001,81.162687,-18.837313,2.499116,1.938903,0.007876,2.418487e+09,-1.0
1159,AAPL,2024-08-29,229.789993,232.919998,228.880005,230.100006,51906300.0,3.299988,0.000000,3.299988,70.337633,215.600006,232.919998,81.928370,-18.071630,2.707212,2.092565,0.016545,2.470393e+09,1.0


In [51]:
# Columns passed to every model's predict() call.
feature_columns = ['RSI', 'k_percent', 'r_percent', 'Price_Rate_Of_Change', 'MACD', 'On Balance Volume']

# Each model scores every row of its ticker's frame; only the prediction for
# the first row (position 0) is kept.
# NOTE(review): position 0 is the first row of the CSV, not the most recent
# one — confirm that is the row this is meant to report.
aapl_pred = aapl_model.predict(aapl_data[feature_columns])[0]
amzn_pred = amzn_model.predict(amzn_data[feature_columns])[0]
ko_pred = ko_model.predict(ko_data[feature_columns])[0]
msft_pred = msft_model.predict(msft_data[feature_columns])[0]

In [73]:
# Build a feature matrix / label vector per ticker and split each into train
# and test parts. random_state=0 makes the split repeatable; the test
# fraction is train_test_split's default.
# NOTE(review): the rows are date-ordered and the split shuffles them —
# confirm a random (non-chronological) split is what is wanted here.
feature_columns = ['RSI', 'k_percent', 'r_percent', 'Price_Rate_Of_Change', 'MACD', 'On Balance Volume']

aapl_X_Cols, aapl_Y_Cols = aapl_data[feature_columns], aapl_data['Prediction']
aapl_X_train, aapl_X_test, aapl_y_train, aapl_y_test = train_test_split(
    aapl_X_Cols, aapl_Y_Cols, random_state=0
)

amzn_X_Cols, amzn_Y_Cols = amzn_data[feature_columns], amzn_data['Prediction']
amzn_X_train, amzn_X_test, amzn_y_train, amzn_y_test = train_test_split(
    amzn_X_Cols, amzn_Y_Cols, random_state=0
)

msft_X_Cols, msft_Y_Cols = msft_data[feature_columns], msft_data['Prediction']
msft_X_train, msft_X_test, msft_y_train, msft_y_test = train_test_split(
    msft_X_Cols, msft_Y_Cols, random_state=0
)

ko_X_Cols, ko_Y_Cols = ko_data[feature_columns], ko_data['Prediction']
ko_X_train, ko_X_test, ko_y_train, ko_y_test = train_test_split(
    ko_X_Cols, ko_Y_Cols, random_state=0
)


In [80]:
# Accuracy of each loaded model on its ticker's test split, as a percentage
# (accuracy_score returns the fraction of correct labels by default).
# NOTE(review): the models are loaded from disk; whether these test rows were
# excluded from their training is not visible here — confirm before
# treating these figures as out-of-sample accuracy.
aapl_accuracy = 100.0 * accuracy_score(aapl_y_test, aapl_model.predict(aapl_X_test))
amzn_accuracy = 100.0 * accuracy_score(amzn_y_test, amzn_model.predict(amzn_X_test))
msft_accuracy = 100.0 * accuracy_score(msft_y_test, msft_model.predict(msft_X_test))
ko_accuracy = 100.0 * accuracy_score(ko_y_test, ko_model.predict(ko_X_test))

79.3103448275862

In [93]:
def reward(stock_name, date, data_dir=None):
    """Return the open-to-close fractional return of a stock on one date.

    Scans ``<data_dir>/<STOCK_NAME>_price_data.csv`` for the first row whose
    ``datetime`` column equals ``date`` and returns ``close / open - 1`` for
    that row.

    Parameters
    ----------
    stock_name : str
        Ticker symbol; upper-cased before the file name is built.
    date : str
        Date to look up, compared as an exact string against the
        ``datetime`` column (the data files use ``YYYY-MM-DD``).
    data_dir : str, optional
        Directory containing the price CSV files. Defaults to the
        notebook-level ``data_path``.

    Returns
    -------
    float
        ``close / open - 1`` for the matching row.

    Raises
    ------
    ValueError
        If no row has the requested date, or if ``close``/``open`` cannot be
        parsed as floats.
    KeyError
        If the CSV lacks a ``datetime``, ``close`` or ``open`` column.
    ZeroDivisionError
        If the matching row's ``open`` value is zero.
    """
    import os

    if data_dir is None:
        # Resolved at call time so the function follows the notebook's
        # current data_path setting.
        data_dir = data_path

    stock_name = stock_name.upper()
    stock_name_data_path = os.path.join(data_dir, f"{stock_name}_price_data.csv")

    # newline='' lets the csv module handle line endings itself, as its
    # documentation recommends for files passed to a reader.
    with open(stock_name_data_path, mode='r', newline='') as file:
        for row in csv.DictReader(file):
            if row['datetime'] == date:
                return float(row['close']) / float(row['open']) - 1

    # Reached only when no row matched the requested date.
    raise ValueError(f"Date {date} not found in {stock_name_data_path}")
    


In [94]:
# Spot-check reward() on one AAPL trading day; the bare name on the last line
# makes the notebook display the returned value.
money = reward(stock_name="AAPL", date="2024-08-01")
money

-0.026786088325655655

In [None]:
# TODO: unfinished note ("get certain ...") — complete this step or remove the cell

In [100]:
import csv

# Build trading_data.csv: one output row per input price row, for every ticker,
# tagged with that ticker's model accuracy.
csv_file = 'trading_data.csv'  # Ensure this has a proper extension

# Accuracies are listed in the same order as `tickers` because they are
# paired positionally by zip() below.
model_accuracies = [aapl_accuracy, amzn_accuracy, ko_accuracy, msft_accuracy]
assert len(model_accuracies) == len(tickers), "one accuracy is required per ticker"

fieldnames = ['symbol', 'open', 'close', 'change_in_price', 'prediction', 'model_accuracy', 'reward']

# The output is opened once in write mode so that re-running this cell
# rebuilds the file instead of appending a second copy of every row.
with open(csv_file, mode='w', newline='') as outfile:
    csv_writer = csv.DictWriter(outfile, fieldnames=fieldnames)
    csv_writer.writeheader()

    for t, ma in zip(tickers, model_accuracies):
        path = data_path + f"{t}_price_data.csv"

        # newline='' lets the csv module handle line endings itself.
        with open(path, mode='r', newline='') as file:
            csv_reader = csv.DictReader(file)

            for row in csv_reader:
                row_data = {
                    'symbol': row['symbol'],
                    'open': row['open'],
                    'close': row['close'],
                    'change_in_price': row['change_in_price'],
                    'prediction': row['Prediction'],
                    'model_accuracy': ma,
                    # NOTE(review): 'reward' is copied from the
                    # Price_Rate_Of_Change column, not computed by reward()
                    # (close / open - 1) — confirm this is intended.
                    'reward': row['Price_Rate_Of_Change']
                }
                csv_writer.writerow(row_data)





In [101]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Read the combined per-row trading table from disk.
df = pd.read_csv('trading_data.csv')

# One-hot encode the ticker symbol so it can be used as a numeric feature.
df_encoded = pd.get_dummies(df, columns=["symbol"])

# The 'reward' column is the regression target; every remaining column of the
# encoded frame is used as a feature.
y = df_encoded["reward"]
X = df_encoded.drop(columns=["reward"])

# Hold out 20% of the rows for evaluation; random_state=0 keeps the split
# repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.2)

# Fit an ordinary least-squares model on the training rows.
model = LinearRegression().fit(X_train, y_train)

# Report the coefficient of determination on the held-out rows.
print(f"Model R^2 Score: {model.score(X_test, y_test)}")



Model R^2 Score: 0.44148009462946514
