In [1]:
import pandas as pd
import random
import numpy as np
import os
import warnings
import joblib
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
warnings.filterwarnings('ignore')

In [2]:
trading_list = {
    "IOC": {"price": 102.80, "leverage": 9, "reward": 1.5, "risk": 0.75},
    "TATAPOWER": {"price": 109.35, "leverage": 8, "reward": 1.5, "risk": 0.75},
    "ONGC": {"price": 115.10, "leverage": 9, "reward": 1.5, "risk": 0.75},
    "GAIL": {"price": 161.05, "leverage": 9, "reward": 1.5, "risk": 0.75},
    "ITC": {"price": 203.25, "leverage": 9, "reward": 2, "risk": 1},
    "HINDPETRO": {"price": 262.25, "leverage": 10, "reward": 1.5, "risk": 0.75},
    "DLF": {"price": 265.30, "leverage": 8, "reward": 1.5, "risk": 0.75},
    "AMBUJACEM": {"price": 309.05, "leverage": 9, "reward": 2, "risk": 1},
    "TATAMOTORS": {"price": 315.55, "leverage": 7, "reward": 2, "risk": 1},
    "HINDZINC": {"price": 317.10, "leverage": 10, "reward": 2, "risk": 1},
    "SBIN": {"price": 364.65, "leverage": 9, "reward": 2, "risk": 1},
    "WIPRO": {"price": 507.60, "leverage": 9, "reward": 2, "risk": 1},
    "DABUR": {"price": 529.20, "leverage": 9, "reward": 2, "risk": 1},
    "BHARTIARTL": {"price": 561.75, "leverage": 9, "reward": 2, "risk": 1},
    "ICICIBANK": {"price": 594.75, "leverage": 9, "reward": 2, "risk": 1},
    "TATACONSUM": {"price": 640.05, "leverage": 9, "reward": 2, "risk": 1}
}
ticker_list = list(trading_list.keys())
parallel_trade = 3
wallet = 5000
directory = "data//machine_learning//model//"
if not os.path.exists(directory):
    os.makedirs(directory)
logistic_regression_filename = 'money_manager_logistic_regression.nik'
linear_regression_filename = 'money_manager_linear_regression.nik'

In [3]:
def get_data(days):
    directory = "E:\\Workspace\\Python\\Trading\\src\\notebook\\data\\last_60\\17_05_2021\\days\\"
    name = str(days) + ".csv"
    return pd.read_csv(directory+name)

In [4]:
def split_ticker(data):
    IOC = data[data["ticker"]=="IOC"].reset_index().drop(['index'], axis=1)
    TATAPOWER = data[data["ticker"]=="TATAPOWER"].reset_index().drop(['index'], axis=1)
    ONGC = data[data["ticker"]=="ONGC"].reset_index().drop(['index'], axis=1)
    GAIL = data[data["ticker"]=="GAIL"].reset_index().drop(['index'], axis=1)
    ITC = data[data["ticker"]=="ITC"].reset_index().drop(['index'], axis=1)
    HINDPETRO = data[data["ticker"]=="HINDPETRO"].reset_index().drop(['index'], axis=1)
    DLF = data[data["ticker"]=="DLF"].reset_index().drop(['index'], axis=1)
    AMBUJACEM = data[data["ticker"]=="AMBUJACEM"].reset_index().drop(['index'], axis=1)
    TATAMOTORS = data[data["ticker"]=="TATAMOTORS"].reset_index().drop(['index'], axis=1)
    HINDZINC = data[data["ticker"]=="HINDZINC"].reset_index().drop(['index'], axis=1)
    SBIN = data[data["ticker"]=="SBIN"].reset_index().drop(['index'], axis=1)
    WIPRO = data[data["ticker"]=="WIPRO"].reset_index().drop(['index'], axis=1)
    DABUR = data[data["ticker"]=="DABUR"].reset_index().drop(['index'], axis=1)
    BHARTIARTL = data[data["ticker"]=="BHARTIARTL"].reset_index().drop(['index'], axis=1)
    ICICIBANK = data[data["ticker"]=="ICICIBANK"].reset_index().drop(['index'], axis=1)
    TATACONSUM = data[data["ticker"]=="TATACONSUM"].reset_index().drop(['index'], axis=1)
    return {
    "IOC": IOC,
    "TATAPOWER": TATAPOWER,
    "ONGC": ONGC,
    "GAIL": GAIL,
    "ITC": ITC,
    "HINDPETRO": HINDPETRO,
    "DLF": DLF,
    "AMBUJACEM": AMBUJACEM,
    "TATAMOTORS": TATAMOTORS,
    "HINDZINC": HINDZINC,
    "SBIN": SBIN,
    "WIPRO": WIPRO,
    "DABUR": DABUR,
    "BHARTIARTL": BHARTIARTL,
    "ICICIBANK": ICICIBANK,
    "TATACONSUM": TATACONSUM
}

In [5]:
def process_data(data):
    trade_data = pd.DataFrame(columns=['close'])
    split_data = split_ticker(data)
    ticker_map = {
    "IOC": 1,
    "TATAPOWER": 2,
    "ONGC": 3,
    "GAIL": 4,
    "ITC": 5,
    "HINDPETRO": 6,
    "DLF": 7,
    "AMBUJACEM": 8,
    "TATAMOTORS": 9,
    "HINDZINC": 10,
    "SBIN": 11,
    "WIPRO": 12,
    "DABUR": 13,
    "BHARTIARTL": 14,
    "ICICIBANK": 15,
    "TATACONSUM": 16
}
    for ticker in ticker_list:        
        ticker_data = split_data[ticker]
        ticker_data['wallet'] =random.randint(5000, 20001)
        ticker_data['parallel_trade'] =random.randint(3, 5)
        ticker_data['leverage'] =random.randint(5, 20)
        ticker_data['trading_budget'] =(ticker_data.wallet * ticker_data.leverage)
        ticker_data['share_budget'] =(ticker_data.close * ticker_data.parallel_trade)
        ticker_data['number_of_share'] = round((ticker_data.trading_budget) 
                                               / (ticker_data.share_budget)) - 1        
        trade_data = trade_data.append(ticker_data, ignore_index=True)
    trade_data = trade_data.applymap(lambda ticker: ticker_map.get(ticker) if ticker in ticker_map else ticker)
    return trade_data  

In [6]:
def generate_data():
    display("Started")
    data = pd.DataFrame(columns=['close','ticker'])
    for day in range(1,39):    
        day_data = get_data(day)
        day_data = process_data(day_data[["close",'ticker']])
        data = data.append(day_data, ignore_index=True)
    display("Data loading completed")
    return data

In [7]:
def split_train_data(data):
    x = data[['close','wallet','parallel_trade','leverage','trading_budget','share_budget']]
    y = data['number_of_share']  
    cols_to_norm = x.columns
    x[cols_to_norm] = StandardScaler().fit_transform(x[cols_to_norm])
    return train_test_split(x, y, test_size=0.3)

In [8]:
def create_linear_regression_model(x_train, x_test, y_train, y_test):   
    linear_regression = LinearRegression()
    linear_regression.fit(x_train,y_train)
    display("Training model Completed")
    score = linear_regression.score(x_test,y_test)
    predictions = linear_regression.predict(x_test)
    display("Test score: {0:.2f} %".format(100 * score))    
    display('MAE:', metrics.mean_absolute_error(y_test, predictions))
    display('MSE:', metrics.mean_squared_error(y_test, predictions))
    display('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predictions)))
    return linear_regression

In [9]:
def create_logistic_regression_model(x_train, x_test, y_train, y_test):  
    logistic_regression = LogisticRegression()
    logistic_regression.fit(x_train,y_train)
    display("Training model Completed")
    score = logistic_regression.score(x_test,y_test)
    predictions = logistic_regression.predict(x_test)
    display("Test score: {0:.2f} %".format(100 * score))    
    display('MAE:', metrics.mean_absolute_error(y_test, predictions))
    display('MSE:', metrics.mean_squared_error(y_test, predictions))
    display('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, predictions)))
    return logistic_regression

In [10]:
def save_model(model, joblib_file):
    display("Saving model")
    if os.path.exists(joblib_file):
        os.remove(joblib_file)
    joblib.dump(model, joblib_file)

In [11]:
 def save(data,name,index=False):
        data.to_csv(name, index=index)

In [12]:
def load_model(joblib_file):
    display("Loading model")
    return joblib.load(joblib_file)

In [13]:
regression_data = generate_data()
x_train, x_test, y_train, y_test = split_train_data(regression_data)

'Started'

'Data loading completed'

In [14]:
filename ="data\\last_60\\17_05_2021\\days\\regression_data.csv"
save(regression_data,filename)

In [15]:
joblib_file = directory + logistic_regression_filename
save_model(create_logistic_regression_model(x_train, x_test, y_train, y_test),joblib_file)

'Training model Completed'

'Test score: 19.84 %'

'MAE:'

19.169517543859648

'MSE:'

2104.1316520467835

'RMSE:'

45.87081481777693

'Saving model'

In [16]:
joblib_file = directory + linear_regression_filename
save_model(create_linear_regression_model(x_train, x_test, y_train, y_test),joblib_file)

'Training model Completed'

'Test score: 72.68 %'

'MAE:'

64.67637807298925

'MSE:'

7529.978287887301

'RMSE:'

86.77544749459551

'Saving model'