In [55]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import matplotlib
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, accuracy_score
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import GridSearchCV

from datetime import date

import yfinance as yf

import pickle

In [56]:
def PlotData(data, graph_title, graph_name):
    """Plot a series against its index and save it as ``<graph_name>.png``.

    Parameters
    ----------
    data : pandas Series (or anything exposing ``.index``)
        Values to plot; ``data.index`` is used for the x-axis.
    graph_title : str
        Title drawn above the axes.
    graph_name : str
        Output path without extension; ``.png`` is appended.

    Notes
    -----
    Each call draws on its own figure and closes it afterwards. Previously
    every call drew on the implicit "current" figure, so a second call
    overlaid the first call's line in its PNG and figures were never released.
    """
    # Kept from the original: switch to the non-interactive Agg backend before drawing.
    matplotlib.use('Agg')

    fig, ax = plt.subplots()
    try:
        ax.plot(data.index, data)
        ax.set_title(graph_title)
        fig.savefig(f'{graph_name}.png', format='png')
    finally:
        # Always release the figure, even if plotting or saving fails.
        plt.close(fig)

In [101]:
def MinMaxData(data, col_name):
    """Return ``(min, max)`` of one column, taken from ``data.describe()``.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to summarise.
    col_name : str
        Column whose extrema are wanted; must appear in the describe() output.

    Returns
    -------
    tuple
        ``(minimum, maximum)`` exactly as reported by ``describe()``.
    """
    summary = data.describe()
    lowest = summary.loc['min', col_name]
    highest = summary.loc['max', col_name]
    return lowest, highest

In [58]:
def DataImport(stock_name):
    """Download price data for a ticker and collect per-column min/max bounds.

    Parameters
    ----------
    stock_name : str
        Ticker symbol passed straight to ``yf.download`` (no date range given).

    Returns
    -------
    data : pandas.DataFrame
        The frame returned by ``yf.download``.
    d : dict
        Keys ``open_min``, ``open_max``, ``high_min``, ``high_max``,
        ``low_min``, ``low_max``, ``vol_min``, ``vol_max`` holding the
        extrema of the Open / High / Low / Volume columns.

    Raises
    ------
    ValueError
        If the download comes back empty. Otherwise NaN bounds would be
        returned and only fail later, far from the cause.
    """
    data = yf.download(stock_name)
    if data.empty:
        raise ValueError(f"No price data returned for ticker {stock_name!r}")

    # (key prefix, column) pairs, in the same order the dict was originally built.
    bounds_spec = (("open", "Open"), ("high", "High"), ("low", "Low"), ("vol", "Volume"))

    d = {}
    for prefix, column in bounds_spec:
        d[f"{prefix}_min"], d[f"{prefix}_max"] = MinMaxData(data, column)

    return data, d


In [102]:
# Download the price frame and the min/max bounds dict for the chosen ticker.
# NOTE(review): the ticker here is "ETH-USD", but later cells title their plots "INFY"
# and request "INFY" again — confirm which symbol is intended.
data, d = DataImport("ETH-USD")

[*********************100%%**********************]  1 of 1 completed


In [89]:
# Save the Close series as fullStock.png (PlotData appends the .png extension).
PlotData(data['Close'], "INFY", "fullStock")

In [90]:
# Bare last expression: renders the downloaded frame as the cell output.
data

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1999-03-11,0.583984,0.781250,0.583984,0.732422,0.483835,172512000
1999-03-12,0.734375,0.761719,0.671875,0.728516,0.481255,39897600
1999-03-15,0.730469,0.730469,0.658203,0.662109,0.437387,12672000
1999-03-16,0.667969,0.691406,0.656250,0.656250,0.433516,9984000
1999-03-17,0.656250,0.699219,0.652344,0.660156,0.436097,15449600
...,...,...,...,...,...,...
2024-04-15,17.690001,17.709999,17.260000,17.270000,17.270000,11374300
2024-04-16,16.980000,17.190001,16.980000,17.049999,17.049999,13033300
2024-04-17,17.139999,17.180000,16.870001,16.950001,16.950001,17034100
2024-04-18,16.260000,16.900000,16.040001,16.510000,16.510000,31702900


In [91]:
def modelBuilding(data):
    """Grid-search an SVR that predicts ``Close`` from Open/Low/High/Volume.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'Open', 'Low', 'High', 'Volume' and 'Close'.
        The frame is NOT modified; rounding is applied to copies.

    Returns
    -------
    best_svr_model : sklearn.svm.SVR
        SVR using the best grid-search parameters, fitted on the
        min-max-normalised training split.
    scaler : sklearn.preprocessing.MinMaxScaler
        Scaler fitted on the training features; apply it to new feature rows
        before calling ``best_svr_model.predict``.

    Notes
    -----
    Changes from the previous version:
    * the caller's DataFrame is no longer rounded in place;
    * the StandardScaler + fixed-parameter SVR fit was removed, because its
      result was never used;
    * the best model is taken from ``GridSearchCV.best_estimator_`` (already
      refit on the full training split because ``refit`` defaults to True)
      rather than fitted a second time.
    """
    feature_cols = ['Open', 'Low', 'High', 'Volume']
    price_cols = ['Open', 'Low', 'High']

    # Round prices to 4 decimals on copies; Volume is left untouched, as before.
    X = data[feature_cols].round({col: 4 for col in price_cols})
    y = data['Close'].round(4)

    # Same 80/20 shuffled split and seed as before, so the training rows are unchanged.
    # The held-out part is not used inside this function.
    X_train, _X_test, y_train, _y_test = train_test_split(
        X, y, test_size=0.20, random_state=42
    )

    scaler = MinMaxScaler()
    X_train_normalized = scaler.fit_transform(X_train)

    # Hyper-parameter grid explored by the search.
    param_grid = {
        'kernel': ['linear', 'rbf'],
        'C': [1, 10, 50, 100],
        'epsilon': [0.01, .1, 0.2, 0.5]
    }

    grid_search = GridSearchCV(
        SVR(), param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=-1
    )
    grid_search.fit(X_train_normalized, y_train)

    best_svr_model = grid_search.best_estimator_

    return best_svr_model, scaler

In [103]:
# Fit the model and its feature scaler. predictData looks these two names up at
# notebook scope, so this cell has to run before any prediction cell.
model, scaler = modelBuilding(data)

In [64]:
def Next30_days(data, d, periods=30, seed=None):
    """Build a frame of synthetic feature rows for the days after ``data`` ends.

    Parameters
    ----------
    data : pandas.DataFrame
        Historical frame; only its index (convertible to datetimes) is used.
    d : dict
        Bounds with keys ``open_min/open_max``, ``low_min/low_max``,
        ``high_min/high_max``, ``vol_min/vol_max``.
    periods : int, default 30
        Number of consecutive calendar days to generate.
    seed : int or None, default None
        When given, draws come from a private ``RandomState(seed)`` so the
        output is reproducible. When None, the global NumPy random state is
        used, as before.

    Returns
    -------
    pandas.DataFrame
        Indexed by 'Date' with columns 'Open', 'Low', 'High', 'Volume', in
        that order.

    Notes
    -----
    Prices are drawn with ``uniform`` instead of ``randint``. ``randint``
    truncated the float bounds to integers, so every generated price was a
    whole number, and it raised when both bounds truncated to the same
    integer (e.g. a price range of 0.2 to 0.9). Volume stays an integer draw,
    but with an explicit int64 dtype so large volumes do not overflow a
    32-bit default, and with an inclusive upper bound so equal min and max
    no longer raise. The columns are sampled independently; no ordering
    between Low, Open and High is enforced.
    """
    last_date = pd.to_datetime(data.index).max()

    # Start the day after the last historical row.
    future_dates = pd.date_range(
        start=last_date + pd.Timedelta(days=1), periods=periods, freq='D', name='Date'
    )
    n_rows = len(future_dates)

    # np.random (module) and RandomState expose the same uniform/randint API.
    rng = np.random if seed is None else np.random.RandomState(seed)

    synthetic = {
        'Open': rng.uniform(d['open_min'], d['open_max'], size=n_rows),
        'Low': rng.uniform(d['low_min'], d['low_max'], size=n_rows),
        'High': rng.uniform(d['high_min'], d['high_max'], size=n_rows),
        'Volume': rng.randint(
            int(d['vol_min']), int(d['vol_max']) + 1, size=n_rows, dtype=np.int64
        ),
    }

    return pd.DataFrame(synthetic, index=future_dates)

In [98]:
def predictData(data, d, fitted_model=None, fitted_scaler=None):
    """Generate future feature rows and predict a value for each of them.

    Parameters
    ----------
    data : pandas.DataFrame
        Historical frame, forwarded to ``Next30_days``.
    d : dict
        Min/max bounds, forwarded to ``Next30_days``.
    fitted_model : object with ``predict``, optional
        Model to use. Defaults to the notebook-level ``model`` variable.
    fitted_scaler : object with ``transform``, optional
        Scaler to use. Defaults to the notebook-level ``scaler`` variable.

    Returns
    -------
    predictions_next_30_days
        Output of ``predict`` on the scaled future rows.
    next_30_days_data : pandas.DataFrame
        The unscaled future feature rows that were predicted on.

    Notes
    -----
    The original version always read ``model`` and ``scaler`` from notebook
    scope. Passing them explicitly removes that hidden dependency; the
    fallback keeps existing two-argument calls working.
    """
    active_model = model if fitted_model is None else fitted_model
    active_scaler = scaler if fitted_scaler is None else fitted_scaler

    next_30_days_data = Next30_days(data, d)

    # Apply the same scaling the model was trained with before predicting.
    next_30_days_scaled = active_scaler.transform(next_30_days_data)
    predictions_next_30_days = active_model.predict(next_30_days_scaled)

    return predictions_next_30_days, next_30_days_data

In [99]:
# pred: model output for the generated future rows; next_data: those feature rows themselves.
pred, next_data = predictData(data, d)

In [67]:
def CombineData(data, pred, next_data):
    """Append predicted closes to the historical 'Close' column.

    Parameters
    ----------
    data : pandas.DataFrame
        Historical frame; only its 'Close' column is used.
    pred : array-like
        Predicted values, one per row of ``next_data``.
    next_data : pandas.DataFrame
        Frame whose index labels the predictions.

    Returns
    -------
    pandas.DataFrame
        Single-column ('Close') frame: history first, predictions after.
    """
    history = data['Close'].to_frame(name='Close')
    forecast = pd.DataFrame(pred, index=next_data.index, columns=['Close'])
    return pd.concat([history, forecast])

In [85]:
# Historical Close followed by the predicted values, in one single-column frame.
data_final = CombineData(data, pred, next_data)

In [69]:
def plotPredict(data_final, stock_name):
    """Plot a thinned-out view of the combined Close series.

    Every 100th row is kept, walking backwards from the last row, and the
    result is handed to ``PlotData`` with the title
    ``"<stock_name> Prediction"`` and the output name ``Data_Prediction``.
    """
    # Negative step: start at the final row and keep each 100th row before it.
    thinned = data_final['Close'].iloc[::-100]
    PlotData(thinned, f"{stock_name} Prediction", "Data_Prediction")

In [86]:
# Save the thinned history-plus-prediction plot (written as Data_Prediction.png).
plotPredict(data_final, "INFY")

In [10]:
def OneYear_data(data, days=365):
    """Return the 'Close' values from the last ``days`` calendar days.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Close' column.
    days : int, default 365
        Window length in calendar days, counted back from the latest date.

    Returns
    -------
    pandas.Series
        Closing prices with a date strictly after ``latest - days``.

    Notes
    -----
    The previous ``tail(365)`` counted rows, not days. On business-day data
    (about 252 rows a year) that is roughly 17 months. For gap-free daily data
    the date window selects the same 365 rows as before. If the index is not
    a DatetimeIndex, the row-count behaviour is kept as a fallback.
    """
    close = data['Close']
    if close.empty or not isinstance(close.index, pd.DatetimeIndex):
        return close.tail(days)

    cutoff = close.index.max() - pd.Timedelta(days=days)
    return close[close.index > cutoff]

In [49]:
# Most recent year of Close values, as selected by OneYear_data; stored in the saved bundle below.
one_year_data = OneYear_data(data)

In [11]:
def LastMonth_data(data, days=30):
    """Return the 'Close' values from the last ``days`` calendar days.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Close' column.
    days : int, default 30
        Window length in calendar days, counted back from the latest date.

    Returns
    -------
    pandas.Series
        Closing prices with a date strictly after ``latest - days``.

    Notes
    -----
    The previous ``tail(30)`` counted rows, not days. On business-day data
    30 rows span about six weeks. For gap-free daily data the date window
    selects the same 30 rows as before. If the index is not a DatetimeIndex,
    the row-count behaviour is kept as a fallback.
    """
    close = data['Close']
    if close.empty or not isinstance(close.index, pd.DatetimeIndex):
        return close.tail(days)

    cutoff = close.index.max() - pd.Timedelta(days=days)
    return close[close.index > cutoff]

In [50]:
# Most recent month of Close values, as selected by LastMonth_data; stored in the saved bundle below.
last_month_data = LastMonth_data(data)

In [12]:
def LastWeek_data(data, days=7):
    """Return the 'Close' values from the last ``days`` calendar days.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame with a 'Close' column.
    days : int, default 7
        Window length in calendar days, counted back from the latest date.

    Returns
    -------
    pandas.Series
        Closing prices with a date strictly after ``latest - days``.

    Notes
    -----
    The previous ``tail(7)`` counted rows, not days. On business-day data
    7 rows reach back about a week and a half. For gap-free daily data the
    date window selects the same 7 rows as before. If the index is not a
    DatetimeIndex, the row-count behaviour is kept as a fallback.
    """
    close = data['Close']
    if close.empty or not isinstance(close.index, pd.DatetimeIndex):
        return close.tail(days)

    cutoff = close.index.max() - pd.Timedelta(days=days)
    return close[close.index > cutoff]

In [51]:
# Most recent week of Close values, as selected by LastWeek_data; stored in the saved bundle below.
last_week_data = LastWeek_data(data)

In [52]:
# Everything that gets pickled for later use, gathered in one dict.
full_data = {
    "model": model,
    "scaler": scaler,
    # `x` only ever existed as a local inside plotPredict, so referencing it here
    # raised NameError on a fresh kernel. Rebuild the same thinned series explicitly.
    "x_data": data_final['Close'][::-100],
    "one_year_data": one_year_data,
    "last_month_data": last_month_data,
    "last_week_data": last_week_data,
    # Misspelled key kept alongside the corrected one so that any existing
    # reader of the pickle that uses the old name keeps working.
    "last_weed_data": last_week_data,
}

In [53]:
# Persist the bundle. open() does not create missing directories, so
# ./stockpred/Model must already exist relative to the working directory.
with open('./stockpred/Model/svm_model.pkl', 'wb') as file:
    pickle.dump(full_data, file)

In [18]:
# Read the bundle back. pickle.load can execute arbitrary code from the file,
# so only load pickles produced by this notebook or another trusted source.
with open('./stockpred/Model/svm_model.pkl', 'rb') as file:
    load_data = pickle.load(file)

In [19]:
# The bundle assembled in this notebook has no 'dataImport' entry, so indexing it
# directly raised KeyError. Use the stored callable when an older pickle provides
# one, otherwise fall back to the DataImport function defined above.
data_import = load_data.get('dataImport', DataImport)
data, d = data_import("INFY")

[*********************100%%**********************]  1 of 1 completed
