# Fetching data from the yfinance library

In [1]:
import pandas as pd
import yfinance as yf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit

In [2]:
def Fetch_Data(period="10y"):
    """Download historical price data for the notebook-level ``ticker``.

    Args:
        period: Look-back window forwarded to ``yf.download``. Defaults to
            ``"10y"``, the value that used to be hard-coded here.

    Returns:
        The frame produced by ``yf.download`` with an additional ``Date``
        column holding the index converted through ``pd.to_datetime``.
    """
    # NOTE: `ticker` is not a parameter; it is read from the enclosing
    # (notebook/global) scope and must be defined before this is called.
    df = yf.download(ticker, period=period)

    # Expose the index as an ordinary column so callers can use df['Date'].
    df['Date'] = pd.to_datetime(df.index)

    return df

In [3]:
def Visualize_Data(df):
    """Plot the closing price over time, then print the ticker's info fields.

    Args:
        df: Data providing the ``Date`` and ``Close`` columns that are plotted.

    The symbol used in the title and in the ``yf.Ticker`` lookup comes from
    the notebook-level ``ticker`` variable, not from an argument.
    """
    plt.figure(figsize=(20, 10))

    # Only the 'Close' series is drawn here, although the title also
    # mentions predicted prices.
    dates = df['Date']
    closing_prices = df['Close']
    plt.plot(dates, closing_prices, label='Close')

    plt.title(f'Actual vs. Predicted Prices for {ticker}')
    plt.ylabel('Price')
    plt.xlabel('Date')
    plt.grid(True)
    plt.legend()

    plt.show()

    # Dump every field of the ticker's info mapping, one per paragraph.
    ticker_info = yf.Ticker(ticker).info
    for key, value in ticker_info.items():
        print(f"{key}: {value}\n")

# Data Pre-Processing:

In [4]:
def PreProcessing():
    """Report basic diagnostics on the notebook-level ``df`` and build model inputs.

    Rows containing null values are excluded from both the target and the
    features. The notebook-level ``df`` itself is not modified.

    Returns:
        tuple: ``(output_var, features, feature_transform)`` where
        ``output_var`` is a one-column frame holding ``Close``, ``features``
        is the list of feature column names, and ``feature_transform`` is a
        frame of those columns after ``MinMaxScaler().fit_transform``,
        sharing its index with the cleaned data.
    """
    print("Dataframe Shape: ", df.shape)
    has_nulls = df.isnull().values.any()
    print("Null Value Present: ", has_nulls)

    # dropna() returns a new frame rather than editing in place; the result
    # must be kept, otherwise the NaN rows flow into the scaler and target.
    data = df.dropna() if has_nulls else df

    # Target variable
    output_var = pd.DataFrame(data['Close'])
    # Feature columns
    features = ['Open', 'High', 'Low', 'Volume']

    # Scale the features and restore column labels and the original index,
    # since fit_transform returns a bare array.
    scaler = MinMaxScaler()
    scaled = scaler.fit_transform(data[features])
    feature_transform = pd.DataFrame(columns=features, data=scaled, index=data.index)

    return output_var, features, feature_transform

# Train-Test Split

In [5]:
def Train_Test_Split(n_splits=10):
    """Split the notebook-level features/target using the last time-series fold.

    Reads ``feature_transform`` and ``output_var`` from the enclosing
    (notebook/global) scope. ``TimeSeriesSplit`` yields ``n_splits`` folds;
    only the final one is used for the returned split.

    Args:
        n_splits: Number of folds requested from ``TimeSeriesSplit``.
            Defaults to 10, the value that used to be hard-coded here.

    Returns:
        tuple: ``(X_train, X_test, y_train, y_test)``. The ``X`` parts are
        row slices of ``feature_transform``; the ``y`` parts are flattened
        1-D arrays taken from ``output_var``.
    """
    timesplit = TimeSeriesSplit(n_splits=n_splits)

    # Only the final fold matters, so take it directly instead of
    # re-slicing the data on every intermediate fold.
    *_, (train_index, test_index) = timesplit.split(feature_transform)

    # Rows are selected by position using the fold sizes: the first
    # len(train_index) rows train, the next len(test_index) rows test.
    n_train = len(train_index)
    stop = n_train + len(test_index)

    X_train = feature_transform[:n_train]
    X_test = feature_transform[n_train:stop]
    y_train = output_var[:n_train].values.ravel()
    y_test = output_var[n_train:stop].values.ravel()

    return X_train, X_test, y_train, y_test