In [1]:
! pip install yfinance

Defaulting to user installation because normal site-packages is not writeable


In [2]:
import yfinance as yf
import pandas as pd
import numpy as np

In [3]:
print("NumPy Version:", np.__version__)

NumPy Version: 1.23.5


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Step 1: read the ticker universe from disk; the CSV must provide a 'Ticker' column
tickers_df = pd.read_csv(filepath_or_buffer='Tickers.csv')
ticker_column = tickers_df.loc[:, 'Ticker']
tickers = ticker_column.tolist()

In [None]:
tickers

In [None]:
from datetime import datetime, timedelta

# Take one snapshot of "now" so both bounds are derived from the same instant
today = datetime.today()
# End of the download window: today, as YYYY-MM-DD
end_date = today.strftime('%Y-%m-%d')
# Start of the download window: the same calendar day ten years earlier
try:
    ten_years_ago = today.replace(year=today.year - 10)
except ValueError:
    # 29 February has no counterpart ten years earlier; fall back to 28 February
    ten_years_ago = today.replace(year=today.year - 10, day=28)
start_date = ten_years_ago.strftime('%Y-%m-%d')

In [None]:
# Echo the download window so it is visible in the notebook output
for label, value in (("Start Date:", start_date), ("End Date:", end_date)):
    print(label, value)

In [None]:
# Download daily history for every ticker between start_date and end_date.
# auto_adjust=False keeps the raw 'Close' next to a separate 'Adj Close' column.
data = yf.download(tickers, start=start_date, end=end_date, progress=False, auto_adjust=False)

# `data` carries two column levels; moving level 1 (the ticker) into the rows
# gives one row per (date, ticker) pair.
stock_data = data.stack(level=1)
stock_data.index.names = ['Date', 'Ticker']
stock_data = stock_data.reset_index()
stock_data.columns.name = None

# Select the price fields BY NAME instead of renaming them positionally: the
# stacked frame is not guaranteed to come back in Open/High/Low/Close order
# (it is typically alphabetical), so a positional rename can mislabel prices.
stock_data = stock_data[['Date', 'Ticker', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']]

In [None]:
stock_data.head()

In [None]:
stock_data['Ticker'].value_counts()

In [None]:
stock_data.tail()

In [None]:
# Independent snapshot of the freshly downloaded frame; later cells pivot it
# for the cross-ticker Close and Daily_Return tables.
s_data = stock_data.copy()

In [None]:
# Report the size of the combined (all tickers) frame
rows = stock_data.shape[0]
col = stock_data.shape[1]
print(f"Dimensions of dataset: {stock_data.shape}")
print(f"Rows: {rows} \nColumns: {col}")

In [None]:
# Count columns per dtype, keyed by dtype NAME. Looking the counts up by name
# avoids pairing a positional value_counts() entry with the wrong label.
dtype_counts = stock_data.dtypes.astype(str).value_counts()
print(f'The data type contains:\n object --> {dtype_counts.get("object", 0)}\n float64 --> {dtype_counts.get("float64", 0)}')

In [None]:
stock_data.info()

In [None]:
# Every column other than the two identifier columns is converted below.
float_cols = stock_data.columns.drop(["Ticker","Date"])
# Round to the nearest whole number and store as pandas' nullable Int64.
# NOTE(review): this discards the fractional part of every price, which
# matters for low-priced tickers and for any return computed later — confirm
# that whole-number prices are really intended.
stock_data[float_cols] = stock_data[float_cols].round().astype("Int64")
stock_data.head()

In [None]:
stock_data.sort_values(by="Date").head()

In [None]:
# Keep only the rows whose Ticker is 'RELIANCE.NS'
is_reliance = stock_data['Ticker'].eq('RELIANCE.NS')
stock_data1 = stock_data[is_reliance]
stock_data1.head()

In [None]:
# Report the size of the single-ticker frame
rows = stock_data1.shape[0]
col = stock_data1.shape[1]
print(f"Dimensions of dataset: {stock_data1.shape}")
print(f"Rows: {rows} \nColumns: {col}")

In [None]:
# Count columns per dtype, keyed by dtype NAME. The numeric columns of this
# frame were converted to nullable Int64 earlier in the notebook, so that is
# the dtype reported here (not float64).
dtype_counts = stock_data1.dtypes.astype(str).value_counts()
print(f'The data type contains:\n object --> {dtype_counts.get("object", 0)}\n Int64 --> {dtype_counts.get("Int64", 0)}')

In [None]:
stock_data1.info()

In [None]:
# Work on an independent copy of the single-ticker slice so later edits do not
# touch the combined frame.
stock_data2 = stock_data1.copy()

In [None]:
# Count columns per dtype, keyed by dtype NAME (the numeric columns are
# pandas' nullable Int64, spelled with a capital I).
dtype_counts = stock_data2.dtypes.astype(str).value_counts()
print(f'The data type contains:\n object --> {dtype_counts.get("object", 0)}\n Int64 --> {dtype_counts.get("Int64", 0)}')

In [None]:
stock_data2.info()

In [None]:
stock_data2.sort_values(by="Date").head()

In [None]:
# Calendar year of every row of the single-ticker frame; used as the x
# variable of the count plot in the next cell.
Year = stock_data2['Date'].dt.year

In [None]:
# Number of rows (trading days) per calendar year for the single-ticker frame
plt.figure(figsize=(20, 10))
sns.set_style('darkgrid')
ax = sns.countplot(data=stock_data2, x=Year, palette='viridis')
# Write the count on top of each bar
for bar_group in ax.containers:
    ax.bar_label(bar_group)
ax.set_ylabel("Count", fontsize=15)
ax.set_xlabel("Year", fontsize=15)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Adj Close against Date, with the marginal distributions of both
joint_grid = sns.jointplot(data=stock_data2, x="Date", y="Adj Close")
sns.set_style('darkgrid')
# Label the central (joint) axes, which jointplot leaves as the current axes
joint_grid.ax_joint.set_ylabel("Adj Close", fontsize=11)
joint_grid.ax_joint.set_xlabel("Date", fontsize=11)
plt.show()

In [None]:
# Kernel density estimate of the "Volume" column of the single-ticker frame
plt.figure(figsize=(7,5), dpi=80)
# NOTE(review): no `hue` is passed, so `palette` and `common_norm` presumably
# have nothing to act on — confirm and drop them if so.
sns.kdeplot(data=stock_data2, x="Volume", fill=True, common_norm=False,
            palette="crest", alpha=.5, linewidth=0)
plt.show()

In [None]:
# Assessing the 0 values of the "Volume" column
len(stock_data2[stock_data2["Volume"]==0])

In [None]:
# Drop zero-volume rows. The explicit .copy() makes stock_data3 an independent
# frame, so the column assignments made on it later do not write into a slice
# of stock_data2 (the source of SettingWithCopyWarning).
stock_data3 = stock_data2[stock_data2["Volume"] != 0].copy()

In [None]:
stock_data3.head()

In [None]:
# Assessing the 0 values of the "Volume" column
len(stock_data3[stock_data3["Volume"]==0])

In [None]:
# Assessing the 0 values of the "Volume" column
len(stock_data[stock_data["Volume"]==0])

In [None]:
# Drop zero-volume rows from the combined frame. The explicit .copy() makes the
# result an independent frame, because later cells modify it in place
# (drop / drop_duplicates with inplace=True and new column assignments).
stock_data = stock_data[stock_data["Volume"] != 0].copy()

In [None]:
# Snapshot of the combined frame after the zero-volume rows were removed.
# NOTE(review): s_data2 is not read again in the visible part of the notebook.
s_data2=stock_data.copy()

In [None]:
# Calendar year of every row of the combined frame; used as the x variable of
# the count plot in the next cell.
Year1 = stock_data['Date'].dt.year

In [None]:
# Number of rows per calendar year across all tickers
plt.figure(figsize=(20, 10))
sns.set_style('darkgrid')
ax = sns.countplot(data=stock_data, x=Year1, palette='viridis')
# Write the count on top of each bar
for bar_group in ax.containers:
    ax.bar_label(bar_group)
ax.set_ylabel("Count", fontsize=15)
ax.set_xlabel("Year", fontsize=15)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Smallest value of every non-identifier column
value_columns = stock_data.columns.drop(['Ticker','Date'])
for i in value_columns:
    smallest = stock_data[i].min()
    print(f"The min value of  {i}  is  {smallest}")

In [None]:
# Largest value of every non-identifier column
value_columns = stock_data.columns.drop(['Ticker','Date'])
for i in value_columns:
    largest = stock_data[i].max()
    print(f"The max value of  {i}  is  {largest}")

In [None]:
missing_values = pd.DataFrame(stock_data.isnull().sum(), columns=['Missing Values'])
missing_values

In [None]:
# Collect every column that holds a single distinct value, then remove them
# from the frame in one call.
columns_no_varience = [
    name for name in stock_data.columns
    if len(stock_data[name].unique()) == 1
]
stock_data.drop(columns=columns_no_varience, inplace=True)
print(columns_no_varience)
print(stock_data.shape)

In [None]:
# Flag rows that repeat an earlier row, report whether any exist, then remove
# them in place and show the resulting shape.
duplicate_rows = stock_data.duplicated()
any_duplicated = duplicate_rows.any()
print(any_duplicated)
stock_data.drop_duplicates(inplace=True)
shape_after = stock_data.shape
print(shape_after)

In [None]:
## Checking For Duplicated Rows in Data
stock_data.duplicated().sum()

In [None]:
# Skewness of each price column before any outlier handling
stock_data_numerical_columns = ['Open', 'High', 'Close', 'Low', 'Adj Close']
for column in stock_data_numerical_columns:
    column_skew = stock_data[column].skew()
    print(f'Skewness of {column} before outlier treatment:  {column_skew}')

In [None]:
# Density-normalised histogram of every price column
price_columns = ['Open', 'High', 'Close', 'Low', 'Adj Close']
stock_data[price_columns].hist(
    figsize=(15, 7),
    color='dodgerblue',
    density=True,
    edgecolor='black',
    linewidth=0.5,
)
plt.show()

In [None]:
# One box plot per price column, laid out on a 2 x 3 grid
price_columns = ['Open', 'High', 'Close', 'Low', 'Adj Close']
stock_data[price_columns].plot(
    kind='box',
    subplots=True,
    layout=(2, 3),
    figsize=(17, 5),
)
plt.show()

In [None]:
# Count, per price column, the values lying outside the 1.5 * IQR fences.
stock_data_numerical_columns = ['Open', 'High', 'Close', 'Low', 'Adj Close']
for column in stock_data_numerical_columns:
    Q1, Q3 = (stock_data[column].quantile(q) for q in (0.25, 0.75))
    IQR = Q3 - Q1
    lower, upper = Q1 - 1.5 * IQR, Q3 + 1.5 * IQR

    # Missing values are removed before comparing against the fences
    column_data = stock_data[column].dropna()
    outside_fences = (column_data < lower) | (column_data > upper)
    outliers = column_data[outside_fences]

    print(column)
    print('Identified outliers: %d' % len(outliers))
    print('*' * 40)

In [None]:
# Remove every row that has a missing value in any column; the result is a new
# frame bound to the same name.
stock_data = stock_data.dropna()

In [None]:
stock_data['Open'].isna().sum()

In [None]:
# Density of Open with its mean marked by a dashed red line
open_prices = stock_data['Open']
open_prices.plot.density(figsize=(4, 3))
plt.title('Density Plot of Open before imputation')
plt.axvline(x=open_prices.mean(), color='red', linestyle='--', label='Mean')
plt.legend()
plt.show()

In [None]:
Skewness_of_Open = stock_data['Open'].skew()
Skewness_of_Open

In [None]:
stock_data['Close'].isna().sum()

In [None]:
# Density of Close with its mean marked by a dashed red line
close_values = stock_data['Close']
close_values.plot.density(figsize=(4, 3))
plt.title('Density Plot of Close before imputation')
plt.axvline(x=close_values.mean(), color='red', linestyle='--', label='Mean')
plt.legend()
plt.show()

In [None]:
Skewness_of_close = stock_data['Close'].skew()
Skewness_of_close

In [None]:
stock_data['Low'].isna().sum()

In [None]:
# Density of Low with its mean marked by a dashed red line
low_prices = stock_data['Low']
low_prices.plot.density(figsize=(4, 3))
plt.title('Density Plot of Low before imputation')
plt.axvline(x=low_prices.mean(), color='red', linestyle='--', label='Mean')
plt.legend()
plt.show()

In [None]:
Skewness_of_low = stock_data['Low'].skew()
Skewness_of_low

In [None]:
stock_data['High'].isna().sum()

In [None]:
# Density of High with its mean marked by a dashed red line
high_prices = stock_data['High']
high_prices.plot.density(figsize=(4, 3))
plt.title('Density Plot of High before imputation')
plt.axvline(x=high_prices.mean(), color='red', linestyle='--', label='Mean')
plt.legend()
plt.show()

In [None]:
Skewness_of_high = stock_data['High'].skew()
Skewness_of_high

In [None]:
stock_data['Adj Close'].isna().sum()

In [None]:
# Density of Adj Close with its own mean marked by a dashed red line
# (the mean line previously used the 'High' column by mistake).
stock_data['Adj Close'].plot(kind='density', figsize = (4, 3))
plt.title('Density Plot of Adj Close before imputation')
plt.axvline(x=stock_data['Adj Close'].mean(), color='red', linestyle='--', label='Mean')
plt.legend()
plt.show()

In [None]:
# Skewness of Adj Close, stored under its own name so it no longer overwrites
# the value computed for High.
Skewness_of_adj_close = stock_data['Adj Close'].skew()
Skewness_of_adj_close

In [None]:
stock_data['Volume'].isna().sum()

In [None]:
Skewness_of_vol = stock_data['Volume'].skew()
Skewness_of_vol

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Wide canvas and seaborn's white grid style
plt.figure(figsize=(14, 6))
sns.set_style('whitegrid')

# One Close-vs-Date line per ticker, in order of first appearance in the frame
for ticker in stock_data['Ticker'].unique():
    ticker_rows = stock_data[stock_data['Ticker'] == ticker]
    plt.plot(ticker_rows['Date'], ticker_rows['Close'], label=ticker, linewidth=2)

# Titles and axis labels
plt.title("Close Prices of Multiple Stock Tickers", fontsize=14, fontweight='bold')
plt.xlabel("Date", fontsize=12)
plt.ylabel("Close Price", fontsize=12)
# Legend sits outside the axes so it does not cover the lines
plt.legend(title="Tickers", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.xticks(rotation=45)
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()



In [None]:
stock_data['Ticker'].value_counts()

In [None]:
# Mark every MARUTI.BO row by blanking its Ticker; a later cell drops the rows
# whose Ticker is missing, which removes this ticker from the frame.
stock_data.loc[(stock_data['Ticker'] == 'MARUTI.BO') , 'Ticker'] = np.nan

In [None]:
missing_values = pd.DataFrame(stock_data.isnull().sum(), columns=['Missing Values'])
missing_values

In [None]:
# Drop the rows whose Ticker is missing (only the Ticker column is checked).
stock_data = stock_data.dropna(subset=['Ticker'])

In [None]:
missing_values = pd.DataFrame(stock_data.isnull().sum(), columns=['Missing Values'])
missing_values

In [None]:
stock_data['Ticker'].value_counts()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

# Wide canvas and seaborn's white grid style
plt.figure(figsize=(14, 6))
sns.set_style('whitegrid')

# One Close-vs-Date line per remaining ticker, in order of first appearance
for ticker in stock_data['Ticker'].unique():
    ticker_rows = stock_data[stock_data['Ticker'] == ticker]
    plt.plot(ticker_rows['Date'], ticker_rows['Close'], label=ticker, linewidth=2)

# Titles and axis labels
plt.title("Close Prices of Multiple Stock Tickers", fontsize=14, fontweight='bold')
plt.xlabel("Date", fontsize=12)
plt.ylabel("Close Price", fontsize=12)
# Legend sits outside the axes so it does not cover the lines
plt.legend(title="Tickers", bbox_to_anchor=(1.05, 1), loc='upper left')
plt.xticks(rotation=45)
plt.grid(True, linestyle='--', alpha=0.6)

plt.show()

In [None]:
# One box plot per price column, laid out on a 2 x 3 grid
price_columns = ['Open', 'High', 'Close', 'Low', 'Adj Close']
stock_data[price_columns].plot(
    kind='box',
    subplots=True,
    layout=(2, 3),
    figsize=(17, 5),
)
plt.show()

In [None]:
# Density-normalised histogram of every price column
price_columns = ['Open', 'High', 'Close', 'Low', 'Adj Close']
stock_data[price_columns].hist(
    figsize=(15, 7),
    color='dodgerblue',
    density=True,
    edgecolor='black',
    linewidth=0.5,
)
plt.show()

In [None]:
# Daily traded volume of the single-ticker frame over the whole downloaded
# window (ten years, not one). Plot against the Date column so the x axis
# shows dates instead of row numbers.
stock_data3.plot(x='Date', y='Volume', legend=True, figsize=(10,4))

In [None]:
# A row is consistent when Open and Close both lie between Low and High.
low, open_, close, high = (stock_data[name] for name in ("Low", "Open", "Close", "High"))
# Bullish case: Low ≤ Open ≤ Close ≤ High
bullish_condition = (low <= open_) & (open_ <= close) & (close <= high)
# Bearish case: Low ≤ Close ≤ Open ≤ High
bearish_condition = (low <= close) & (close <= open_) & (open_ <= high)
# Rows matching neither ordering
invalid_rows = stock_data[~(bullish_condition | bearish_condition)]
if invalid_rows.empty:
    print("All rows in stock_data are valid.")
else:
    print("Invalid rows found that do not match Bullish or Bearish cases:")
    print(invalid_rows)

In [None]:
# Where Low is above Open (then above Close), overwrite Low with that value so
# Low becomes the smallest of the three.
# NOTE(review): only Low is repaired; a High that is below Open or Close is
# left as is, and overwriting prices hides whatever made the rows inconsistent
# — confirm the cause before relying on this.
stock_data.loc[stock_data["Low"] > stock_data["Open"], "Low"] = stock_data["Open"]
stock_data.loc[stock_data["Low"] > stock_data["Close"], "Low"] = stock_data["Close"]

In [None]:
# Repeat the consistency check after the Low column was repaired
low, open_, close, high = (stock_data[name] for name in ("Low", "Open", "Close", "High"))
bullish_condition = (low <= open_) & (open_ <= close) & (close <= high)
bearish_condition = (low <= close) & (close <= open_) & (open_ <= high)

# Rows that still match neither ordering
invalid_rows = stock_data[~(bullish_condition | bearish_condition)]

if not invalid_rows.empty:
    print("Some rows are still invalid:")
    print(invalid_rows)
else:
    print("All rows are now valid!")

In [None]:
stock_data.describe()

In [None]:
# Pairwise correlation of the numeric columns only; without numeric_only=True
# recent pandas versions raise on the string 'Ticker' column.
stock_data.corr(numeric_only=True)

In [None]:
# Adj Close over time for the single-ticker frame
stock = 'RELIANCE.NS'
fig, ax = plt.subplots(figsize=(10, 5))
sns.lineplot(data=stock_data3, x='Date', y='Adj Close', ax=ax)
ax.set_title(f'{stock} Closing Price')
plt.show()

In [None]:
# Daily traded volume of the single-ticker frame, plotted against the Date
# column so the x axis shows dates instead of row numbers.
stock_data3.plot(x='Date', y='Volume', legend=True, figsize=(10,4))

In [None]:
# Distinct tickers still present in the combined frame, in order of first
# appearance; the per-ticker plots below iterate over this array.
Tech_list=stock_data["Ticker"].unique()
Tech_list

In [None]:
# Small-multiples view: one Close-vs-Date panel per ticker.
plt.figure(figsize=(20,15))
for i, company in enumerate(Tech_list,1):
    # Panels are placed on a fixed 5 x 4 grid, so at most 20 tickers fit
    plt.subplot(5,4,i)
    plt.title(company)
    # Rows of the current ticker only
    df=stock_data[stock_data["Ticker"]== company]
    plt.plot(df["Date"], df["Close"])
    plt.xticks(rotation="vertical")

In [None]:
import plotly.express as px

# One interactive Volume-vs-Date chart per ticker. The loop variable and the
# filter now use the same name; previously the filter read a stale `company`
# left over from an earlier cell, so every chart showed the same ticker.
for company in Tech_list:
    df = stock_data[stock_data["Ticker"] == company]
    plot = px.line(df, x="Date", y="Volume", title=company)
    plot.show()

In [None]:
stock_data3.head()

In [None]:
# Cast the five price columns of the single-ticker frame to float so the
# arithmetic in the following cells produces floating-point results.
col=['Open','Close','Low','High','Adj Close']
stock_data3[col] = stock_data3[col].astype(float)

In [None]:
# Intraday move: Close minus Open
stock_data3["Daily price change"] = stock_data3["Close"].sub(stock_data3["Open"])

In [None]:
stock_data3.head()

In [None]:
# Intraday move expressed as a percentage of Close
stock_data3["% daily price change"] = (
    stock_data3["Daily price change"].div(stock_data3["Close"]).mul(100)
)

In [None]:
stock_data3.head()

In [None]:
plot=px.line(stock_data3, x="Date", y="% daily price change", title= "RELIANCE.NS")
plot.show()

In [None]:
fig = px.line(stock_data3, x="Date", y="% daily price change", title='Daily price change in stock' ,template = 'plotly_dark')
fig.show()

In [None]:
# 20-row rolling mean of the percentage change; the first 19 rows are NaN
# because the window is not yet full.
stock_data3["Smoothed Change"] = stock_data3["% daily price change"].rolling(window=20).mean()

In [None]:
plot = px.line(stock_data3, x="Date", y="Smoothed Change", title="RELIANCE.NS - Smoothed Daily Price Change")
plot.show()

In [None]:
stock_data3.info()

In [None]:
# Make Date the index so the frame can be resampled by calendar period.
# The guard lets the cell be re-run: once Date is the index there is no 'Date'
# column left, and the unguarded version raised KeyError on a second run.
if 'Date' in stock_data3.columns:
    stock_data3['Date'] = pd.to_datetime(stock_data3['Date'])
    stock_data3.set_index('Date', inplace=True)
# Monthly mean of Close
stock_data3['Close'].resample('M').mean().plot()

In [None]:
stock_data3['Close'].resample('Y').mean().plot()

In [None]:
# Wide table of Close prices: one row per trading date, one column per ticker.
# index='Date' is what puts all tickers quoted on the same day into the same
# row; without it the pivot keeps the long frame's row index, so every row
# holds a single ticker and the later fill steps invent the rest.
close_prices = s_data.pivot(index='Date', columns='Ticker', values='Close')
# Replace the dates with a simple integer index
close_prices.reset_index(drop=True, inplace=True)
# Number the rows 1, 2, 3...
close_prices.index = range(1, len(close_prices) + 1)

In [None]:
close_prices.head()

In [None]:
# Carry the last known price forward over gaps...
close_prices.ffill(inplace=True)
# ...then back-fill whatever is still missing at the top of each column.
# Both steps always run; ffill/bfill replace the deprecated fillna(method=...).
close_prices.bfill(inplace=True)

In [None]:
close_prices.head()

In [None]:
close_prices.isnull().sum()

In [None]:
missing_values = pd.DataFrame(close_prices.isnull().sum(), columns=['Missing Values'])
missing_values

In [None]:
sns.pairplot(data=close_prices)

In [None]:
sns.heatmap(close_prices.corr(),annot=True,cmap='gray_r',linecolor="black")

In [None]:
plt.figure(figsize=(10,10))
stock_data3['Close'].resample("Y").mean().plot(kind="bar")
plt.xticks(rotation="vertical")

In [None]:
# Let's see a historical view of the closing price
stock_data3['Close'].plot(legend=True, figsize = (12, 6))
plt.show()

In [None]:
# Daily traded volume of the single-ticker frame over the whole downloaded
# window (the window set at the top of the notebook spans ten years).
stock_data3['Volume'].plot(legend=True,figsize=(10,4))
plt.show()

In [None]:
# Simple moving averages of Close over 10, 20 and 50 rows
ma_day = [10,20,50]
close_series = stock_data3['Close']
for ma in ma_day:
    column_name = "MA for %s days" % (str(ma))
    stock_data3[column_name] = close_series.rolling(ma).mean()

In [None]:
stock_data3[['Close','MA for 10 days','MA for 20 days','MA for 50 days']].plot(subplots=False,figsize=(12,6))
plt.show()

In [None]:
# Row-over-row fractional change of Close (pct_change returns a fraction, not
# a percentage).
# NOTE(review): this overwrites the 'Daily price change' column that earlier
# held Close - Open; '% daily price change' and 'Smoothed Change' were derived
# from the old values — confirm the overwrite is intended.
stock_data3['Daily price change'] =  stock_data3['Close'].pct_change()
# Plot the row-over-row change with dashed lines and point markers
stock_data3['Daily price change'].plot(figsize=(12,5),legend=True,linestyle='--',marker='o')
plt.show()

In [None]:
s_data.info()

In [None]:
# Intraday move (Close - Open) as a percentage of Close, computed on the
# snapshot taken right after the download (before prices were rounded).
# NOTE(review): the denominator is Close rather than Open — confirm that is
# the intended base.
s_data['Daily_Return'] = ((s_data['Close'] - s_data['Open']) / s_data['Close']) * 100

In [None]:
# Wide table of daily returns: one row per date, one column per ticker
daily_returns = s_data.pivot(index='Date', columns='Ticker', values='Daily_Return')

# Swap the dates for a plain counter that starts at 1
daily_returns = daily_returns.reset_index(drop=True)
daily_returns.index = daily_returns.index + 1
daily_returns.head()

In [None]:
# Exclude ADANIPOWER.BO from the return table. errors="ignore" keeps the cell
# working when that ticker is not in the table (for example when the cell is
# re-run, or when the ticker list changes).
daily_returns = daily_returns.drop(columns=["ADANIPOWER.BO"], errors="ignore")
daily_returns.head()

In [None]:
sns.pairplot(data=daily_returns)

In [None]:
sns.heatmap(daily_returns.corr(),annot=True,cmap='gray_r',linecolor="black")

In [None]:
stock_data3.head()

In [None]:
missing_values = pd.DataFrame(stock_data3.isnull().sum(), columns=['Missing Values'])
missing_values

In [None]:
# Carry the last known value forward over gaps...
stock_data3.ffill(inplace=True)
# ...then back-fill whatever is still missing at the top of each column (the
# rolling-window columns start with NaN). Both steps always run; ffill/bfill
# replace the deprecated fillna(method=...).
stock_data3.bfill(inplace=True)

In [None]:
missing_values = pd.DataFrame(stock_data3.isnull().sum(), columns=['Missing Values'])
missing_values

In [None]:
stock_data.head()

In [None]:
# Cast the five price columns of the combined frame to float so the arithmetic
# in the following cells produces floating-point results.
col=['Open','Close','High','Low','Adj Close']
stock_data[col]=stock_data[col].astype(float)

In [None]:
stock_data.head()

In [None]:
# Intraday move: Close minus Open
stock_data["Daily price change"] = stock_data["Close"].sub(stock_data["Open"])

In [None]:
stock_data.head()

In [None]:
# Intraday move expressed as a percentage of Close
stock_data["% daily price change"] = (
    stock_data["Daily price change"].div(stock_data["Close"]).mul(100)
)

In [None]:
# Split the combined frame into one sub-frame per ticker so each panel shows
# only that ticker's rows (previously every entry was the whole frame, so all
# panels were identical). sort=False keeps the order of first appearance.
all_stock_data = {
    ticker: ticker_frame
    for ticker, ticker_frame in stock_data.groupby("Ticker", sort=False)
}

# Five panels per row, with as many rows as the ticker count requires
ncols = 5
nrows = max(1, -(-len(all_stock_data) // ncols))  # ceiling division
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 5 * nrows), squeeze=False)

# Flatten the axes array for easier iteration
axes = axes.flatten()

# Plot each ticker's daily percentage change against its trading dates
for ax, (ticker, ticker_frame) in zip(axes, all_stock_data.items()):
    ax.plot(ticker_frame["Date"], ticker_frame["% daily price change"], color="blue", alpha=0.6)
    ax.set_title(ticker)
    ax.tick_params(axis='x', rotation=30)  # keep the date labels readable
    ax.set_ylabel("% Daily Price Change")

# Hide the panels that have no ticker assigned
for ax in axes[len(all_stock_data):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# 30-row rolling mean of the percentage change, computed separately for each
# ticker. The combined frame interleaves tickers, so an ungrouped rolling
# window would average values belonging to different stocks.
stock_data["Smoothed Change"] = (
    stock_data.groupby("Ticker")["% daily price change"]
    .transform(lambda s: s.rolling(window=30).mean())
)
# Remove the rows whose window was not yet full (and any other row with NaN)
stock_data.dropna(inplace=True)

In [None]:
# Split the combined frame into one sub-frame per ticker so each panel shows
# only that ticker's rows (previously every entry was the whole frame, so all
# panels were identical). sort=False keeps the order of first appearance.
all_stock_data = {
    ticker: ticker_frame
    for ticker, ticker_frame in stock_data.groupby("Ticker", sort=False)
}

# Five panels per row, with as many rows as the ticker count requires
ncols = 5
nrows = max(1, -(-len(all_stock_data) // ncols))  # ceiling division
fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=(18, 5 * nrows), squeeze=False)

# Flatten the axes array for easier iteration
axes = axes.flatten()

# Plot each ticker's smoothed change against its trading dates
for ax, (ticker, ticker_frame) in zip(axes, all_stock_data.items()):
    ax.plot(ticker_frame["Date"], ticker_frame["Smoothed Change"], color="blue", alpha=0.6)
    ax.set_title(ticker)
    ax.tick_params(axis='x', rotation=30)  # keep the date labels readable
    ax.set_ylabel("Smoothed Change")

# Hide the panels that have no ticker assigned
for ax in axes[len(all_stock_data):]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Make Date the index of the combined frame. The guard lets the cell be
# re-run: once Date is the index there is no 'Date' column left, and the
# unguarded version raised KeyError on a second run.
if 'Date' in stock_data.columns:
    stock_data['Date'] = pd.to_datetime(stock_data['Date'])
    stock_data.set_index('Date', inplace=True)

In [None]:
# Row-over-row fractional change of Close, computed within each ticker. The
# combined frame interleaves tickers, so an ungrouped pct_change would compare
# the price of one stock with the price of another.
stock_data['Daily price change'] = stock_data.groupby('Ticker')['Close'].pct_change()

In [None]:
missing_values = pd.DataFrame(stock_data.isnull().sum(), columns=['Missing Values'])
missing_values

In [None]:
stock_data = stock_data.dropna(subset=['Daily price change'])

In [None]:
missing_values = pd.DataFrame(stock_data.isnull().sum(), columns=['Missing Values'])
missing_values

In [None]:
# Simple moving averages of Close over 10, 20 and 50 rows, computed separately
# for each ticker so a window never spans two different stocks.
ma_day = [10,20,50]
close_by_ticker = stock_data.groupby("Ticker")["Close"]
for ma in ma_day:
    column_name = "MA for %s days" %(str(ma))
    stock_data[column_name] = close_by_ticker.transform(lambda s: s.rolling(ma).mean())
# Remove the rows whose windows were not yet full (and any other row with NaN)
stock_data.dropna(inplace=True)

In [None]:
missing_values = pd.DataFrame(stock_data.isnull().sum(), columns=['Missing Values'])
missing_values

In [None]:
# Exponential moving averages of Close, computed separately for each ticker so
# the recursion never carries one stock's price into another's.
close_by_ticker = stock_data.groupby("Ticker")["Close"]
stock_data["EMA_10"] = close_by_ticker.transform(lambda s: s.ewm(span=10, adjust=False).mean())  # Short-term trend
stock_data["EMA_50"] = close_by_ticker.transform(lambda s: s.ewm(span=50, adjust=False).mean())  # Medium-term trend
stock_data["EMA_200"] = close_by_ticker.transform(lambda s: s.ewm(span=200, adjust=False).mean())  # Long-term trend

In [None]:
stock_data.head()

In [None]:
# Target: the Adj Close found 365 rows later for the SAME ticker. Shifting the
# ungrouped frame would pick a row of a different stock a few weeks ahead.
# NOTE(review): the offset counts rows (trading sessions), not calendar days —
# confirm 365 is the intended horizon.
stock_data["Close_365"] = stock_data.groupby("Ticker")["Adj Close"].shift(-365)
# Drop the rows left without a target by the shift (and any other row with NaN)
stock_data = stock_data.dropna()

In [None]:
stock_data.columns

In [None]:
# All indicators are computed per ticker: the frame interleaves tickers, so
# ungrouped diff/rolling calls would mix prices of different stocks.
ticker_key = stock_data['Ticker'].to_numpy()


def _rolling_per_ticker(series, window, how):
    """Apply a rolling `how` ('mean' or 'std') over `window` rows within each ticker."""
    return series.groupby(ticker_key).transform(
        lambda s: getattr(s.rolling(window=window), how)()
    )


# RSI over 14 rows, using simple rolling means of gains and losses
delta = stock_data['Adj Close'].groupby(ticker_key).diff()
gain = _rolling_per_ticker(delta.where(delta > 0, 0), 14, 'mean')
loss = _rolling_per_ticker(-delta.where(delta < 0, 0), 14, 'mean')
rs = gain / loss
stock_data['RSI_14'] = 100 - (100 / (1 + rs))

# Bollinger bands: 20-row mean +/- 2 standard deviations of the SAME series
# (the bands previously mixed the Adj Close mean with the Close deviation)
adj_close_std_20 = _rolling_per_ticker(stock_data['Adj Close'], 20, 'std')
stock_data['SMA_20'] = _rolling_per_ticker(stock_data['Adj Close'], 20, 'mean')
stock_data['BB_Upper'] = stock_data['SMA_20'] + (adj_close_std_20 * 2)
stock_data['BB_Lower'] = stock_data['SMA_20'] - (adj_close_std_20 * 2)

# Difference between the short and medium exponential averages
stock_data['MACD'] = stock_data['EMA_10'] - stock_data['EMA_50']

In [None]:
stock_data.head()

In [None]:
missing_values = pd.DataFrame(stock_data.isnull().sum(), columns=['Missing Values'])
missing_values

In [None]:
# Drop the rows left with NaN by the rolling-window indicators computed in the
# previous cell (their first rows have no full window).
stock_data = stock_data.dropna()

In [None]:
missing_values = pd.DataFrame(stock_data.isnull().sum(), columns=['Missing Values'])
missing_values

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Correlation of the numeric columns only; without numeric_only=True recent
# pandas versions raise on the string 'Ticker' column.
correlation_matrix = stock_data.corr(numeric_only=True)

# Annotated heatmap of the correlation matrix
plt.figure(figsize=(10,8))
sns.heatmap(correlation_matrix, annot=True, cmap="coolwarm", fmt=".2f")
plt.title("Feature Correlation Matrix")
plt.show()

In [None]:
# Features: every column except the target itself, the ticker label, Volume
# and Adj Close.
X = stock_data.drop(columns=["Close_365","Ticker","Volume","Adj Close"])
# Target: the shifted Adj Close stored in 'Close_365'
y = stock_data["Close_365"]

In [None]:
from sklearn.preprocessing import MinMaxScaler
from keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import mean_squared_error, r2_score
from sklearn import linear_model
from keras.models import Sequential
import keras.backend as K  # Corrected import syntax
from keras.callbacks import EarlyStopping  # Corrected "Callbacks" to "callbacks"
from keras.optimizers import Adam  # Corrected "Optimisers" to "optimizers"
from keras.models import load_model  # Corrected "Models" to "models"
from keras.utils import plot_model  # Corrected "utils. vis_utils" spacing

In [None]:
from sklearn.model_selection import train_test_split

# Chronological 80/20 split. The rows are in date order and the target is a
# future price, so a shuffled split would place rows from the test period into
# the training set and inflate every score. shuffle=False keeps the last 20%
# of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, shuffle = False)
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)

In [None]:
# Feature scaler: ranges are learned from the training rows only and then
# applied to both splits
Scaler = MinMaxScaler()
Scaler.fit(X_train)
X_train_scaled = Scaler.transform(X_train)
X_test_scaled = Scaler.transform(X_test)

# Target scaler: same pattern, on the target reshaped to a single column
Scaler_y = MinMaxScaler()
y_train_column = y_train.to_numpy().reshape(-1, 1)
y_test_column = y_test.to_numpy().reshape(-1, 1)
Scaler_y.fit(y_train_column)
y_train_scaled = Scaler_y.transform(y_train_column)
y_test_scaled = Scaler_y.transform(y_test_column)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error

# Baseline model: ordinary linear regression fitted on the scaled training
# features and the scaled training target.
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train_scaled)

In [None]:
# Make predictions
y_pred = lr_model.predict(X_test_scaled)

In [None]:
# Evaluate the linear model in price units. Predictions and targets are mapped
# back through the target scaler first, so the figures are comparable with the
# LSTM metrics further down and are not flattened to 0.00 by the 0-1 scaling.
y_pred_actual_lr = Scaler_y.inverse_transform(np.asarray(y_pred).reshape(-1, 1))
y_test_actual_lr = Scaler_y.inverse_transform(y_test_scaled)

mae = mean_absolute_error(y_test_actual_lr, y_pred_actual_lr)
mse = mean_squared_error(y_test_actual_lr, y_pred_actual_lr)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_actual_lr, y_pred_actual_lr)

# Print results
print("Model Evaluation:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R² Score: {r2:.4f}")

In [None]:
# Work on fresh array copies of the scaled feature matrices
X_train_array = np.array(X_train_scaled)
X_test_array = np.array(X_test_scaled)

# The LSTM expects [samples, timesteps, features]; insert a timestep axis of
# length 1 between the sample axis and the feature axis
n_train, n_features = X_train_array.shape[0], X_train_array.shape[1]
X_train_reshaped = X_train_array.reshape(n_train, 1, n_features)
n_test = X_test_array.shape[0]
X_test_reshaped = X_test_array.reshape(n_test, 1, X_test_array.shape[1])

In [None]:
# Two stacked LSTM layers followed by a small dense head.
lstm_model = Sequential([
    # First LSTM returns the full sequence so the second LSTM can consume it;
    # the input is one timestep wide with one value per feature
    LSTM(256, return_sequences=True, activation='tanh', input_shape=(1, X_train_reshaped.shape[2])),
    Dropout(0.3),  # Regularization to prevent overfitting
    # Second LSTM returns only its final output
    LSTM(128, return_sequences=False, activation='tanh'),
    Dropout(0.3),
    Dense(64, activation='tanh'),
    Dense(1)  # Output layer (single continuous value for stock price)
])

In [None]:
import tensorflow as tf
# Compile with Adam at a fixed learning rate of 0.001 and mean-squared-error
# loss (no learning-rate schedule is configured). The optimizer is the `Adam`
# already imported from `keras`, the same package the model was built with;
# mixing `keras` layers with a `tf.keras` optimizer can fail when the two
# resolve to different Keras versions.
lstm_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

In [None]:
# Stop training once the validation loss has not improved for 10 epochs and
# roll back to the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
    verbose=1
)
# Validate on a slice held out from the TRAINING data (the last 10% of its
# rows). Using the test set for early stopping would tune the model on the
# data it is later scored on.
history = lstm_model.fit(
    X_train_reshaped,
    y_train_scaled,
    epochs=100,
    batch_size=64,
    validation_split=0.1,
    callbacks=[early_stopping],
)

In [None]:
#  Make Predictions
y_pred_lstm = lstm_model.predict(X_test_reshaped)

In [None]:
# Map the LSTM predictions and the scaled test targets back to price units
# through the target scaler.
y_pred_actual = Scaler_y.inverse_transform(y_pred_lstm)
y_test_actual = Scaler_y.inverse_transform(y_test_scaled)

In [None]:
# LSTM error metrics, computed on the inverse-transformed (price-unit) values
mse = mean_squared_error(y_test_actual, y_pred_actual)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_actual, y_pred_actual)
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R² Score: {r2:.4f}")

In [None]:
# Persist the trained LSTM to disk in the .keras format
import tensorflow as tf
lstm_model.save("stock_price.keras")  # Save the model in .keras

In [None]:
print(lstm_model.input_shape)  # Check expected input shape

In [None]:
import pickle

# Persist both fitted scalers (features first, then target) so predictions can
# be scaled and un-scaled outside this notebook
for pickle_path, fitted_scaler in (('scaler_x.pkl', Scaler), ("scaler_y.pkl", Scaler_y)):
    with open(pickle_path, 'wb') as f:
        pickle.dump(fitted_scaler, f)

In [None]:
from sklearn.svm import SVR

# Support-vector regressor with an RBF kernel
svm_model = SVR(kernel="rbf", C=100, gamma=0.1, epsilon=0.01)

# Fit on the scaled training data; ravel() flattens the single-column target
# to the 1-D shape SVR expects
svm_model.fit(X_train_scaled, y_train_scaled.ravel())

In [None]:
# Make predictions
y_pred_svm = svm_model.predict(X_test_scaled)

In [None]:
# Evaluate the SVR in price units. Predictions and targets are mapped back
# through the target scaler first, so the figures are comparable with the LSTM
# metrics and are not flattened to 0.00 by the 0-1 scaling.
y_pred_actual_svm = Scaler_y.inverse_transform(np.asarray(y_pred_svm).reshape(-1, 1))
y_test_actual_svm = Scaler_y.inverse_transform(y_test_scaled)

mae = mean_absolute_error(y_test_actual_svm, y_pred_actual_svm)
mse = mean_squared_error(y_test_actual_svm, y_pred_actual_svm)
rmse = np.sqrt(mse)
r2 = r2_score(y_test_actual_svm, y_pred_actual_svm)

# Print results
print("Model Evaluation:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R² Score: {r2:.4f}")

In [None]:
# Persist the fitted SVR with pickle; `pickle` is imported in an earlier cell.
with open("svm_stock_model.pkl", "wb") as f:
    pickle.dump(svm_model, f)

In [None]:
print("Input Shape for SVM Model:", X_train_scaled.shape)