In [1]:
import pandas as pd

# Load the INFY dataset and normalise two column types in a single chain:
#   * 'Volume' is cast to float64
#   * 'Date' is parsed with pd.to_datetime
# assign() overwrites the existing columns in place, so column order is kept.
df = pd.read_csv("./../data/INFY.csv").assign(
    Volume=lambda frame: frame['Volume'].astype('float64'),
    Date=lambda frame: pd.to_datetime(frame['Date']),
)

In [2]:

# Technical indicators, all derived from the 'Close' column.
close = df['Close']

# Simple moving averages; a value is NaN until its rolling window is full.
for column, window in (('SMA_10', 10), ('SMA_50', 50)):
    df[column] = close.rolling(window=window).mean()

# Exponential moving averages over the same spans (recursive, adjust=False).
for column, window in (('EMA_10', 10), ('EMA_50', 50)):
    df[column] = close.ewm(span=window, adjust=False).mean()

# --- Relative Strength Index (RSI) ---------------------------------------
# Look-back length in rows (14 days, assuming one row per trading day).
period = 14

# Row-over-row change of the close; the first entry is NaN.
delta = close.diff()

# Split the changes into gains and (sign-flipped) losses. Entries that fail
# the comparison -- including the leading NaN -- are replaced with 0.
gains = delta.where(delta.gt(0), 0)
losses = -(delta.where(delta.lt(0), 0))

# Unweighted rolling means of gains and losses over `period` rows.
average_gain = gains.rolling(window=period).mean()
average_loss = losses.rolling(window=period).mean()

# Relative strength and the RSI derived from it.
# NOTE(review): if both averages are 0 for a window, rs is 0/0 = NaN and so
# is the RSI; any later dropna() would then discard that row -- confirm this
# is acceptable for the data.
rs = average_gain.div(average_loss)
rsi = 100 - 100 / (rs + 1)

df['RSI'] = rsi

# --- MACD ----------------------------------------------------------------
# Short-term (12-span) and long-term (26-span) EMAs of the close.
short_ema = close.ewm(span=12, adjust=False).mean()
long_ema = close.ewm(span=26, adjust=False).mean()

# MACD line: short-term EMA minus long-term EMA.
macd_line = short_ema.sub(long_ema)

# Signal line: 9-span EMA of the MACD line.
signal_line = macd_line.ewm(span=9, adjust=False).mean()

# Store both series on the DataFrame.
df['MACD'] = macd_line
df['Signal_Line'] = signal_line

In [3]:
# Prediction horizon in rows (1 = the next row's close).
days_ahead = 1

# 'Target_Close' holds the close found `days_ahead` rows further down.
df['Target_Close'] = df['Close'].shift(-days_ahead)

# Remove rows without a Target_Close (at least the trailing `days_ahead`
# rows, which have no future close). This mutates df in place, so re-running
# this cell trims additional rows each time.
df.dropna(subset=['Target_Close'], inplace=True)

# 'Price_Direction' is 1 when the close is higher than in the previous row
# and 0 otherwise (a fall, no change, or the first row, whose difference is
# NaN).
# NOTE(review): this looks backward with a fixed lag of 1 and does not depend
# on `days_ahead` or 'Target_Close' -- confirm whether it is meant as a
# feature or as a forward-looking label.
df['Price_Direction'] = df['Close'].diff().gt(0).astype(int)

In [4]:
# Final cleanup, done as one chain:
#   1. skip the first row,
#   2. drop every row that still contains a NaN in any column (this covers
#      the warm-up rows of the rolling indicators computed earlier),
#   3. remove the 'Date' column.
df = df.iloc[1:].dropna().drop(columns='Date')

# Write the preprocessed frame to CSV, omitting the index.
df.to_csv("./../data/preprocessed_data.csv", index=False)