<a href="https://www.kaggle.com/code/fredrickkariuki/predicting-dollar-value-in-kenya?scriptVersionId=150827078" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Dollar-to-Kenyan-shilling exchange rate prediction using LSTM

## Importing the required libraries





In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

## READING DOLLAR VALUE IN KENYA 

In [None]:
# Load the 'DOLLAR TO KENYAN' workbook from the Kaggle input directory
data = pd.read_excel('/kaggle/input/dollar-to-kenyan/DOLLAR TO KENYAN.xlsx')
# Preview the first rows to confirm the columns were parsed as expected
data.head()

## Data processing

In [None]:
# Summarise column names, dtypes and non-null counts
data.info()

In [None]:
# True if at least one row is an exact duplicate of an earlier row
data.duplicated().any()

In [None]:
# Force 'Close' to a numeric dtype; values that cannot be parsed become NaN
data["Close"] = pd.to_numeric(data["Close"], errors="coerce")

In [None]:
# Total number of missing cells across the whole frame
# (includes any NaN produced by the numeric coercion of 'Close')
data.isnull().sum().sum()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, ...) of the numeric columns
data.describe()

In [None]:
# Line plot of 'Close' against 'Date' to inspect the series visually
plt.plot(data['Date'], data['Close'], color = 'blue', label = 'Close')
plt.show()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Scale 'Close' into the [0, 1] range. The scaler is fitted on a single-column
# 2-D array because the later cells call Scaler.transform and
# Scaler.inverse_transform on arrays with one column.
Scaler = MinMaxScaler(feature_range=(0, 1))

# Build the array to scale from the 'Close' column; rows that became NaN
# during the numeric coercion are dropped so they do not reach the model.
dataV1 = Scaler.fit_transform(data[['Close']].dropna().values)
dataV1.shape

In [None]:
# Number of past observations in each input window. It must equal the
# `time_steps` value used for the LSTM input shape when the model is built.
feature_length = 8


def Create_Features_and_Targets(values, feature_length):
    """Slice a series into sliding windows and their next-step targets.

    Parameters
    ----------
    values : array-like
        Observations ordered along the first axis.
    feature_length : int
        Number of consecutive observations in each window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, targets)`` where ``features[i]`` is
        ``values[i:i + feature_length]`` and ``targets[i]`` is
        ``values[i + feature_length]``.

    Raises
    ------
    ValueError
        If ``values`` does not contain more than ``feature_length`` rows.
    """
    values = np.asarray(values)
    n_windows = len(values) - feature_length
    if n_windows <= 0:
        raise ValueError(
            "need more than feature_length observations to build a window"
        )
    features = np.stack(
        [values[start:start + feature_length] for start in range(n_windows)]
    )
    targets = values[feature_length:]
    return features, targets


# Features are windows of scaled 'Close' values and the target is the scaled
# value that follows each window. The scaler is fitted here so this cell does
# not rely on it having been fitted beforehand; NaN rows are dropped so they
# cannot turn the training loss into NaN.
X, y = Create_Features_and_Targets(
    Scaler.fit_transform(data[['Close']].dropna().values),
    feature_length,
)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out the last 20% of the samples for testing. The samples come from a
# time series, so the split keeps their order (shuffle=False): a shuffled
# split would let the model train on observations that come after the ones it
# is evaluated on.
# NOTE(review): this assumes the rows are already in date order - confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)


In [None]:
# Shapes of the training features and targets, as a sanity check before fitting
X_train.shape, y_train.shape

## Creating the model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Length of each input sequence fed to the network
time_steps = 8

# Layer stack, in order:
#   1 input LSTM (64 units) that returns the full sequence,
#   20 further LSTMs (64 units) that return full sequences,
#   1 final LSTM (64 units) without return_sequences,
#   1 single-unit Dense output layer.
lstm_stack = [LSTM(64, input_shape=(time_steps, 1), return_sequences=True)]
lstm_stack.extend(LSTM(64, return_sequences=True) for _ in range(20))
lstm_stack.append(LSTM(64))

model = Sequential(lstm_stack + [Dense(1)])

# Mean squared error loss optimised with Adam
model.compile(loss='mean_squared_error', optimizer='adam')

# Show the resulting architecture
model.summary()


In [None]:
# Fit the model on the training split; the returned History object keeps the
# loss recorded for every epoch.
fit_options = {"epochs": 10, "batch_size": 50, "verbose": 1}
history = model.fit(X_train, y_train, **fit_options)

In [None]:
# Training loss per epoch, as recorded by model.fit
training_loss = history.history['loss']
plt.plot(training_loss)
plt.xlabel('epoch')
plt.ylabel('loss')
plt.title('Training model loss')
plt.show()

## Testing and making predictions

In [None]:
# Column at position 3, kept as a one-column frame by slicing 3:4
# (the 'Close' feature, per the original author's note)
testData = data.iloc[:, 3:4]

# The whole series as an (n, 1) array; it becomes the model input once scaled
x_test = testData.to_numpy(copy=True).reshape(-1, 1)

# Actual values from row feature_length + 1 onward, used as the reference curve
# NOTE(review): whether the "+ 1" offset lines up with the predictions depends
# on how Create_Features_and_Targets builds its windows - confirm.
y_real = testData.iloc[feature_length + 1:].to_numpy()

y_real.shape, x_test.shape

In [None]:
# Apply the already-configured scaler (transform only, no re-fitting) to the
# test values
x_test = Scaler.transform(x_test)
x_test.shape

In [None]:
# Turn the scaled series into model inputs and targets.
# NOTE(review): Create_Features_and_Targets and feature_length must be defined
# earlier in the notebook - confirm they are in scope before running this cell.
x_test, y_test = Create_Features_and_Targets(x_test, feature_length)
# Reshape to (samples, time steps, 1), the 3-D layout passed to model.predict
x_test = np.reshape(x_test,(x_test.shape[0],x_test.shape[1],1)) # Making data 3 dimensional 
x_test.shape

In [None]:
# Run the trained model on the windowed test inputs (outputs are still scaled)
y_pred = model.predict(x_test)

In [None]:
# Map the scaled predictions back to the original value range
predicted_price = Scaler.inverse_transform(y_pred)

In [None]:
# Overlay the actual values (red) and the model's predictions (green)
curves = (
    (y_real, 'red', 'Actual'),
    (predicted_price, 'green', 'Predicted'),
)
for curve_values, curve_color, curve_label in curves:
    plt.plot(curve_values, color=curve_color, label=curve_label)

plt.ylabel('Stock')
plt.xlabel('Time')
plt.legend()
plt.show()

## Real-time predictions

In [None]:
def predict_given_date(data, date, feature_length):
    """Predict the value for ``date`` from the rows that precede it.

    The ``feature_length`` rows immediately before ``date`` are taken from the
    column at position 3 of ``data``, scaled with the module-level ``Scaler``,
    passed to the module-level ``model``, and the prediction is mapped back to
    the original scale.

    Args:
        data: DataFrame holding the series; it is not modified.
            NOTE(review): ``date`` is looked up in ``data.index``, so the frame
            is expected to be indexed by date - confirm against the caller.
        date: Date to predict for; anything ``pd.Timestamp`` accepts.
        feature_length: Number of preceding rows used as model input.

    Returns:
        The prediction in original units, as returned by
        ``Scaler.inverse_transform``.

    Raises:
        ValueError: If fewer than ``feature_length`` rows precede ``date`` or
            ``date`` matches more than one row of the index.
    """
    when = pd.Timestamp(date)

    if when in data.index:
        idx = data.index.get_loc(when)
        if not isinstance(idx, (int, np.integer)):
            # get_loc returns a slice or mask when the label is duplicated
            raise ValueError(f"date {date!r} matches more than one row")
    else:
        # Unknown date: forecast from the most recent rows. The position is
        # computed directly instead of appending a placeholder row, so the
        # caller's frame is left untouched.
        idx = len(data)

    if idx < feature_length:
        raise ValueError(
            f"need at least {feature_length} rows before {date!r}, "
            f"found {idx}"
        )

    window = data.iloc[idx - feature_length:idx, 3:4].values
    # Add the leading batch axis expected by model.predict
    scaled_window = np.expand_dims(Scaler.transform(window), axis=0)
    prediction = model.predict(scaled_window)
    return Scaler.inverse_transform(prediction)

In [None]:
# Example: predict the value for 2022-09-20 from the preceding feature_length rows
predict_given_date(data, '2022-09-20', feature_length)

Here we can see that our model predicted '101.7007' for the given date, 2022-09-20. Let's check how close this is to the actual price on that date; the following piece of code does just that.

## READING THE MULTIVARIATE DATASET

In [None]:
# Load the multivariate workbook from the Kaggle input directory
multi_variate_dataset = pd.read_excel('/kaggle/input/multivariate-prediction/Dollar.xlsx')

In [None]:
# Display the frame as loaded
multi_variate_dataset

In [None]:
# Remove the 'KENYA' column from the frame
multi_variate_dataset = multi_variate_dataset.drop(columns='KENYA')

In [None]:
# Total number of missing cells across all columns
multi_variate_dataset.isna().sum().sum()

In [None]:
# Discard every row that has at least one missing value
multi_variate_dataset=multi_variate_dataset.dropna()

In [None]:
# Per-column missing-value counts; all should be zero after dropna()
multi_variate_dataset.isna().sum()

In [None]:
# (rows, columns) remaining after the cleaning steps
multi_variate_dataset.shape

In [None]:
# Parse the 'DATE' column into pandas datetime values
multi_variate_dataset['DATE'] = pd.to_datetime(multi_variate_dataset['DATE'])

In [None]:
# Display the frame after the 'DATE' conversion
multi_variate_dataset

In [None]:
import pandas as pd
import numpy as np

# Parse 'DATE' (harmless if it is already datetime), order the rows
# chronologically and index the frame by date so that shifting the series
# moves it by whole observations in time order.
multi_variate_dataset['DATE'] = pd.to_datetime(multi_variate_dataset['DATE'])
multi_variate_dataset = multi_variate_dataset.sort_values(by='DATE')
multi_variate_dataset.set_index('DATE', inplace=True)

# Series to forecast
y = multi_variate_dataset['UGANDA']

# Number of past observations used as features for each prediction
num_time_steps = 8

# One column per lag, ordered oldest -> newest (t-8 ... t-1) so that each row
# reads as a chronological sequence when it is fed to the LSTM.
X = pd.concat(
    [y.shift(lag).rename(f'lag_{lag}') for lag in range(num_time_steps, 0, -1)],
    axis=1,
)

# Keep only the rows whose lag window is complete, and select the same rows
# of the target so that features and targets stay aligned.
complete_rows = X.notna().all(axis=1).to_numpy()
X = X.loc[complete_rows]
y = y.loc[complete_rows]

# Replace the date index of X with a plain 0..n-1 index (the dates are
# discarded from X; y keeps its date index).
X.reset_index(drop=True, inplace=True)

# X now holds the previous num_time_steps values of 'UGANDA' for every row,
# and y holds the value to predict for that row.

# Now, X contains past values of the target variable as features, and y is the target variable


In [None]:
from sklearn.model_selection import train_test_split

# Hold out the last 20% of the rows for testing. The rows are lagged
# observations of a date-sorted series, so the split keeps their order
# (shuffle=False): a shuffled split would let the model train on observations
# that come after the ones it is evaluated on.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Length of each input sequence fed to the network
time_steps = 8

# Layer stack, in order:
#   1 input LSTM (64 units) that returns the full sequence,
#   10 further LSTMs (64 units) that return full sequences,
#   1 final LSTM (64 units) without return_sequences,
#   1 single-unit Dense output layer.
lstm_stack = [LSTM(64, input_shape=(time_steps, 1), return_sequences=True)]
lstm_stack.extend(LSTM(64, return_sequences=True) for _ in range(10))
lstm_stack.append(LSTM(64))

model = Sequential(lstm_stack + [Dense(1)])

# Mean squared error loss optimised with Adam
model.compile(loss='mean_squared_error', optimizer='adam')

# Show the resulting architecture
model.summary()


In [None]:
# The LSTM was declared with input_shape=(time_steps, 1), i.e. it expects 3-D
# input (samples, time steps, features). X_train is a 2-D table with one lag
# per column, so it is converted to a NumPy array and given a trailing
# feature axis; the targets are converted to a plain array as well.
X_train_seq = np.asarray(X_train, dtype='float32').reshape(len(X_train), -1, 1)
y_train_arr = np.asarray(y_train, dtype='float32')

# 20% of the training rows are set aside by Keras for validation
model.fit(X_train_seq, y_train_arr, epochs=10, batch_size=1, validation_split=0.2)


In [None]:
# Predict on this section's held-out split (X_test). The lower-case `x_test`
# belongs to the earlier single-series section and holds different data.
# The 2-D lag table is reshaped to the (samples, time steps, 1) layout the
# model was declared with.
y_pred = model.predict(
    np.asarray(X_test, dtype='float32').reshape(len(X_test), -1, 1)
)