In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [2]:
esg = pd.read_csv('Training_Data/esg.csv')
rain_temp = pd.read_csv('Training_Data/rain_temp.csv')
financial_data = pd.read_csv('Training_Data/financial_data.csv')
finaL_data = pd.read_csv('Training_Data/final.csv')

In [148]:
financial_data

Unnamed: 0,Open,High,Low,Close,Adj Close
0,5021.500000,5976.000000,5000.950195,5900.649902,5900.649902
1,5903.799805,6011.950195,5394.350098,5762.750000,5762.750000
2,5765.450195,6185.399902,5676.700195,6138.600098,6138.600098
3,6136.750000,6357.100098,4448.500000,5137.450195,5137.450195
4,5140.600098,5545.200195,4803.600098,5223.500000,5223.500000
...,...,...,...,...,...
118,10101.049805,10137.849609,9685.549805,9917.900391,9917.900391
119,9937.650391,10178.950195,9687.549805,9788.599609,9788.599609
120,9893.299805,10384.500000,9831.049805,10335.299805,10335.299805
121,10390.349609,10490.450195,10094.000000,10226.549805,10226.549805


In [3]:
finaL_data.drop(['Unnamed: 0'], axis=1, inplace=True)

Train Test Split

In [4]:
from sklearn.model_selection import train_test_split

In [5]:
X = finaL_data.drop(['risk_factor'], axis=1)
y= finaL_data['risk_factor']

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
y_train_binary = (y_train > 0.5).astype(int)
y_test_binary = (y_test > 0.5).astype(int)

Random Forest

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [9]:
clf = RandomForestClassifier(n_estimators=100, random_state=42)

clf.fit(X_train, y_train_binary)

y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test_binary, y_pred)
print(f'Accuracy: {accuracy}')

print('\nClassification Report:')
print(classification_report(y_test_binary, y_pred))

print('\nConfusion Matrix:')
print(confusion_matrix(y_test_binary, y_pred))

Accuracy: 0.92

Classification Report:
              precision    recall  f1-score   support

           0       0.89      0.89      0.89         9
           1       0.94      0.94      0.94        16

    accuracy                           0.92        25
   macro avg       0.91      0.91      0.91        25
weighted avg       0.92      0.92      0.92        25


Confusion Matrix:
[[ 8  1]
 [ 1 15]]


Logistic Regression

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error, r2_score

In [11]:
logreg = LogisticRegression(random_state=42)
logreg.fit(X_train, y_train_binary)

y_pred = logreg.predict(X_test)

accuracy = accuracy_score(y_test_binary, y_pred)
print(f'Accuracy: {accuracy}')

print('\nClassification Report:')
print(classification_report(y_test_binary, y_pred))

print('\nConfusion Matrix:')
print(confusion_matrix(y_test_binary, y_pred))

Accuracy: 0.84

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.67      0.75         9
           1       0.83      0.94      0.88        16

    accuracy                           0.84        25
   macro avg       0.85      0.80      0.82        25
weighted avg       0.84      0.84      0.83        25


Confusion Matrix:
[[ 6  3]
 [ 1 15]]


Trend Prediction

In [118]:
import tensorflow as tf
from tensorflow.keras.layers import SimpleRNN, Dense, LSTM, GRU, Reshape
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler

In [97]:
df = financial_data

In [98]:
df

Unnamed: 0,Open,High,Low,Close,Adj Close
0,5021.500000,5976.000000,5000.950195,5900.649902,5900.649902
1,5903.799805,6011.950195,5394.350098,5762.750000,5762.750000
2,5765.450195,6185.399902,5676.700195,6138.600098,6138.600098
3,6136.750000,6357.100098,4448.500000,5137.450195,5137.450195
4,5140.600098,5545.200195,4803.600098,5223.500000,5223.500000
...,...,...,...,...,...
118,10101.049805,10137.849609,9685.549805,9917.900391,9917.900391
119,9937.650391,10178.950195,9687.549805,9788.599609,9788.599609
120,9893.299805,10384.500000,9831.049805,10335.299805,10335.299805
121,10390.349609,10490.450195,10094.000000,10226.549805,10226.549805


In [131]:
data = df['Close'].values.reshape(-1, 1)

scaler = MinMaxScaler(feature_range=(0, 1))
data_normalized = scaler.fit_transform(data)

train_size = int(len(data_normalized) * 0.8)
test_size = len(data_normalized) - train_size
train_data, test_data = data_normalized[0:train_size], data_normalized[train_size:len(data_normalized)]

sequence_length = 10

def create_sequences(data, sequence_length):
    sequences = []
    target = []
    for i in range(len(data) - sequence_length):
        seq = data[i:i+sequence_length]
        label = data[i+sequence_length]
        sequences.append(seq)
        target.append(label)
    return np.array(sequences), np.array(target)

X_train, y_train = create_sequences(train_data, sequence_length)
X_test, y_test = create_sequences(test_data, sequence_length)

model = Sequential()
model.add(SimpleRNN(50, activation='relu', input_shape=(sequence_length, 1)))
model.add(Reshape((-1, 50)))

model.add(LSTM(50, activation='relu'))  
model.add(Reshape((-1, 50)))

model.add(GRU(50, activation='relu'))  
model.add(Dense(1))

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001), loss='mean_squared_error', metrics=["accuracy"])

X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

model.fit(X_train, y_train, epochs=1000, batch_size=32, validation_data=(X_test, y_test), verbose=2)

last_sequence = data_normalized[-sequence_length:]
last_sequence = last_sequence.reshape((1, sequence_length, 1))
predicted_price = model.predict(last_sequence)
predicted_price = scaler.inverse_transform(predicted_price)


Epoch 1/1000
3/3 - 3s - loss: 0.1997 - accuracy: 0.0114 - val_loss: 0.7619 - val_accuracy: 0.0000e+00 - 3s/epoch - 1s/step
Epoch 2/1000
3/3 - 0s - loss: 0.1980 - accuracy: 0.0114 - val_loss: 0.7569 - val_accuracy: 0.0000e+00 - 84ms/epoch - 28ms/step
Epoch 3/1000
3/3 - 0s - loss: 0.1963 - accuracy: 0.0114 - val_loss: 0.7521 - val_accuracy: 0.0000e+00 - 75ms/epoch - 25ms/step
Epoch 4/1000
3/3 - 0s - loss: 0.1947 - accuracy: 0.0114 - val_loss: 0.7477 - val_accuracy: 0.0000e+00 - 74ms/epoch - 25ms/step
Epoch 5/1000
3/3 - 0s - loss: 0.1932 - accuracy: 0.0114 - val_loss: 0.7436 - val_accuracy: 0.0000e+00 - 83ms/epoch - 28ms/step
Epoch 6/1000
3/3 - 0s - loss: 0.1917 - accuracy: 0.0114 - val_loss: 0.7397 - val_accuracy: 0.0000e+00 - 83ms/epoch - 28ms/step
Epoch 7/1000
3/3 - 0s - loss: 0.1903 - accuracy: 0.0114 - val_loss: 0.7360 - val_accuracy: 0.0000e+00 - 84ms/epoch - 28ms/step
Epoch 8/1000
3/3 - 0s - loss: 0.1891 - accuracy: 0.0114 - val_loss: 0.7323 - val_accuracy: 0.0000e+00 - 95ms/epoch 

In [129]:
print(f"Predicted Stock Price for the Next Day: {predicted_price[0][0]}")

Predicted Stock Price for the Next Day: 10206.228515625


In [142]:
trend_label = 0

In [145]:
comparison_result = predicted_price[0][0] > financial_data['Close'][:-1]

if not comparison_result.any():
    trend_label += 1
else:
    pass

In [146]:
trend_label

0

Mixing Both of the models for 