In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

In [2]:
# Load the data
data = pd.read_csv('Bank_Stock_Price_10Y.csv')

In [5]:
data.describe()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
count,2483.0,2483.0,2483.0,2483.0,2483.0,2483.0
mean,5219.973822,5265.847765,5173.628675,5219.887233,4886.148684,79974960.0
std,2223.156537,2240.113146,2206.459905,2223.903144,2276.934419,53781220.0
min,1970.0,1980.0,1940.0,1965.0,1691.382568,0.0
25%,2955.0,2985.0,2930.0,2950.0,2612.564454,51535750.0
50%,5170.0,5235.0,5120.0,5180.0,4736.543945,70098000.0
75%,6822.5,6890.0,6740.0,6800.0,6349.964111,96517550.0
max,9775.0,9775.0,9675.0,9750.0,9750.0,1062862000.0


In [4]:
# Preprocessing: Check for null values and handle them
data.isnull().sum()
data = data.dropna()

In [6]:
scaler = StandardScaler()
data[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']] = scaler.fit_transform(data[['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume']])

In [7]:
X = data.drop('Close', axis=1)
X = X.drop('Date', axis=1)
y = data['Close']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [9]:
model = DecisionTreeRegressor(random_state=42)
model.fit(X_train, y_train)

In [10]:
y_pred = model.predict(X_test)

In [11]:
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print('Mean Squared Error:', mse)
print('R-squared:', r2)

Mean Squared Error: 0.0002560900975424451
R-squared: 0.9997354910385263


In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np

# Load the data
data = pd.read_csv('/content/Bank_Stock_Price_10Y.csv')

# Convert the 'Date' column to Unix timestamps
data['Date'] = pd.to_datetime(data['Date']).astype(int)/10**9

# Split the data into features (X) and target (y)
X = data.drop(['Close'], axis=1)
y = data['Close']

# Define the number of models to train and the number of rows to select for each model
num_models = 10
num_rows_per_model = 50

# Initialize lists to store the model parameters, accuracy, and evaluation metrics
model_params = []
model_accuracies = []
model_metrics = []

# Train the models
for i in range(num_models):

    random_rows = np.random.choice(len(X), num_rows_per_model, replace=True)
    X_train, X_test, y_train, y_test = train_test_split(X.iloc[random_rows], y.iloc[random_rows], test_size=0.2, random_state=i)


    model = DecisionTreeRegressor(random_state=i)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)


    accuracy = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)


    model_params.append(model.get_params())
    model_accuracies.append(accuracy)
    model_metrics.append({'MSE': mse})

    print(f'Model {i+1}:')
    print(f'Accuracy: {accuracy}')
    print(f'MSE: {mse}')
    print('-----------------------------')

Model 1:
Accuracy: 0.9990029576931665
MSE: 4175.0
-----------------------------
Model 2:
Accuracy: 0.9967342163753893
MSE: 14410.0
-----------------------------
Model 3:
Accuracy: 0.8731697304905
MSE: 780757.5
-----------------------------
Model 4:
Accuracy: 0.9985325365340656
MSE: 11022.5
-----------------------------
Model 5:
Accuracy: 0.979021527134588
MSE: 80295.0
-----------------------------
Model 6:
Accuracy: 0.9995000486999194
MSE: 1930.0
-----------------------------
Model 7:
Accuracy: 0.9942578324314936
MSE: 26480.0
-----------------------------
Model 8:
Accuracy: 0.9824647093737084
MSE: 66740.0
-----------------------------
Model 9:
Accuracy: 0.998081565429293
MSE: 11525.0
-----------------------------
Model 10:
Accuracy: 0.98675905839276
MSE: 68592.5
-----------------------------
