In [21]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler,StandardScaler

# End-to-end script: load one stock's price history, split it 80/20 by row
# order, min-max scale it, fit a random forest on all-but-the-last column to
# predict the last column, and report MAE / MSE on the held-out rows.

data_path = "/kaggle/input/stock-market-dataset/stocks/ABM.csv"

# Literal "null" cells are read as NaN; the "Date" column becomes a parsed
# datetime index. The deprecated `infer_datetime_format` flag is intentionally
# not passed (modern pandas infers the format on its own).
data = pd.read_csv(
    data_path,
    na_values=["null"],
    index_col="Date",
    parse_dates=True,
)
print(data.shape)
print(data.head())

# Remove rows with NaN values
data.dropna(inplace=True)

# Split by row position (no shuffling): first 80% of rows for training,
# remaining 20% for testing. The index is computed after dropping NaN rows.
split_idx = int(data.shape[0] * 0.8)
dataset_train0 = data[:split_idx]
dataset_test0 = data[split_idx:]

# Fit the scaler on the training rows only, then reuse the same min/max on the
# test rows so no test-set statistics leak into training.
scaler = MinMaxScaler(feature_range=(0, 1))
dataset_train = scaler.fit_transform(dataset_train0)
dataset_test = scaler.transform(dataset_test0)
print(dataset_train0.shape, dataset_test0.shape)

# Features are every column except the last; the target is the last column.
# NOTE(review): with the column order Open/High/Low/Close/Adj Close/Volume the
# last column is Volume -- confirm this is the intended prediction target.
# Because the target is taken from the scaled array, the errors printed below
# are in scaled units, not original units.
x_train, y_train = dataset_train[:, :-1], dataset_train[:, -1]
x_test, y_test = dataset_test[:, :-1], dataset_test[:, -1]

# Create a RandomForestRegressor model (fixed seed for reproducible results)
model = RandomForestRegressor(n_estimators=100, random_state=42)

# Train the model on the features defined in this cell. Using `x_train` (not
# `X_train`) avoids a NameError in a fresh kernel and avoids silently picking
# up a stale `X_train` left behind by an earlier notebook cell.
model.fit(x_train, y_train)

# Make predictions on test data
y_pred = model.predict(x_test)

# Calculate the Mean Absolute Error and Mean Squared Error
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print('mae:', mae)
print('mse:', mse)

(10098, 6)
                Open      High       Low     Close  Adj Close  Volume
Date                                                                 
1980-03-17  0.000000  0.950000  0.933333  0.933333   0.027191   18400
1980-03-18  0.000000  0.933333  0.908333  0.916667   0.026705   32800
1980-03-19  0.916667  0.916667  0.916667  0.916667   0.026705       0
1980-03-20  0.000000  0.933333  0.925000  0.933333   0.027191   29600
1980-03-21  0.000000  0.933333  0.933333  0.933333   0.027191    9600
(8078, 6) (2020, 6)
mae: 0.055622764427400134
mse: 0.007922805606685755
