In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Read the CSV file and create the DataFrame
tsla_df = pd.read_csv('TSLA.csv')
print("Missing Values:\n", tsla_df.isnull().sum())

# Convert 'Date' to datetime and create an ordinal feature
tsla_df['Date'] = pd.to_datetime(tsla_df['Date'])
tsla_df['Date_Ordinal'] = tsla_df['Date'].map(pd.Timestamp.toordinal)
tsla_df = tsla_df.sort_values(by='Date')

# (Optional) Scale selected numerical features if needed
num_features = ['Open', 'High', 'Low', 'Adj Close', 'Volume']
scaler = StandardScaler()
tsla_df[num_features] = scaler.fit_transform(tsla_df[num_features])

# Define features and target. Here we use only 'Date_Ordinal' as the predictor.
X = tsla_df[['Date_Ordinal']]
y = tsla_df['Close']

# Split the data for evaluation (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the Random Forest Regressor
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Predictions on training and test data
y_train_pred = rf_model.predict(X_train)
y_test_pred = rf_model.predict(X_test)

# Evaluate model performance on the test set
mae = mean_absolute_error(y_test, y_test_pred)
mse = mean_squared_error(y_test, y_test_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_test, y_test_pred)

print("\nRandom Forest Model Performance on Test Data:")
print(f"Mean Absolute Error (MAE): {mae:.2f}")
print(f"Mean Squared Error (MSE): {mse:.2f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.2f}")
print(f"R-squared: {r2:.2f}")

# Predicting future dates (without evaluation since actual 'Close' values are unknown)
future_dates = ['2022-03-25', '2022-04-25', '2022-05-25', '2022-06-25', '2022-07-25', '2023-06-01']
future_dates_ordinal = np.array([pd.Timestamp(date).toordinal() for date in future_dates]).reshape(-1, 1)
future_predictions = rf_model.predict(future_dates_ordinal)

print("\nFuture Date Predictions (Close Prices):")
print(future_predictions)

Missing Values:
 Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

Random Forest Model Performance on Test Data:
Mean Absolute Error (MAE): 3.24
Mean Squared Error (MSE): 101.84
Root Mean Squared Error (RMSE): 10.09
R-squared: 1.00

Future Date Predictions (Close Prices):
[989.43928461 989.43928461 989.43928461 989.43928461 989.43928461
 989.43928461]


