In [17]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objs as go
from plotly.offline import plot
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression

In [18]:
# Load and preprocess data
def load_and_preprocess_data(filepath: str) -> pd.DataFrame:
    data = pd.read_csv(filepath)
    data['Date'] = pd.to_datetime(data['Date'])
    data['Close/Last'] = data['Close/Last'].replace('[\$,]', '', regex=True).astype(float)
    data = data.dropna().reset_index(drop=True)
    return data


invalid escape sequence '\$'


invalid escape sequence '\$'


invalid escape sequence '\$'



In [19]:
# Feature engineering
def add_technical_indicators(data: pd.DataFrame) -> pd.DataFrame:
    data['SMA_10'] = data['Close/Last'].rolling(window=10).mean()
    data['SMA_30'] = data['Close/Last'].rolling(window=30).mean()
    delta = data['Close/Last'].diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    rs = gain.rolling(14).mean() / loss.rolling(14).mean()
    data['RSI'] = 100 - (100 / (1 + rs))
    return data.dropna()

In [20]:
# Visualizations
def plot_predictions(Y_test, y_pred, title, dates):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=dates, y=Y_test, mode='lines', name='Actual', line=dict(color='blue')))
    fig.add_trace(go.Scatter(x=dates, y=y_pred, mode='lines', name='Predicted', line=dict(color='red')))
    fig.update_layout(title=title, xaxis_title='Time', yaxis_title='Stock Price')
    plot(fig)

In [21]:
# Train and evaluate models
def train_and_evaluate_models(data: pd.DataFrame) -> None:
    features = ['SMA_10', 'SMA_30', 'RSI']
    X = data[features]
    Y = data['Close/Last']
    dates = data['Date']
    tscv = TimeSeriesSplit(n_splits=5)

    models = [LinearRegression(), RandomForestRegressor(), GradientBoostingRegressor()]
    model_names = ['Linear Regression', 'Random Forest', 'Gradient Boosting']

    for model, name in zip(models, model_names):
        scores = []
        for train_idx, test_idx in tscv.split(X):
            X_train, X_test = X.iloc[train_idx], X.iloc[test_idx]
            Y_train, Y_test = Y.iloc[train_idx], Y.iloc[test_idx]
            dates_test = dates.iloc[test_idx]

            scaler = StandardScaler().fit(X_train)
            X_train_scaled = scaler.transform(X_train)
            X_test_scaled = scaler.transform(X_test)

            model.fit(X_train_scaled, Y_train)
            y_pred = model.predict(X_test_scaled)

            scores.append({
                'R^2 Score': r2_score(Y_test, y_pred),
                'MSE': mean_squared_error(Y_test, y_pred),
                'MAE': mean_absolute_error(Y_test, y_pred)
            })
            plot_predictions(Y_test, y_pred, f'{name} Predictions', dates_test)

In [22]:
# Extend predictions to future dates
def extend_predictions(data: pd.DataFrame, model, scaler, future_steps: int):
    features = ['SMA_10', 'SMA_30', 'RSI']
    last_data = data.iloc[-future_steps:][features]
    last_data_scaled = scaler.transform(last_data)

    future_dates = pd.date_range(start=data['Date'].iloc[-1], periods=future_steps + 1, inclusive='right')
    future_preds = model.predict(last_data_scaled)

    return future_dates, future_preds

In [23]:
# Main function
def main(filepath: str, future_steps: int) -> None:
    data = load_and_preprocess_data(filepath)
    data = add_technical_indicators(data)

    features = ['SMA_10', 'SMA_30', 'RSI']
    X = data[features]
    Y = data['Close/Last']
    dates = data['Date']

    # Training models
    model = GradientBoostingRegressor()
    scaler = StandardScaler().fit(X)
    X_scaled = scaler.transform(X)
    model.fit(X_scaled, Y)

    # Future predictions
    future_dates, future_preds = extend_predictions(data, model, scaler, future_steps)

    # Plotting future predictions
    plot_predictions(Y, np.concatenate([model.predict(X_scaled), future_preds]), 'Future Predictions', np.concatenate([dates, future_dates]))

In [24]:
if __name__ == "__main__":
    main('teslanew.csv', future_steps=0)