In [5]:
#####  Load Tesla Data from CSV and Prepare

import pandas as pd
import matplotlib.pyplot as plt

# Load CSV
df = pd.read_csv("../src/gmf_assets.csv", parse_dates=['Date'])
df = df.astype(str)

# Filter Tesla data only
tsla_df = df[df['Asset'] == 'TSLA'].copy()

# Sort by date and set Date as index
tsla_df.sort_values('Date', inplace=True)
tsla_df.set_index('Date', inplace=True)

# Use only 'Close' price for forecasting
tsla_close = tsla_df['Close']

df['Date'] = pd.to_datetime(df['Date'], errors='coerce')


In [2]:
###### Split Data into Train and Test

# 80% train, 20% test split by date index
split_date = tsla_close.index[int(len(tsla_close)*0.8)]
train = tsla_close[:split_date]
test = tsla_close[split_date:]

# Output the size of each set
print(f"Training set: {train.shape}")
print(f"Testing set: {test.shape}")


Training set: (2029,)
Testing set: (508,)


In [10]:
print(df.columns)


Index(['Date', 'Open', 'High', 'Low', 'Close', 'Volume', 'Asset'], dtype='object')


In [9]:
import numpy as np
import pandas as pd

# Suppose df is your DataFrame, and you're using it to fit a model:
X = df.drop("target", axis=1)
y = df["target"]

# Convert all columns to numeric (if not already)
X = X.apply(pd.to_numeric, errors='coerce')

# Optional: remove any rows with NaNs (if coercion created NaNs)
X = X.dropna()
y = y[X.index]  # Keep y in sync with filtered X

# Now convert to numpy arrays if needed
X_np = np.asarray(X)
y_np = np.asarray(y)

# Now fit your model
# model.fit(X_np, y_np)


KeyError: "['target'] not found in axis"

In [8]:
### Build and Train ARIMA Model

from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

# Fit ARIMA model (p,d,q) can be tuned; here example (5,1,0)
arima_model = ARIMA(train, order=(5,1,0))
arima_result = arima_model.fit()

train = pd.to_numeric(train, errors='coerce').dropna()
test = pd.to_numeric(test, errors='coerce').dropna()

# Forecast for the test period length
arima_forecast = arima_result.forecast(steps=len(test))

# Calculate error
arima_mse = mean_squared_error(test, arima_forecast)
print(f'ARIMA Mean Squared Error: {arima_mse:.4f}')

ValueError: Pandas data cast to numpy dtype of object. Check input data with np.asarray(data).

In [None]:
##### Prepare Data for LSTM Model

import numpy as np
from sklearn.preprocessing import MinMaxScaler

tsla_close = pd.to_numeric(tsla_close, errors='coerce').dropna()

# Scale data between 0 and 1
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(tsla_close.values.reshape(-1,1))

# Create sequences function for supervised learning
def create_sequences(data, seq_length=60):
    X, y = [], []
    for i in range(seq_length, len(data)):
        X.append(data[i-seq_length:i, 0])
        y.append(data[i, 0])
    return np.array(X), np.array(y)

# Create sequences
SEQ_LENGTH = 60
X, y = create_sequences(scaled_data, SEQ_LENGTH)

# Split train/test same as above
split_idx = int(len(X)*0.8)
X_train, X_test = X[:split_idx], X[split_idx:]
y_train, y_test = y[:split_idx], y[split_idx:]

# Reshape for LSTM [samples, timesteps, features]
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))


In [None]:
from sklearn.metrics import mean_squared_error, mean_absolute_error

def evaluate(true, predicted, model_name):
    mse = mean_squared_error(true, predicted)
    mae = mean_absolute_error(true, predicted)
    print(f"{model_name} - MSE: {mse:.4f}, MAE: {mae:.4f}")

evaluate(test.values, arima_pred.values, "ARIMA")
evaluate(test.values, lstm_pred, "LSTM")


#####################################################


In [None]:
# ARIMA Model (Auto Optimization)

from pmdarima.arima import auto_arima
import matplotlib.pyplot as plt

# Find best (p,d,q) using auto_arima
model_auto = auto_arima(train, seasonal=False, stepwise=True, suppress_warnings=True)
print(model_auto.summary())

# Fit and Forecast
model_auto.fit(train)
n_periods = len(test)
forecast_arima = model_auto.predict(n_periods=n_periods)

# Create forecast DataFrame
arima_pred = pd.Series(forecast_arima, index=test.index)

# Plot ARIMA Forecast
plt.figure(figsize=(10,5))
plt.plot(train, label='Train')
plt.plot(test, label='Test')
plt.plot(arima_pred, label='ARIMA Forecast')
plt.title('ARIMA Forecast vs Actual')
plt.legend()
plt.show()


ModuleNotFoundError: No module named 'pmdarima'

In [None]:
# LSTM Model (Deep Learning)

from pmdarima.arima import auto_arima
import matplotlib.pyplot as plt

# Find best (p,d,q) using auto_arima
model_auto = auto_arima(train, seasonal=False, stepwise=True, suppress_warnings=True)
print(model_auto.summary())

# Fit and Forecast
model_auto.fit(train)
n_periods = len(test)
forecast_arima = model_auto.predict(n_periods=n_periods)

# Create forecast DataFrame
arima_pred = pd.Series(forecast_arima, index=test.index)

# Plot ARIMA Forecast
plt.figure(figsize=(10,5))
plt.plot(train, label='Train')
plt.plot(test, label='Test')
plt.plot(arima_pred, label='ARIMA Forecast')
plt.title('ARIMA Forecast vs Actual')
plt.legend()
plt.show()


ModuleNotFoundError: No module named 'pmdarima'

In [None]:
# LSTM Model (Deep Learning)

from sklearn.preprocessing import MinMaxScaler
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Normalize data
scaler = MinMaxScaler()
train_scaled = scaler.fit_transform(train)
test_scaled = scaler.transform(test)

# Create sequences for LSTM
def create_sequences(data, seq_length=60):
    X, y = [], []
    for i in range(seq_length, len(data)):
        X.append(data[i-seq_length:i])
        y.append(data[i])
    return np.array(X), np.array(y)

# Split LSTM input
seq_length = 60
X_train, y_train = create_sequences(train_scaled, seq_length)

# Build LSTM
model_lstm = Sequential()
model_lstm.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model_lstm.add(LSTM(units=50))
model_lstm.add(Dense(1))
model_lstm.compile(optimizer='adam', loss='mean_squared_error')

# Train
model_lstm.fit(X_train, y_train, epochs=10, batch_size=32, verbose=1)

# Forecasting
input_seq = train_scaled[-seq_length:]
predictions = []
for _ in range(len(test)):
    input_reshaped = input_seq.reshape(1, seq_length, 1)
    pred = model_lstm.predict(input_reshaped, verbose=0)[0][0]
    predictions.append(pred)
    input_seq = np.append(input_seq[1:], [[pred]], axis=0)

# Rescale LSTM predictions
lstm_pred = scaler.inverse_transform(np.array(predictions).reshape(-1, 1))

# Plot
plt.figure(figsize=(10,5))
plt.plot(test.index, test.values, label='Actual')
plt.plot(test.index, lstm_pred, label='LSTM Forecast')
plt.title('LSTM Forecast vs Actual')
plt.legend()
plt.show()


In [None]:
#Evaluation 

from sklearn.metrics import mean_squared_error, mean_absolute_error

def evaluate(true, predicted, model_name):
    mse = mean_squared_error(true, predicted)
    mae = mean_absolute_error(true, predicted)
    print(f"{model_name} - MSE: {mse:.4f}, MAE: {mae:.4f}")

evaluate(test.values, arima_pred.values, "ARIMA")
evaluate(test.values, lstm_pred, "LSTM")


NameError: name 'arima_pred' is not defined

🔍 Notes on Optimization
ARIMA: auto_arima() handles (p, d, q) optimization.

LSTM: You can experiment with:

units: 50, 100, 200

epochs: 10–100

batch_size: 16, 32, 64

seq_length: 30, 60, 90