In [1]:
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import time

In [2]:
# Load the TSLA price table from a CSV path relative to the working directory.
df = pd.read_csv(filepath_or_buffer="TSLA.csv")

In [3]:
# Preview the first five rows of the freshly loaded frame (head() defaults to 5).
df.head()

Unnamed: 0,Date,High,Low,Open,Close,Volume,Adj Close
0,2019-09-30,48.796001,47.222,48.599998,48.174,29399000.0,48.174
1,2019-10-01,49.189999,47.826,48.299999,48.938,30813000.0,48.938
2,2019-10-02,48.93,47.886002,48.658001,48.625999,28157000.0,48.625999
3,2019-10-03,46.896,44.855999,46.372002,46.605999,75422500.0,46.605999
4,2019-10-04,46.956001,45.613998,46.321999,46.285999,39975000.0,46.285999


In [4]:
# Keep only the closing price; .copy() gives an independent frame to add columns to.
df = df.loc[:, ["Close"]].copy()

In [5]:
# The next row's close becomes this row's target; the final row is left with NaN.
df["target"] = df["Close"].shift(periods=-1)


In [6]:
# Remove rows containing NaN (the last row has no next-day target after the shift).
df = df.dropna()

In [7]:
# Check the Close/target pairing on the first five rows.
df.head()

Unnamed: 0,Close,target
0,48.174,48.938
1,48.938,48.625999
2,48.625999,46.605999
3,46.605999,46.285999
4,46.285999,47.543999


In [8]:
def train_test_split(data, percentage):
    """Split ordered rows into a leading train part and a trailing test part.

    The split is purely positional (no shuffling): the first rows form the
    training set and the remaining rows form the test set.

    Parameters
    ----------
    data : pandas.DataFrame, pandas.Series or array-like
        Rows to split. Objects exposing a non-callable ``.values`` attribute
        (pandas containers) are converted through it; anything else goes
        through ``np.asarray``.
    percentage : float
        Fraction of rows, between 0 and 1 inclusive, assigned to the test set.

    Returns
    -------
    tuple of numpy.ndarray
        ``(train, test)`` where ``train`` holds the first
        ``floor(len(data) * (1 - percentage))`` rows and ``test`` the rest.

    Raises
    ------
    ValueError
        If ``percentage`` is not within ``[0, 1]``.
    """
    if not 0 <= percentage <= 1:
        raise ValueError(f"percentage must be between 0 and 1, got {percentage!r}")

    values = getattr(data, "values", None)
    if values is None or callable(values):
        values = np.asarray(data)

    # 1 - percentage is rarely exact in binary floating point: 10 * (1 - 0.9)
    # evaluates to 0.9999999999999998, which a bare int() truncates to 0.
    # Nudging the product up by a tiny tolerance keeps the intended floor.
    tolerance = 1e-9
    n = int(len(values) * (1 - percentage) + tolerance)
    return values[:n], values[n:]

In [9]:
# Hold out the trailing 20% of rows as the test set.
train, test = train_test_split(data=df, percentage=0.2)

In [10]:
# Row counts in order: full frame, training split, test split.
for split in (df, train, test):
    print(len(split))

638
510
128


In [11]:
# Every column but the last is a feature; the last column is the target.
X, y = train[:, :-1], train[:, -1]

In [12]:
from xgboost import XGBRegressor

# Sanity-check the array shapes before fitting.
print(f"X shape: {X.shape}")
print(f"y shape: {y.shape}")

# Hyperparameters for the one-off model fitted on the training split.
xgb_settings = {
    "objective": "reg:squarederror",
    "n_estimators": 1000,
    "learning_rate": 0.1,
}
model = XGBRegressor(**xgb_settings)
model.fit(X, y)

X shape: (510, 1)
y shape: (510,)


In [13]:
# First held-out row, in column order [Close, target].
test[0, :]

array([793.60998535, 785.48999023])

In [14]:
# Predict from the first test-set close, shaped as a single-row 2-D input.
first_close = test[0, 0]
val = np.array([[first_close]])

pred = model.predict(val)
print(pred[0])

808.6418


In [15]:
def xgb_predict(train, val, n_estimators=1000, **xgb_params):
    """Fit a fresh XGBRegressor on ``train`` and predict the target for one sample.

    Parameters
    ----------
    train : array-like
        2-D collection of rows. Every column but the last is used as a
        feature; the last column is the regression target.
    val : scalar or array-like
        Feature value(s) of the single sample to predict; reshaped to one row.
    n_estimators : int, default 1000
        Number of boosting rounds passed to ``XGBRegressor``.
    **xgb_params
        Extra keyword arguments forwarded to ``XGBRegressor`` (for example
        ``learning_rate=0.1``). ``objective`` is set to
        ``"reg:squarederror"`` unless supplied here.

    Returns
    -------
    The first (and only) element of the model's prediction for ``val``.
    """
    train = np.asarray(train)
    features, target = train[:, :-1], train[:, -1]

    # Keep the original objective as the default while allowing an override.
    xgb_params.setdefault("objective", "reg:squarederror")
    model = XGBRegressor(n_estimators=n_estimators, **xgb_params)
    model.fit(features, target)

    # predict() expects a 2-D array; a single sample becomes one row.
    sample = np.asarray(val).reshape(1, -1)
    return model.predict(sample)[0]

In [16]:
# Same single-sample prediction as above, now through the helper.
xgb_predict(train=train, val=test[0, 0])

808.68335

In [17]:
from sklearn.metrics import root_mean_squared_error

def validate(data, perc):
    """Walk-forward validation of ``xgb_predict`` over the trailing part of ``data``.

    ``data`` is split with ``train_test_split(data, perc)``. For each test row,
    in order, ``xgb_predict`` is called with every row seen so far and that
    row's features; the true row is then appended to the history before the
    next step.

    Parameters
    ----------
    data
        Passed straight to ``train_test_split``. The last column of the
        resulting arrays is treated as the target and the remaining columns
        as features.
    perc : float
        Test fraction forwarded to ``train_test_split``.

    Returns
    -------
    tuple
        ``(error, actual, predictions)``: the value returned by
        ``root_mean_squared_error(actual, predictions)``, the target column of
        the test split, and the list of one-step predictions.

    Raises
    ------
    ValueError
        If the split leaves the train or the test part empty.
    """
    train, test = train_test_split(data, perc)
    test = np.asarray(test)

    if len(train) == 0 or len(test) == 0:
        raise ValueError(
            f"perc={perc!r} leaves {len(train)} train and {len(test)} test rows; "
            "both splits must be non-empty"
        )

    history = list(train)
    predictions = []

    for row in test:
        # Pass the whole feature vector rather than only its first entry, so
        # data with more than one feature column matches what the model was
        # trained on. With a single feature the input is unchanged.
        predictions.append(xgb_predict(history, row[:-1]))
        # Reveal the true row only after predicting it.
        history.append(row)

    actual = test[:, -1]
    error = root_mean_squared_error(actual, predictions)

    return error, actual, predictions

In [None]:
%%time
df = df.dropna()
rmse,y, pred = validate(df, 0.3)

print(rmse)

In [None]:
import matplotlib.pyplot as plt

# Overlay walk-forward predictions on the actual test-set targets.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(y, label="Actual", linewidth=2)
ax.plot(pred, label="Predicted", linestyle="--", linewidth=2)
ax.set_title("Tesla Stock Price: Actual vs Predicted")
ax.set_xlabel("Time Step (Day Index)")
ax.set_ylabel("Price ($)")
ax.legend()
ax.grid(True)
plt.show()