In [None]:
# Step 1: Import necessary libraries
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller


ModuleNotFoundError: No module named 'yfinance'

In [None]:
# Fetching data from YFinance for TSLA, BND, and SPY
tickers = ['TSLA', 'BND', 'SPY']

# Download each ticker separately so every asset ends up in its own frame.
data = {}
for ticker in tickers:
    df = yf.download(ticker, start='2015-07-01', end='2025-07-31')

    # A failed download yields an empty frame; stop here instead of writing
    # an empty CSV and reporting success.
    if df.empty:
        raise RuntimeError(f"No data returned for {ticker}; check the ticker symbol and network access.")

    # Newer yfinance releases return (Price, Ticker) MultiIndex columns even
    # for a single ticker. Written to CSV that produces extra header rows and
    # breaks a later read with index_col='Date', so keep only the price-field
    # level.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    data[ticker] = df

# Save data to CSV files for future use; create the output directory first so
# a fresh checkout does not fail on a missing 'data/' folder.
Path('data').mkdir(parents=True, exist_ok=True)
for ticker, df in data.items():
    df.to_csv(f'data/{ticker}.csv')

print("Data fetched and saved successfully!")


In [None]:
# Step 3: Load the fetched data from CSV for preprocessing
# Every file is read the same way: the 'Date' column becomes a parsed index.
read_opts = dict(index_col='Date', parse_dates=True)
tsla = pd.read_csv('data/TSLA.csv', **read_opts)
bnd = pd.read_csv('data/BND.csv', **read_opts)
spy = pd.read_csv('data/SPY.csv', **read_opts)

# Show initial data preview: a heading followed by the first rows of each frame
for heading, frame in (("TSLA Data:", tsla), ("BND Data:", bnd), ("SPY Data:", spy)):
    print(heading)
    print(frame.head())


In [None]:
#  Data Cleaning - Handle missing values
# Report the per-column count of missing values in each dataset
print(f"TSLA Missing Data: {tsla.isnull().sum()}")
print(f"BND Missing Data: {bnd.isnull().sum()}")
print(f"SPY Missing Data: {spy.isnull().sum()}")

# Forward-fill gaps with the last known value. `.ffill()` replaces the
# deprecated `fillna(method='ffill')` spelling. Re-assigning (instead of
# inplace=True) keeps the cell safe to re-run.
# Note: forward fill cannot fill NaNs that precede the first valid row.
tsla = tsla.ffill()
bnd = bnd.ffill()
spy = spy.ffill()

print("Missing values handled using forward fill.")


In [None]:
# Normalizing the 'Close' prices using MinMaxScaler
#
# The scaled values go into a separate 'Close_Scaled' column so the raw
# 'Close' prices stay available. Overwriting 'Close' would make any later
# pct_change() meaningless: a min-max scaled series hits exactly 0 at its
# minimum, so the following return is a division by zero.
#
# One scaler is fitted per asset and kept in `scalers`, so each scaled series
# can be mapped back to prices with `scalers[name].inverse_transform(...)`.
# After the loop `scaler` refers to the SPY scaler (the last one fitted).
scalers = {}
for name, frame in (('TSLA', tsla), ('BND', bnd), ('SPY', spy)):
    scaler = MinMaxScaler()
    # fit_transform returns an (n, 1) array; flatten it to a plain column.
    frame['Close_Scaled'] = scaler.fit_transform(frame[['Close']]).ravel()
    scalers[name] = scaler

# Save cleaned data (raw 'Close' plus 'Close_Scaled') to CSV for future use
tsla.to_csv('data/TSLA_cleaned.csv')
bnd.to_csv('data/BND_cleaned.csv')
spy.to_csv('data/SPY_cleaned.csv')

print("Data normalization completed.")


In [None]:
#  Calculate daily returns for each asset
# The return is the percentage change of 'Close' from one row to the next.
# NOTE(review): this is only a real return when 'Close' holds raw prices; if
# 'Close' contains min-max scaled values the result is not a return and is
# infinite right after the row where the scaled value is 0.
for frame in (tsla, bnd, spy):
    frame['Return'] = frame['Close'].pct_change()

# Display the first few rows of returns
for frame in (tsla, bnd, spy):
    print(frame[['Close', 'Return']].head())


In [None]:
#  ADF Test for stationarity on daily returns
def adf_test(series):
    """Run the Augmented Dickey-Fuller test on a pandas Series.

    Missing and non-finite values are removed before testing. ``dropna()``
    alone leaves +/-inf in place (e.g. from a percentage change off a zero
    base), and ``adfuller`` cannot handle those.

    Parameters
    ----------
    series : pandas.Series
        Numeric series to test for stationarity.

    Returns
    -------
    tuple
        ``(adf_statistic, p_value)`` - the first two elements of the
        ``adfuller`` result.
    """
    # Surface dropped infinities instead of discarding them silently: they
    # usually mean the input series was computed from the wrong base.
    n_inf = int(np.isinf(series).sum())
    if n_inf:
        warnings.warn(
            f"adf_test: dropping {n_inf} infinite value(s) before running the ADF test",
            RuntimeWarning,
            stacklevel=2,
        )

    cleaned = series.replace([np.inf, -np.inf], np.nan).dropna()
    result = adfuller(cleaned)
    return result[0], result[1]  # ADF Statistic and p-value

# Apply ADF test on daily returns for each asset (TSLA, BND, SPY in that order);
# each result is an (ADF statistic, p-value) tuple.
adf_tsla, adf_bnd, adf_spy = (adf_test(frame['Return']) for frame in (tsla, bnd, spy))

# Display ADF test results, one line per asset
print(
    f"TSLA ADF Statistic: {adf_tsla[0]}, p-value: {adf_tsla[1]}",
    f"BND ADF Statistic: {adf_bnd[0]}, p-value: {adf_bnd[1]}",
    f"SPY ADF Statistic: {adf_spy[0]}, p-value: {adf_spy[1]}",
    sep="\n",
)


In [None]:
# Calculating rolling means and standard deviations for volatility
window = 30  # number of consecutive rows (trading days) per rolling window

# Attach the rolling mean and rolling standard deviation of 'Close' to each
# of TSLA, BND, and SPY. The first `window - 1` rows of both columns are NaN.
for frame in (tsla, bnd, spy):
    close_roll = frame['Close'].rolling(window=window)
    frame['Rolling Mean'] = close_roll.mean()
    frame['Rolling Std'] = close_roll.std()

# Plotting rolling mean and standard deviation for TSLA
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(tsla['Close'], label='TSLA Close')
ax.plot(tsla['Rolling Mean'], label='TSLA Rolling Mean', linestyle='--')
ax.plot(tsla['Rolling Std'], label='TSLA Rolling Std', linestyle='--')
ax.set_title('TSLA Closing Price, Rolling Mean, and Volatility')
ax.legend()
plt.show()


In [None]:
#  ARIMA Model for Forecasting (Tesla Returns)
FORECAST_STEPS = 30      # forecast horizon, in business days
ARIMA_ORDER = (1, 1, 1)  # (p, d, q) chosen explicitly; this is not the library default
# NOTE(review): returns are already a differenced series, so d=0 may be the
# better choice here - confirm against the ADF test results.

# Model input: drop NaN (first row of pct_change) and any non-finite values,
# which the estimator cannot handle.
returns_tsla = tsla['Return'].replace([np.inf, -np.inf], np.nan).dropna()
if len(returns_tsla) <= FORECAST_STEPS:
    raise ValueError(
        f"Need more than {FORECAST_STEPS} return observations to fit and evaluate the model, "
        f"got {len(returns_tsla)}."
    )

# Fit ARIMA on the full return history
model_tsla = ARIMA(returns_tsla, order=ARIMA_ORDER)
model_tsla_fit = model_tsla.fit()

# Forecast the next 30 business days after the last observed date.
# Starting from "last date + 1 business day" gives the correct dates even if
# the last observation is not itself a business day.
future_index = pd.date_range(
    start=tsla.index[-1] + pd.offsets.BDay(1),
    periods=FORECAST_STEPS,
    freq='B',
)
# Use the raw forecast values: the forecast comes back as a Series with its
# own index, and handing that Series to DataFrame(..., index=future_index)
# would re-align on labels and leave every row NaN.
forecast_tsla = pd.DataFrame(
    {'Forecast': np.asarray(model_tsla_fit.forecast(steps=FORECAST_STEPS))},
    index=future_index,
)

# Plot the forecast
plt.figure(figsize=(12, 6))
plt.plot(tsla['Return'], label='TSLA Return')
plt.plot(forecast_tsla, label='TSLA Forecast', linestyle='--')
plt.title('TSLA Daily Return Forecast using ARIMA')
plt.legend()
plt.show()

# Evaluate ARIMA model's performance out-of-sample: hold out the last
# FORECAST_STEPS observations, refit on the rest, and score the forecast
# against the held-out returns. (Scoring a forecast of future dates against
# past in-sample returns would compare two different periods.)
train_returns = returns_tsla.iloc[:-FORECAST_STEPS]
test_returns = returns_tsla.iloc[-FORECAST_STEPS:]
holdout_fit = ARIMA(train_returns, order=ARIMA_ORDER).fit()
holdout_forecast = np.asarray(holdout_fit.forecast(steps=FORECAST_STEPS))

arima_mse = mean_squared_error(test_returns, holdout_forecast)
print(f"ARIMA Model MSE: {arima_mse}")
