## Tesla Stock Prediction from Ensemble Model comprised of:
1. ARIMA time-series forecasting
2. Sentiment Analysis from Elon Musk's tweets

### ARIMA time-series forecasting

#### Extract Tesla Stock Historical Data
##### dataset: https://www.nasdaq.com/market-activity/stocks/tsla/historical

In [239]:
#import the required libraries
import requests
from bs4 import BeautifulSoup
import pandas as pd
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
import time

In [240]:
#choose the user_agent suitable for your browser
user_agent = "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"

#set to the path where your chromedriver bin file is located
driver_path = "r\C:\\Users\\Administrator\\Desktop\\chromedriver.exe"

#setup webdriver to access browser pages
options = webdriver.ChromeOptions()
options.add_argument('--ignore-certificate-errors')
options.add_argument('--incognito')
options.add_argument('--headless')
options.add_argument("user-agent=%s".format(user_agent))
service = Service(executable_path=driver_path)
driver = webdriver.Chrome(options=options, service=service)

In [241]:
driver.get("https://www.nasdaq.com/market-activity/stocks/tsla/historical/")

#change data-value to your desired value
select_duration = driver.find_elements(By.XPATH,"//button[@data-value='y5']")

In [242]:
#click the button of disired data-value chosen previously
driver.execute_script("arguments[0].click();", select_duration[0])

In [243]:
tesla_stock_df = pd.DataFrame(columns=["Date", "Close", "Volume", "Open", "High", "Low"])

In [244]:
#extract data in a page using bs4
def extract_content(page_source):
  global tesla_stock_df
  soup = BeautifulSoup(page_source, "html.parser")

  historical_data_row = soup.find_all('tr', {'class': 'historical-data__row'})

  for rows in historical_data_row:
    cols = rows.find_all("td")
    temp_df = pd.DataFrame([{"Date":rows.th.get_text(), "Close":cols[0].text, "Volume":cols[1].text, 
                            "Open":cols[2].text, "High":cols[3].text, "Low":cols[4].text}])
    tesla_stock_df = pd.concat([tesla_stock_df, temp_df])

  return tesla_stock_df

  

In [245]:
#iterate through each page until the end to obtain all the stock data
page_source = driver.page_source
extract_content(page_source)

current_page = 1
while True:
    try:
        current_page += 1
        print("Processing page {}.......".format(current_page))
        
        next_page_element = driver.find_element(By.XPATH, "//button[@class='pagination__page' and @data-page='{}']".format(str(current_page)))

        if next_page_element.is_enabled():
            driver.execute_script("arguments[0].click();", next_page_element)
            time.sleep(1)
            page_source = driver.page_source
            extract_content(page_source)
        else:
            break
    except NoSuchElementException :
        break
    

Processing page 2.......
Processing page 3.......
Processing page 4.......
Processing page 5.......
Processing page 6.......
Processing page 7.......
Processing page 8.......
Processing page 9.......
Processing page 10.......
Processing page 11.......
Processing page 12.......
Processing page 13.......
Processing page 14.......
Processing page 15.......
Processing page 16.......
Processing page 17.......
Processing page 18.......
Processing page 19.......
Processing page 20.......
Processing page 21.......
Processing page 22.......
Processing page 23.......
Processing page 24.......
Processing page 25.......
Processing page 26.......
Processing page 27.......
Processing page 28.......
Processing page 29.......
Processing page 30.......
Processing page 31.......
Processing page 32.......
Processing page 33.......
Processing page 34.......
Processing page 35.......
Processing page 36.......
Processing page 37.......
Processing page 38.......
Processing page 39.......
Processing page 40..

In [246]:
tesla_stock_df.reset_index(drop=True, inplace=True)

In [247]:
print(tesla_stock_df)

            Date     Close       Volume      Open      High       Low
0     07/27/2023   $255.71  103,697,300   $268.31   $269.13   $255.30
1     07/26/2023   $264.35   95,856,180   $263.25   $268.04   $261.75
2     07/25/2023   $265.28  112,757,300   $272.38   $272.90   $265.00
3     07/24/2023   $269.06  137,005,000   $255.85   $269.85   $254.12
4     07/21/2023   $260.02  161,796,100   $268.00   $268.00   $255.80
...          ...       ...          ...       ...       ...       ...
1252  08/03/2018  $23.2113  204,699,498  $23.1873  $23.6667  $22.8353
1253  08/02/2018  $23.3027  347,387,226   $21.896  $23.3327   $21.544
1254  08/01/2018   $20.056  131,521,419   $19.866    $20.20  $19.5333
1255  07/31/2018   $19.876   75,826,372  $19.4833   $19.888  $19.2713
1256  07/30/2018  $19.3447  101,993,664  $19.7267    $19.74  $19.0753

[1257 rows x 6 columns]


In [248]:
#save the resulting df to csv
file_path = "tesla_stock_price.csv"
tesla_stock_df.to_csv(file_path, mode='w')

#### Data Cleaning

In [249]:
tesla_stock_df = pd.read_csv(file_path)

In [250]:
#data cleaning
tesla_stock_df["Date"] = pd.to_datetime(tesla_stock_df["Date"])
tesla_stock_df["Close"] = tesla_stock_df["Close"].str.replace("$", "")
tesla_stock_df["Volume"] = tesla_stock_df["Volume"].str.replace(",", "")
tesla_stock_df["Open"] = tesla_stock_df["Open"].str.replace("$", "")
tesla_stock_df["High"] = tesla_stock_df["High"].str.replace("$", "")
tesla_stock_df["Low"] = tesla_stock_df["Low"].str.replace("$", "")

tesla_stock_df[["Close", "Volume", "Open", "High", "Low"]] = tesla_stock_df[[
    "Close", "Volume", "Open", "High", "Low"]].astype(str).astype(float)
print(tesla_stock_df)

      Unnamed: 0       Date     Close       Volume      Open      High  \
0              0 2023-07-27  255.7100  103697300.0  268.3100  269.1300   
1              1 2023-07-26  264.3500   95856180.0  263.2500  268.0400   
2              2 2023-07-25  265.2800  112757300.0  272.3800  272.9000   
3              3 2023-07-24  269.0600  137005000.0  255.8500  269.8500   
4              4 2023-07-21  260.0200  161796100.0  268.0000  268.0000   
...          ...        ...       ...          ...       ...       ...   
1252        1252 2018-08-03   23.2113  204699498.0   23.1873   23.6667   
1253        1253 2018-08-02   23.3027  347387226.0   21.8960   23.3327   
1254        1254 2018-08-01   20.0560  131521419.0   19.8660   20.2000   
1255        1255 2018-07-31   19.8760   75826372.0   19.4833   19.8880   
1256        1256 2018-07-30   19.3447  101993664.0   19.7267   19.7400   

           Low  
0     255.3000  
1     261.7500  
2     265.0000  
3     254.1200  
4     255.8000  
...      

In [251]:
print(tesla_stock_df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1257 entries, 0 to 1256
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   Unnamed: 0  1257 non-null   int64         
 1   Date        1257 non-null   datetime64[ns]
 2   Close       1257 non-null   float64       
 3   Volume      1257 non-null   float64       
 4   Open        1257 non-null   float64       
 5   High        1257 non-null   float64       
 6   Low         1257 non-null   float64       
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 68.9 KB
None


In [252]:
print(tesla_stock_df.describe(include="all").round(2))

       Unnamed: 0                           Date    Close        Volume  \
count     1257.00                           1257  1257.00  1.257000e+03   
mean       628.00  2021-01-25 16:20:37.231503616   151.97  1.366425e+08   
min          0.00            2018-07-30 00:00:00    11.93  2.940168e+07   
25%        314.00            2019-10-28 00:00:00    23.21  7.911178e+07   
50%        628.00            2021-01-27 00:00:00   166.66  1.095203e+08   
75%        942.00            2022-04-26 00:00:00   242.67  1.656112e+08   
max       1256.00            2023-07-27 00:00:00   409.97  9.140809e+08   
std        363.01                            NaN   113.22  8.887694e+07   

          Open     High      Low  
count  1257.00  1257.00  1257.00  
mean    152.03   155.63   148.22  
min      12.07    12.45    11.80  
25%      23.08    23.47    22.81  
50%     167.38   170.79   163.51  
75%     242.88   248.25   237.14  
max     411.47   414.50   405.67  
std     113.39   115.99   110.53  
