A guide to building a predictive model for Tesla's stock using Python and a machine learning library called scikit-learn.
The code include news sentiment and social media mentions as additional features.

In [2]:
#Step 1: Import the necessary libraries

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import tweepy
import re


In [3]:
#Step 2: Load the data

data = pd.read_csv('TSLA.csv')

In [5]:
#Step 3: Clean and preprocess the data

# Drop any missing values
data = data.dropna()

# Convert the 'Date' column to a datetime object
data['Date'] = pd.to_datetime(data['Date'])

# Create a new column for the month of each datapoint
data['Month'] = data['Date'].dt.month

# Create a new column for the day of each datapoint
data['Day'] = data['Date'].dt.day

# Create a new column for the year of each datapoint
data['Year'] = data['Date'].dt.year

# Create a new column for the difference between the high and low prices
data['PriceDiff'] = data['High'] - data['Low']


In [6]:
#Define the functions for News Headlines and Socialmediamentions

import requests
from bs4 import BeautifulSoup
import datetime

def get_news_headlines(date=None):
    if date is None:
        date = datetime.date.today()
    
    #date= datetime.datetime(2010, 6, 29)
    url = f"https://web.archive.org/web/{date.strftime('%Y%m%d')}.*/https://news.yahoo.com"
    #print(url)
    response = requests.get(url)
    soup = BeautifulSoup(response.content, 'html.parser')
    headline_links = soup.find_all('a', class_=['showtt', 'yltasis'])
    headlines = [link.get_text().strip() for link in headline_links]
    print("\n Today's NEWS ", date ,'\n')
    print(headlines)
    return headlines

headd=get_news_headlines()
#print(headd)   





 Today's NEWS  2023-02-22 

[]


In [7]:
#Step 4: Define the features and target variable

# Define the features and target variable
X = data[['Day', 'Month', 'Year', 'PriceDiff']]

# Get the news sentiment for each datapoint
analyzer = SentimentIntensityAnalyzer()
news_sentiment = []

for date in data['Date'].iloc[:10]:

    # Get the news headlines for the current date
    headlines = get_news_headlines(date)
    # Calculate the sentiment score for the headlines
    sentiment_score = 0
    for headline in headlines:
        sentiment_score += analyzer.polarity_scores(headline)['compound']
    news_sentiment.append(sentiment_score)
    print('The Sentiment Score for ', date, ' is : ', sentiment_score)
    print(news_sentiment)
#X['NewsSentiment'] = news_sentiment
y = data['Close']



 Today's NEWS  2010-06-29 00:00:00 

['', "Kagan insists she didn't block military at Harvard", 'Full\xa0Story\xa0»', 'Video:  Kagan denies she blocked military at Harvard Law', 'Slideshow:  Supreme Court Justice Nominee Elena Kagan', 'Kagan embraces notion of living Constitution', 'A short course in Legalese 101', 'Analysis: Republicans resurrect Marshall as target', '', 'Petraeus leaves room for changes in Afghan pullout', '', 'Spy suspects had interests in science, finance', 'US accepts international assistance for Gulf spill', 'N.Korea warns accident during exercise could start war', 'Oil prices plummet on concerns about US demand', 'Wall St sinks as economic alarm escalates', 'Father of missing Oregon boy leaves stepmother', 'Medical marijuana user sues over Walmart firing', "Is 'The Office' without Carell out of business?", "Kagan insists she didn't block military at Harvard", 'Spy suspects had interests in science, finance', 'Petraeus leaves room for changes in Afghan pullout',

KeyboardInterrupt: 

In [None]:
#Step 5: Split the data into training and testing sets

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
#Step 6: Train the model

# Create a linear regression model
lr = LinearRegression()

# Fit the model to the training data
lr.fit(X_train, y_train)



In [None]:
#Step 7: Evaluate the model

# Make predictions on the testing set
y_pred = lr.predict(X_test)

# Calculate the mean squared error of the model
mse = np.mean((y_pred - y_test) ** 2)
print("Mean Squared Error:", mse)


In [None]:
#Step 8: Make predictions

# Predict the closing price for a given set of features
new_data = np.array([[15, 3, 2023, 10, 0.5]])
predicted_price = lr.predict(new_data)
print("Predicted price:", predicted_price)
