<a href="https://colab.research.google.com/github/MusabUmama/TSLA_Stock_Price_Prediction/blob/main/Tesla_Stock_Price_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Tesla Stock Price Prediction**

**The Dataset** - Stock prices data of TSLA from 2023 January to August.
Source :- Nasdaq Website (https://www.nasdaq.com/market-activity/stocks/tsla/historical)

In [37]:
# Importing the pandas library
import pandas as pd

*Data cleaning & Pre processing*

In [38]:
# Importing the dataset
df = pd.read_csv('/content/tesla_historical_data_jan_to_aug_2023.csv')

# Filling the missing places with mean value
df.fillna(df.mean(), inplace=True)

# Converting the Date column to datetime format
df['Date'] = pd.to_datetime(df['Date'])

# Function to convert string with dollar sign into number
def convert_dollar_string_to_number(cell):
    if isinstance(cell, str) and cell.startswith('$'):
        # Remove dollar sign and commas, and convert to float
        return float(cell.replace('$', '').replace(',', ''))
    else:
        return cell

# Applying the function
df = df.applymap(convert_dollar_string_to_number)

  df.fillna(df.mean(), inplace=True)


In [39]:
# Calculating the daily returns
df['Daily_Return'] = df['Close/Last'].pct_change()

# Function to calculate moving averages
def calculate_moving_average(data, window):
    return data.rolling(window=window, min_periods=1).mean()

# Applying the function to find it for 7 days & 30 days
df['7_Day_MA'] = calculate_moving_average(df['Close/Last'], window=7)
df['30_Day_MA'] = calculate_moving_average(df['Close/Last'], window=30)

In [40]:
# Droping the daily return value with NaN
df.dropna(subset=['Daily_Return'], inplace=True)

In [41]:
print(df.head())

        Date  Close/Last     Volume    Open    High     Low  Daily_Return  \
1 2023-08-31      258.08  108861700  255.98  261.18  255.05      0.053345   
2 2023-08-30      256.90  121988400  254.20  260.51  250.59     -0.004572   
3 2023-08-29      257.18  134047600  238.58  257.48  237.77      0.001090   
4 2023-08-28      238.82  107673700  242.58  244.38  235.35     -0.071390   
5 2023-08-25      238.59  106612200  231.31  239.00  230.35     -0.000963   

     7_Day_MA   30_Day_MA  
1  251.545000  251.545000  
2  253.330000  253.330000  
3  254.292500  254.292500  
4  251.198000  251.198000  
5  249.096667  249.096667  


*Data splitting *

In [42]:
# Importing the scikit-learn & numpy libraries
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

In [43]:
# Date threshold to split the dataset
split_date = '2023-06-01'

# Spliting the dataset (Time based)
train_data = df[df['Date'] < split_date]
test_data = df[df['Date'] >= split_date]

*Feature selection* - Daily Return

In [44]:
# Features & target variables
X_train = train_data[['Open', 'High', 'Low', 'Volume', '7_Day_MA', '30_Day_MA']]
y_train = train_data['Daily_Return']
X_test = test_data[['Open', 'High', 'Low', 'Volume', '7_Day_MA', '30_Day_MA']]
y_test = test_data['Daily_Return']

*Model selection*

In [45]:
# The model (Linear Regression)
model = LinearRegression()

*Hyperparameter tuning & Model training*

In [46]:
# hyper parameters to tune
param_grid = {
    'fit_intercept': [True, False],
}

# Grid search
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_absolute_error')

grid_search.fit(X_train, y_train)

In [47]:
# Best hyper parameters from the grid search
best_params = grid_search.best_params_
print("Best Hyperparameters:", best_params)

# The best model
best_model = grid_search.best_estimator_

Best Hyperparameters: {'fit_intercept': False}


*Prediction*

In [48]:
# Prediction using the best model
y_pred = best_model.predict(X_test)

*Model evaluation*

In [49]:
# Calculating the Mean absolute error, Mean squared error & Root mean squared error
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f'Mean Absolute Error (MAE): {mae:.4f}')
print(f'Mean Squared Error (MSE): {mse:.4f}')
print(f'Root Mean Squared Error (RMSE): {rmse:.4f}')

Mean Absolute Error (MAE): 0.0217
Mean Squared Error (MSE): 0.0007
Root Mean Squared Error (RMSE): 0.0265


*Testing on new data*

In [50]:
# New values to predict
new_open_value = 250.00
new_high_value = 258.00
new_low_value = 247.00
new_volume_value = 108861700
new_7_day_ma_value = 255.50
new_30_day_ma_value = 253.00


new_data = pd.DataFrame({
    'Open': [new_open_value],
    'High': [new_high_value],
    'Low': [new_low_value],
    'Volume': [new_volume_value],
    '7_Day_MA': [new_7_day_ma_value],
    '30_Day_MA': [new_30_day_ma_value]
})

predicted_return = best_model.predict(new_data)
print(f'Predicted Daily Return for New Data: {predicted_return[0]:.4f}')

Predicted Daily Return for New Data: -0.0053
