## Importing the essential libraries over here

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')

## Importing the dataset over here

In [2]:
# Read the Tesla price history from the CSV that sits next to the notebook.
data = pd.read_csv(filepath_or_buffer="Tesla Dataset.csv")

In [3]:
# Peek at the first five rows to confirm the columns loaded as expected.
data.head(n=5)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2010-06-29,1.266667,1.666667,1.169333,1.592667,1.592667,281494500
1,2010-06-30,1.719333,2.028,1.553333,1.588667,1.588667,257806500
2,2010-07-01,1.666667,1.728,1.351333,1.464,1.464,123282000
3,2010-07-02,1.533333,1.54,1.247333,1.28,1.28,77097000
4,2010-07-06,1.333333,1.333333,1.055333,1.074,1.074,103003500


## Checking for duplicate observations

In [4]:
# Count rows that repeat an earlier row exactly (the first occurrence is not counted).
data.duplicated(keep="first").sum()

0

## Checking for missing values

In [5]:
# Number of missing entries in each column.
data.isna().sum()

Date         0
Open         0
High         0
Low          0
Close        0
Adj Close    0
Volume       0
dtype: int64

## Filtering all the numerical features over here

In [6]:
# Collect every column that is not stored as text.
# Text columns are recognised both by the classic object dtype and by pandas'
# dedicated string dtype; comparing against 'O' alone would list a
# string-dtype column such as Date as numerical.
numerical_features=[
  feature for feature,dtype in data.dtypes.items()
  if not (dtype=='O' or isinstance(dtype,pd.StringDtype))
]
for feature in numerical_features:
  print(feature)

Open
High
Low
Close
Adj Close
Volume


In [7]:
# Show only the columns selected as numerical.
data.loc[:,numerical_features]

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
0,1.266667,1.666667,1.169333,1.592667,1.592667,281494500
1,1.719333,2.028000,1.553333,1.588667,1.588667,257806500
2,1.666667,1.728000,1.351333,1.464000,1.464000,123282000
3,1.533333,1.540000,1.247333,1.280000,1.280000,77097000
4,1.333333,1.333333,1.055333,1.074000,1.074000,103003500
...,...,...,...,...,...,...
3504,178.500000,180.320007,173.820007,178.080002,178.080002,67314600
3505,178.130005,182.639999,174.490005,176.289993,176.289993,68568900
3506,174.779999,177.759995,174.000000,174.770004,174.770004,60056300
3507,175.350006,176.149994,172.130005,175.000000,175.000000,57614800


## Filtering all the categorical features over here

In [8]:
# Collect every column that is stored as text.
# Both the classic object dtype and pandas' dedicated string dtype count as
# text; checking only for "O" would skip a string-dtype column, leaving it
# un-encoded for the model.
cat_features=[
  feature for feature,dtype in data.dtypes.items()
  if dtype=="O" or isinstance(dtype,pd.StringDtype)
]
for feature in cat_features:
  print(feature)

Date


In [9]:
# Show only the columns selected as categorical.
data.loc[:,cat_features]

Unnamed: 0,Date
0,2010-06-29
1,2010-06-30
2,2010-07-01
3,2010-07-02
4,2010-07-06
...,...
3504,2024-05-31
3505,2024-06-03
3506,2024-06-04
3507,2024-06-05


## Encoding the categorical features over here

In [10]:
# Replace each categorical column with integer codes, numbered in order of
# first appearance of each distinct value.
for column in cat_features:
  distinct_values=data[column].unique()
  codes=dict(zip(distinct_values,range(len(distinct_values))))
  data[column]=data[column].map(codes)

In [11]:
# Display the frame after encoding; the categorical columns now hold integer codes.
data

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,0,1.266667,1.666667,1.169333,1.592667,1.592667,281494500
1,1,1.719333,2.028000,1.553333,1.588667,1.588667,257806500
2,2,1.666667,1.728000,1.351333,1.464000,1.464000,123282000
3,3,1.533333,1.540000,1.247333,1.280000,1.280000,77097000
4,4,1.333333,1.333333,1.055333,1.074000,1.074000,103003500
...,...,...,...,...,...,...,...
3504,3504,178.500000,180.320007,173.820007,178.080002,178.080002,67314600
3505,3505,178.130005,182.639999,174.490005,176.289993,176.289993,68568900
3506,3506,174.779999,177.759995,174.000000,174.770004,174.770004,60056300
3507,3507,175.350006,176.149994,172.130005,175.000000,175.000000,57614800


## Creating the features and labels over here

In [12]:
# Append a copy of Low as the last column (LOW) so the target sits at the end of the frame.
data=data.assign(LOW=data['Low'])

In [13]:
# Remove the original Low column now that LOW carries the same values.
# errors='ignore' plus reassignment keeps this cell safe to re-run: a second
# execution is a no-op instead of raising KeyError for the missing column.
data=data.drop(columns=['Low'],errors='ignore')

In [14]:
# Every column except the last becomes a predictor; the last column is the target.
X=data.iloc[:,:-1].to_numpy()
y=data.iloc[:,-1].to_numpy()

## Splitting the dataset into training set and testing set over here

In [15]:
from sklearn.model_selection import train_test_split
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the rows appear to be in date order and this split shuffles
# them, so test days fall between training days — confirm that is the
# evaluation you want before relying on the score.
X_train,X_test,y_train,y_test=train_test_split(
  X,
  y,
  test_size=0.2,
  random_state=0,
)

## Training the model on the training set over here

In [16]:
from sklearn.ensemble import RandomForestRegressor
# Seed the forest so its random sampling repeats from run to run; without a
# seed the fitted model and every score derived from it change on each execution.
regressor=RandomForestRegressor(random_state=0)
regressor.fit(X_train,y_train)

## Evaluating the performance of the model on the testing dataset over here

In [17]:
# Predict the held-out rows and print predictions (left column) beside the
# true values (right column), rounded to two decimals for display.
y_pred=regressor.predict(X_test)
np.set_printoptions(precision=2)
print(np.column_stack((y_pred,y_test)))

[[339.62 339.73]
 [  1.89   1.9 ]
 [ 16.56  16.63]
 ...
 [ 11.67  11.55]
 [  1.87   1.88]
 [ 22.09  22.17]]


## Evaluating the performance of the model using r2 score over here

In [18]:
from sklearn.metrics import r2_score
# Coefficient of determination of the predictions against the held-out targets.
r2_score(y_true=y_test,y_pred=y_pred)

0.9995656120098833