**Importing Dependencies**

In [71]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

***Handling the missing data ***

In [13]:
dataset=pd.read_csv('/content/drive/MyDrive/Machine Learning Task List /Data Set For Task/2) Stock Prices Data Set.csv')
dataset.head()

Unnamed: 0,symbol,date,open,high,low,close,volume
0,AAL,2014-01-02,25.07,25.82,25.06,25.36,8998943
1,AAPL,2014-01-02,79.3828,79.5756,78.8601,79.0185,58791957
2,AAP,2014-01-02,110.36,111.88,109.29,109.74,542711
3,ABBV,2014-01-02,52.12,52.33,51.52,51.98,4569061
4,ABC,2014-01-02,70.11,70.23,69.48,69.89,1148391


In [14]:
dataset.shape

(497472, 7)

In [15]:
dataset.isnull().sum()

Unnamed: 0,0
symbol,0
date,0
open,11
high,8
low,8
close,0
volume,0


In [16]:
dataset['open'].fillna(dataset['open'].mean(),inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataset['open'].fillna(dataset['open'].mean(),inplace=True)


In [17]:
dataset.isnull().sum()

Unnamed: 0,0
symbol,0
date,0
open,0
high,8
low,8
close,0
volume,0


In [18]:
dataset['high'].fillna(dataset['high'].median(),inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  dataset['high'].fillna(dataset['high'].median(),inplace=True)


In [19]:
dataset.isnull().sum()

Unnamed: 0,0
symbol,0
date,0
open,0
high,0
low,8
close,0
volume,0


In [20]:
dataset=dataset.dropna(how='any')

In [21]:
dataset.isnull().sum()

Unnamed: 0,0
symbol,0
date,0
open,0
high,0
low,0
close,0
volume,0


# **Label Encoding**

In [42]:
dataset=pd.read_csv('/content/drive/MyDrive/Machine Learning Task List /Data Set For Task/2) Stock Prices Data Set.csv')
dataset.head()

Unnamed: 0,symbol,date,open,high,low,close,volume
0,AAL,2014-01-02,25.07,25.82,25.06,25.36,8998943
1,AAPL,2014-01-02,79.3828,79.5756,78.8601,79.0185,58791957
2,AAP,2014-01-02,110.36,111.88,109.29,109.74,542711
3,ABBV,2014-01-02,52.12,52.33,51.52,51.98,4569061
4,ABC,2014-01-02,70.11,70.23,69.48,69.89,1148391


In [43]:
label_encoder=LabelEncoder()

In [54]:
label=label_encoder.fit_transform(dataset['symbol'])
dataset['symbol'] = label

In [76]:
label=label_encoder.fit_transform(dataset['date'])
dataset['date'] = label

In [94]:
dataset.head()

Unnamed: 0,symbol,date,open,high,low,close,volume,target
0,1,0,25.07,25.82,25.06,25.36,8998943,1
1,3,0,79.3828,79.5756,78.8601,79.0185,58791957,3
2,2,0,110.36,111.88,109.29,109.74,542711,2
3,4,0,52.12,52.33,51.52,51.98,4569061,4
4,5,0,70.11,70.23,69.48,69.89,1148391,5


**Splitting the data into training data and testing data**

In [93]:
X=dataset.drop(columns = 'symbol',axis=1)
Y=dataset['symbol']

In [79]:
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=3)


In [80]:
X.shape,X_train.shape,X_test.shape

((497472, 7), (397977, 7), (99495, 7))

**Standardizing the data**

In [81]:
scaler=StandardScaler()

In [82]:
X_train_standardized=scaler.fit_transform(X_train)

In [83]:
print(X_train_standardized)

[[ 1.19911586 -0.32787252 -0.32606789 ... -0.3389539  -0.03643765
  -1.4267239 ]
 [ 1.56039304 -0.14306455 -0.1468032  ... -0.1430485   0.74027554
  -0.69990342]
 [ 1.0855716   0.44145129  0.43448169 ...  0.44080701 -0.4097863
   1.21999975]
 ...
 [-1.19563574  1.77193011  1.77317461 ...  1.78808972 -0.47858073
  -1.6529982 ]
 [-0.34233345  0.62754609  0.62218929 ...  0.62354646 -0.31052489
  -1.45415109]
 [-1.23692456 -0.21987814 -0.21935336 ... -0.21481778 -0.38198094
   0.67145599]]


In [87]:
X_test_standardized=scaler.fit_transform(X_test)

In [86]:
print(X_test_standardized)

[[-0.65719484 -0.61186767 -0.61239603 ... -0.61261036  0.3744444
   0.06436609]
 [ 1.16344027 -0.52905154 -0.5305498  ... -0.52758938 -0.29812079
   0.03007025]
 [ 0.41660129  1.51084035  1.52910283 ...  1.53441142 -0.34387343
  -1.25945357]
 ...
 [ 1.03609906 -0.4244417  -0.42843414 ... -0.44314941 -0.29590496
  -1.21143938]
 [-0.2235464   0.99738038  0.98274083 ...  0.94313797 -0.26046073
   0.34559203]
 [-1.61397472 -0.29048299 -0.28280165 ... -0.28298461 -0.04758168
  -0.82732591]]
