In [13]:
import pandas as pd 
import numpy as np

In [14]:
df_orig = pd.read_csv('equity_stocks.csv')

df_orig.head()

Unnamed: 0,DATE,CODE,NAME,12m Low,12m High,Day Low,Day High,Day Price,Previous,Change,Change%,Volume,Adjust,Date,Adjusted
0,1/3/2012,EQTY,Equity Bank,18.5,34.25,16.5,17.0,16.7,16.4,0.3,1.80%,141700,-,,
1,1/4/2012,EQTY,Equity Bank,18.5,34.25,16.4,16.8,16.55,16.7,-0.15,0.91%,912500,-,,
2,1/5/2012,EQTY,Equity Bank,18.5,34.25,15.8,16.75,16.15,16.55,-0.4,2.48%,748700,-,,
3,1/6/2012,EQTY,Equity Bank,18.5,34.25,15.5,16.0,15.8,16.15,-0.35,2.22%,3510000,-,,
4,1/9/2012,EQTY,Equity Bank,18.5,34.25,15.8,16.5,15.95,15.8,0.15,0.94%,1090000,-,,


### Clean the data by handling missing values

In [15]:
# Make a copy of the df that I will manipulate
df = df_orig.copy()

df.head()

Unnamed: 0,DATE,CODE,NAME,12m Low,12m High,Day Low,Day High,Day Price,Previous,Change,Change%,Volume,Adjust,Date,Adjusted
0,1/3/2012,EQTY,Equity Bank,18.5,34.25,16.5,17.0,16.7,16.4,0.3,1.80%,141700,-,,
1,1/4/2012,EQTY,Equity Bank,18.5,34.25,16.4,16.8,16.55,16.7,-0.15,0.91%,912500,-,,
2,1/5/2012,EQTY,Equity Bank,18.5,34.25,15.8,16.75,16.15,16.55,-0.4,2.48%,748700,-,,
3,1/6/2012,EQTY,Equity Bank,18.5,34.25,15.5,16.0,15.8,16.15,-0.35,2.22%,3510000,-,,
4,1/9/2012,EQTY,Equity Bank,18.5,34.25,15.8,16.5,15.95,15.8,0.15,0.94%,1090000,-,,


In [16]:
df.isna().sum()

DATE          248
CODE            0
NAME            0
12m Low         0
12m High        0
Day Low         0
Day High        0
Day Price       0
Previous        0
Change          0
Change%         0
Volume          0
Adjust        248
Date         1252
Adjusted     1252
dtype: int64

In [17]:
# Remove columns with a lot of missing values, and the Code and Name
columns = ['Adjust', 'Date', 'Adjusted', 'CODE', 'NAME']

df = df.drop(columns, axis=1)

df.head()

Unnamed: 0,DATE,12m Low,12m High,Day Low,Day High,Day Price,Previous,Change,Change%,Volume
0,1/3/2012,18.5,34.25,16.5,17.0,16.7,16.4,0.3,1.80%,141700
1,1/4/2012,18.5,34.25,16.4,16.8,16.55,16.7,-0.15,0.91%,912500
2,1/5/2012,18.5,34.25,15.8,16.75,16.15,16.55,-0.4,2.48%,748700
3,1/6/2012,18.5,34.25,15.5,16.0,15.8,16.15,-0.35,2.22%,3510000
4,1/9/2012,18.5,34.25,15.8,16.5,15.95,15.8,0.15,0.94%,1090000


In [18]:
# Drop rows that have missing values
df = df.dropna(subset=['DATE'])

df.isna().sum()

DATE         0
12m Low      0
12m High     0
Day Low      0
Day High     0
Day Price    0
Previous     0
Change       0
Change%      0
Volume       0
dtype: int64

In [19]:
# Convert date column to datetime type
df["DATE"] = pd.to_datetime(df['DATE'], format='%d/%m/%Y', errors='coerce')


# Extract Date Data to Different Columns
df['year'] = df['DATE'].dt.year
df['month'] = df['DATE'].dt.month
df['day'] = df['DATE'].dt.day

# Drop the date column
df = df.drop("DATE", axis=1)

In [20]:
df.head()

Unnamed: 0,12m Low,12m High,Day Low,Day High,Day Price,Previous,Change,Change%,Volume,year,month,day
0,18.5,34.25,16.5,17.0,16.7,16.4,0.3,1.80%,141700,2012.0,3.0,1.0
1,18.5,34.25,16.4,16.8,16.55,16.7,-0.15,0.91%,912500,2012.0,4.0,1.0
2,18.5,34.25,15.8,16.75,16.15,16.55,-0.4,2.48%,748700,2012.0,5.0,1.0
3,18.5,34.25,15.5,16.0,15.8,16.15,-0.35,2.22%,3510000,2012.0,6.0,1.0
4,18.5,34.25,15.8,16.5,15.95,15.8,0.15,0.94%,1090000,2012.0,9.0,1.0


### Prediction using 