# Importing the historical prices

In [None]:
#Dataset imports
import yfinance as yf

#Standard imports
import pandas as pd
import numpy as np

#Visualization imports
from matplotlib import pyplot as plt
import seaborn as sns

#Model imports
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

#To hide warnings
import warnings
warnings.filterwarnings('ignore')

import math

In [None]:
ticker = "ORCL"
start_date = "2022-06-22"
end_date = "2023-06-22"
df = yf.download(ticker, start_date, end_date)
# new.to_csv('Dataset.csv')
# df = pd.read_csv('Dataset.csv')

In [None]:
df


In [None]:
df.index

# Plotting the data

In [None]:
df.plot.line(y="Close", use_index=True)

In [None]:
print(df.describe())


# Keeping only the required columns

In [None]:

df = df[["Close"]]

In [None]:
df

In [None]:
print(df.describe())

# Adding technical indicators

In [None]:
import pandas_ta

In [None]:
#Appending EMA(Estimated Moving Average) column to dataframe by appending
df.ta.ema(close='close', length = 10, append = True)
df

In [None]:
print(df.info())

# Checking and visualizing correlation

In [None]:
plt.scatter(df['Close'], df['EMA_10'])
plt.grid()
plt.show()

In [None]:
print(df.corr())
plt.figure(figsize=(4,2))
sns.heatmap(df.corr(), cmap="Greens", annot=True)
plt.show()

In [None]:
sns.pairplot(df)

# Removing NULL values from DataFrame

In [None]:
# Dropping the first n (10 in this case) rows 
df=df.iloc[10:]
df

In [None]:
print(df.info())

In [None]:
plt.plot(df)
plt.legend(df)
plt.show()

# Splitting the data

In [None]:
from sklearn.model_selection import train_test_split

# 80-20 split into training and testing sets

var1 = df[['Close']]  # Independent variable (model input)
var2 = df[['EMA_10']]  # Dependent variable (model target)

# A fixed seed makes the shuffled split, and therefore every metric computed
# from it in the following cells, reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    var1, var2, test_size=0.2, random_state=42
)

In [None]:
# Training set: summary statistics of the inputs, a blank line, then the targets
print(X_train.describe(), y_train.describe(), sep="\n\n")

In [None]:
# Test set: summary statistics of the inputs, a blank line, then the targets
print(X_test.describe(), y_test.describe(), sep="\n\n")

# Training the model

In [None]:
from sklearn.linear_model import LinearRegression

# Create the regression model and fit it on the training data in one step
# (fit() returns the fitted estimator itself)
model = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out test inputs
y_pred = model.predict(X_test)
y_pred


# Validation of the model

In [None]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

# Compute the error metrics on the test set first, then report them together
# with the fitted parameters, one per line.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
rmse = math.sqrt(mean_squared_error(y_test, y_pred))

for label, value in (
    ("Model Coefficients:", model.coef_),
    ("Mean Absolute Error:", mae),
    ("Coefficient of Determination:", r2),
    ("Root Mean Squared Error:", rmse),
    ("Intercept", model.intercept_),
):
    print(label, value)


In [None]:
#Testing data accuracy (With test set)

# Actual test targets as points, the model's predictions as a line
plt.scatter(X_test, y_test, color = 'red', edgecolor='black',linewidth=0.1, label = 'Real Values', s=20)
plt.plot(X_test, model.predict(X_test), label = 'Predicted Values')
plt.grid()
plt.legend()
plt.xlabel('Close')
plt.ylabel('EMA_10')
# Render the finished plot. Calling plt.figure() at this point would not
# resize it; it would open a second, empty figure after the drawing is done.
plt.show()

In [None]:
#Testing data accuracy (With training set)

# Actual training targets as points, the model's predictions as a line
plt.scatter(X_train, y_train, color = 'red', edgecolor='black',linewidth=0.1, label = 'Real Values', s=20)
plt.plot(X_train, model.predict(X_train), label = 'Predicted Values')
plt.grid()
plt.legend()
plt.xlabel('Close')
plt.ylabel('EMA_10')
# Render the finished plot. Calling plt.figure() at this point would not
# resize it; it would open a second, empty figure after the drawing is done.
plt.show()

In [None]:
#Residual Plot
# Residual = prediction minus actual value, one per test row
residual_model = y_pred - y_test
plt.subplots(figsize=(6, 3))
plt.title("Distribution of Residuals")
# histplot replaces the deprecated distplot. stat="density" with kde=True
# draws a density-scaled histogram with a KDE curve on top. The single
# residual column is passed as a 1-D Series so it is treated as one variable.
sns.histplot(residual_model['EMA_10'], kde=True, stat="density")
plt.show()

# Comparing Real v/s Predicted Values

In [None]:
comparison = y_test
comparison['predictions'] = y_pred
comparison = comparison.rename(columns={"EMA_10": "Actual_Values","predictions": "Predicted_Values"})
comparison.head(10)

In [None]:
graph = comparison.head(15)
graph.plot(kind='bar', ylabel='Prices')


In [None]:
comparison.plot.line()

In [None]:
import math
x2=abs(y_pred-y_test)
x2 = x2.drop(['predictions'], axis=1)

In [None]:
y_test = y_test.drop(['predictions'], axis = 1)

In [None]:

y2 = 100 * (x2/y_test)
accuracy = 100 - np.mean(y2)
print('Accuracy: ', round(accuracy,2), '%')

# Strategy Checking

In [None]:
ticker = "ORCL"
start_date = "2023-01-22"
end_date = "2023-06-22"
new = yf.download(ticker, start_date, end_date)

In [None]:
new

In [None]:
new = new.drop(['High','Low','Adj Close','Volume'], axis=1)

In [None]:
new

In [None]:
new.ta.ema(close='close', length = 10, append = True)

In [None]:
print(new)
new=new.iloc[10:]
new

In [None]:
#Predicting the values
close_values = new[['EMA_10']]
my_pred = model.predict(close_values)

In [None]:
new['Predicted Values']=my_pred

In [None]:
new

In [None]:
df_test=pd.read_csv('Model_1_Predictions.csv')
df_test

In [None]:
proj_close=df_test[['Predicted Values']]
proj_close=model.predict(proj_close)

In [None]:
new['Predicted Values Combination']=proj_close
new

In [None]:
new.to_csv('Model_Combination_Predictions.csv')