In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# NOTE(review): with no category or module argument this silences every
# warning for the rest of the session, including deprecation notices raised
# by pandas, seaborn and scikit-learn in the cells below.
warnings.filterwarnings('ignore')

In [None]:
# Remote CSV that backs the whole notebook; reading it needs network access.
DATA_URL = 'https://raw.githubusercontent.com/mwitiderrick/stockprice/master/NSE-TATAGLOBAL.csv'
df = pd.read_csv(DATA_URL)

In [None]:
# Peek at the first rows to check that the CSV parsed into sensible columns.
df.head()

In [None]:
# Inspect the last rows of the frame.
df.tail()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Summary statistics (count, mean, std, quartiles, ...) of the frame's columns.
df.describe()

# Data cleaning


In [None]:
# Column dtypes and non-null counts.
df.info()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Work on an independent deep copy so `df` keeps the data exactly as loaded.
stk = df.copy(deep=True)

In [None]:
# Column labels available on the working copy.
stk.columns

In [None]:
# Parse the 'Date' column into datetime64 so min/max and strftime work on it.
stk = stk.assign(Date=pd.to_datetime(stk['Date']))

In [None]:
# Re-check dtypes on the working copy (the 'Date' column in particular).
stk.info()

In [None]:
# Preview the working copy.
stk.head()

In [None]:
# Earliest value in the 'Date' column.
stk['Date'].min()

In [None]:
# Latest value in the 'Date' column.
stk['Date'].max()

In [None]:
# Derive calendar labels with the vectorized .dt accessor rather than a
# per-row Python lambda. Four-digit years keep the labels unambiguous
# ('2013-07' instead of '13-07') and keep lexical order equal to
# chronological order even across a century boundary.
# Requires 'Date' to already be a datetime64 column.
stk['Month Year'] = stk['Date'].dt.strftime('%Y-%m')
stk['Year'] = stk['Date'].dt.strftime('%Y')
stk.head()

In [None]:
# Mean closing price per 'Month Year' label, drawn as a single line.
# A descriptive name replaces the throwaway `a`, and the axes carry a title
# and labels so the figure can be read on its own.
monthly_close = stk.groupby('Month Year')['Close'].mean()

fig, ax = plt.subplots(figsize=(25, 5))
ax.plot(monthly_close.index, monthly_close.values, color='green')
ax.set_title('Average closing price per month')
ax.set_xlabel('Month Year')
ax.set_ylabel('Mean Close')
# One tick per month label: rotate them so they do not overlap.
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
# Mean traded quantity per year, largest first, shown as slices of a pie.
pc = stk[['Year', 'Total Trade Quantity']]
grp_pc = (
    pc.groupby('Year')['Total Trade Quantity']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)

plt.pie(x=grp_pc['Total Trade Quantity'], labels=grp_pc['Year'])
plt.show()

In [None]:
# Mean turnover per year, largest first.
trd = stk[['Year', 'Turnover (Lacs)']]
a = (
    trd.groupby('Year')['Turnover (Lacs)']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)

# seaborn >= 0.12 accepts x / y only as keywords; passing the two Series
# positionally raises TypeError there. The keyword form also works on older
# seaborn releases.
ax = sns.barplot(data=a, x='Year', y='Turnover (Lacs)')
ax.set_title('Average turnover (Lacs) per year')
plt.show()

# Machine learning

In [None]:
# Correlate only the numeric columns. stk also holds 'Date' plus the string
# labels 'Month Year' and 'Year'; on pandas >= 2.0 DataFrame.corr() no longer
# drops such columns silently (numeric_only defaults to False) and raises
# ValueError when it meets them.
numeric_cols = stk.select_dtypes(include='number')

plt.figure(figsize=(10, 8))
sns.heatmap(numeric_cols.corr(), annot=True, cmap='YlGnBu')
plt.show()

# Linear regression

In [None]:
# List the columns again before choosing features and target.
stk.columns

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [None]:
# Predictors and target used by the models below.
feature_cols = ['Close', 'Total Trade Quantity']
target_col = 'Turnover (Lacs)'

x = stk[feature_cols]
y = stk[target_col]

In [None]:
# Preview the feature matrix.
x.head()

In [None]:
# Preview the target.
y.head()

In [None]:
# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# partition repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)

In [None]:
# Linear regression model with scikit-learn's default settings.
lr=LinearRegression()

In [None]:
# Fit on the training split only. In top-to-bottom order this runs before
# y_train is reshaped further down, so the model sees the target as returned
# by train_test_split.
lr.fit(X_train, y_train)

In [None]:
# Shapes of the two feature splits.
X_train.shape, X_test.shape

In [None]:
# Shapes of the two target splits.
y_train.shape, y_test.shape

In [None]:
# Turn both targets into column vectors of shape (n, 1).
# np.asarray accepts a pandas Series as well as an ndarray, so re-running
# this cell is harmless; a Series-only `.values` access would raise
# AttributeError on a second run, because by then the names are bound to
# ndarrays.
y_train = np.asarray(y_train).reshape(-1, 1)
y_test = np.asarray(y_test).reshape(-1, 1)

In [None]:
# Shapes of the target splits at this point in the notebook.
y_train.shape, y_test.shape

In [None]:
# For a scikit-learn regressor, score() is the coefficient of determination
# (R squared); here it is computed on the held-out split.
lr.score(X_test, y_test)

In [None]:
# In-sample predictions; the bare name on the last line displays the array.
y_train_pred = lr.predict(X_train)
y_train_pred

In [None]:
# Predictions for the held-out split; these feed the metrics printed later.
y_test_pred = lr.predict(X_test)
y_test_pred

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
# Held-out metrics for the linear model: compute each value once, then print.
r2 = r2_score(y_test, y_test_pred)
mae = mean_absolute_error(y_test, y_test_pred)
mse = mean_squared_error(y_test, y_test_pred)
rmse = np.sqrt(mse)  # same units as the target, unlike the squared error

print("R2Score : ", r2)
print("mean_absolute_error : ", mae)
print("mean_squared_error : ", mse)
print("Root mean_squared_error : ", rmse)

# Random forest

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training features only, then apply
# that same transform to both splits so no test-set statistics enter the fit.
sc = StandardScaler().fit(X_train)
X_train_std = sc.transform(X_train)
X_test_std = sc.transform(X_test)

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random-forest model on the standardized features.
# np.ravel flattens an (n, 1) column vector (and leaves a 1-D target as is),
# so scikit-learn receives the 1-D y it expects instead of emitting a
# DataConversionWarning.
rf_tree = RandomForestRegressor(random_state=0)
rf_tree.fit(X_train_std, np.ravel(y_train))

rf_tree_y_pred = rf_tree.predict(X_train_std)
rf_tree_y_test_pred = rf_tree.predict(X_test_std)

# A regressor's score() is R squared, not a classification accuracy, so the
# value is reported once under its real name. The held-out score is added
# because the training-set score alone cannot reveal overfitting.
print("Train R squared: {}".format(r2_score(y_true=np.ravel(y_train), y_pred=rf_tree_y_pred)))
print("Test R squared: {}".format(r2_score(y_true=np.ravel(y_test), y_pred=rf_tree_y_test_pred)))