In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
from sklearn.datasets import load_diabetes

In [None]:
# Load the scikit-learn diabetes dataset; the result is kept in `diabetes` and
# its attributes (data, target, feature_names, DESCR) are read by later cells.
diabetes = load_diabetes()


In [None]:
# List the fields available on the loaded dataset object.
diabetes.keys()

In [None]:
# Print the dataset's built-in description text.
print(diabetes.DESCR)

In [None]:
# Print the target values (stored later as the 'Diabetes Progress' column).
print(diabetes.target)

In [None]:
# Assemble one DataFrame: the feature matrix under its feature names, with the
# regression target appended as the final column 'Diabetes Progress'.
dataset = pd.DataFrame(
    data=diabetes.data,
    columns=diabetes.feature_names,
).assign(**{'Diabetes Progress': diabetes.target})
dataset.head()

In [None]:
# Column dtypes and non-null counts.
dataset.info()

In [None]:
# Preview the first rows again.
dataset.head()

In [None]:
# Summary statistics of the dataset.
dataset.describe()

In [None]:
# Number of missing values in each column.
dataset.isnull().sum()

In [None]:
# Checking for correlation / multicollinearity.
# When features are highly correlated with each other it is hard to separate the
# individual effect of each feature on the target variable, which can make the
# coefficient estimates unstable and less interpretable.
# Pearson correlation is used here: values range from -1 to +1, where negative
# values mean two columns are negatively correlated and positive values mean
# they are positively correlated.
dataset.corr()


In [None]:
plt.scatter(dataset['Diabetes Progress'], dataset['s6'])
plt.xlabel("Diabetes Progress")
plt.ylabel("Blood Sugar Level")

In [None]:
import seaborn as sns
sns.regplot(x = 's6', y = 'Diabetes Progress', data = dataset)

In [None]:
plt.scatter(dataset['s3'], dataset['Diabetes Progress'])
plt.xlabel("high-density lipoproteins")
plt.ylabel("Diabetes Progress ")

In [None]:
import seaborn as sns
sns.regplot(x = 's3', y = 'Diabetes Progress', data = dataset)

In [None]:
# Separate the predictors from the response.
# The target is the last column of `dataset`; everything before it is a feature.

X = dataset.drop(columns=dataset.columns[-1])  # all input features, target excluded
y = dataset[dataset.columns[-1]]
y.head()

In [None]:
#Train Test Flip
from sklearn.model_selection import train_test_split

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)


In [None]:
# Standardizing the scaling of dataset ( So that GLobal Minimum can be achieved easily )
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [None]:
#training model
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)  # didnt use Fit_transform because we dont want model to be aware of test data!

In [None]:
import pickle

# Persist the fitted scaler to 'scaling.pkl'. The context manager closes (and
# therefore flushes) the file even if pickling raises, instead of leaving an
# open handle behind.
with open('scaling.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)

In [None]:
# Fit a LinearRegression model on the scaled training features.
from sklearn.linear_model import LinearRegression
regress =  LinearRegression()
regress.fit(X_train, y_train)

In [None]:
# Print the fitted coefficients.
print(regress.coef_) # one coefficient for each independent feature

In [None]:
# Print the fitted intercept term.
print(regress.intercept_)

In [None]:
# Parameters the estimator was configured with.
regress.get_params()

In [None]:
# Predict on the scaled test features.
reg_pred = regress.predict(X_test)

In [None]:
# Display the predicted values.
reg_pred

In [None]:
# Scatter plot of actual targets (x-axis) against predictions (y-axis).
plt.scatter(y_test, reg_pred)

In [None]:
# Residuals (errors): actual value minus predicted value.
Residuals = y_test - reg_pred

In [None]:
# Display the residuals.
Residuals

In [None]:
# Kernel-density plot of the residuals.
sns.displot(Residuals, kind ='kde')

In [None]:
# Scatter plot of predictions (x-axis) against residuals (y-axis).
plt.scatter(reg_pred, Residuals)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Test-set error metrics; RMSE is the square root of the MSE.
mae = mean_absolute_error(y_test, reg_pred)
mse = mean_squared_error(y_test, reg_pred)
rmse = np.sqrt(mse)

for label, value in (("MAE", mae), ("MSE", mse), ("RMSE", rmse)):
    print(label, value)

In [None]:
# R^2 on the test set (the adjusted R^2 is derived from it in the next cell).
from sklearn.metrics import r2_score
score = r2_score(y_true=y_test, y_pred=reg_pred)
print(score)  # the closer to 1, the better the fit

In [None]:
# Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1)
#   n = number of test samples, p = number of features.
# The previous expression used len(y_test-1), which subtracts 1 from every
# element and then takes the length (still n), so the numerator was n rather
# than n - 1.
n_samples = len(y_test)
n_features = X_test.shape[1]
1 - (1 - score) * (n_samples - 1) / (n_samples - n_features - 1)

In [None]:
# New-data prediction: take the first raw sample and reshape it to one row.
diabetes.data[0].reshape(1, -1)  # its original shape was (10, )

In [None]:
# Apply the already-fitted scaler to the new sample before predicting.
# NOTE(review): the scaler was fitted on a DataFrame but receives a bare ndarray
# here; recent scikit-learn versions may emit a feature-name warning - confirm.
df = scaler.transform(diabetes.data[0].reshape(1, -1))

In [None]:
# Predict for the scaled sample.
regress.predict(df)

# PICKLING


In [None]:
#Pickling model
import pickle

In [None]:
# Serialise the trained model. The `with` block guarantees the file is flushed
# and closed even if pickling raises, instead of leaking the handle.
with open("/content/drive/MyDrive/ML_PROJECTS/regressmodel.pkl", 'wb') as model_file:
    pickle.dump(regress, model_file)

In [None]:
pickled_model = pickle.load(open("/content/drive/MyDrive/ML_PROJECTS/regressmodel.pkl", 'rb'))

In [None]:
pickled_model.predict(df)