## Linear Regression on Iris data set

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot
import warnings
warnings.filterwarnings('ignore')

In [2]:
#Loading the dataset
df=sns.load_dataset('iris')
df.head(3)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa


In [3]:
#Checking for any null value
df.isna().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [4]:
#Extracting Dependent and Independent variables
Y=df[['sepal_length']]
X=df.drop(['sepal_length'],axis=1)
X.head(3)

Unnamed: 0,sepal_width,petal_length,petal_width,species
0,3.5,1.4,0.2,setosa
1,3.0,1.4,0.2,setosa
2,3.2,1.3,0.2,setosa


In [5]:
# Extracting categorical and numerical columns for pre-processing
num=[i for i in X.describe().columns]
cat=[i for i in X.columns if i not in num]

### Encoding

In [6]:
from sklearn.preprocessing import MinMaxScaler
mn=MinMaxScaler()
mn.fit(X[num])
X[num]=mn.transform(X[num])
X.head()

Unnamed: 0,sepal_width,petal_length,petal_width,species
0,0.625,0.067797,0.041667,setosa
1,0.416667,0.067797,0.041667,setosa
2,0.5,0.050847,0.041667,setosa
3,0.458333,0.084746,0.041667,setosa
4,0.666667,0.067797,0.041667,setosa


In [7]:
from sklearn.preprocessing import LabelEncoder
lb=LabelEncoder()
lb.fit(X[cat])
X[cat]=lb.transform(X[cat])
X.head()

Unnamed: 0,sepal_width,petal_length,petal_width,species
0,0.625,0.067797,0.041667,0
1,0.416667,0.067797,0.041667,0
2,0.5,0.050847,0.041667,0
3,0.458333,0.084746,0.041667,0
4,0.666667,0.067797,0.041667,0


### Splitting the data set

In [8]:
from sklearn.model_selection import train_test_split
xtrain,xtest,ytrain,ytest=train_test_split(X,Y,test_size=0.3,random_state=1)

### Linear Regression_Method 1- Using numpy 

In [9]:
from numpy.linalg import inv
m1=np.dot(xtrain.T,xtrain)
m2=inv(m1)

In [10]:
m3=np.dot(xtrain.T,ytrain)
b=np.dot(m2,m3)

In [11]:
ytrain_pred=np.dot(xtrain,b)
ytest_pred=np.dot(xtest,b)

In [12]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
mse_train=mean_squared_error(ytrain,ytrain_pred)
mae_train=mean_absolute_error(ytrain,ytrain_pred)
r2_train=r2_score(ytrain,ytrain_pred)

mse_test=mean_squared_error(ytest,ytest_pred)
mae_test=mean_absolute_error(ytest,ytest_pred)
r2_test=r2_score(ytest,ytest_pred)

In [13]:
print("Train analysis")
print(f"mse:{mse_train}")
print(f"mae:{mae_train}")
print(f"r2:{r2_train}")

print("*"*100)
print("Test analysis")
print(f"mse:{mse_test}")
print(f"mae:{mae_test}")
print(f"r2:{r2_test}")

Train analysis
mse:1.0456294717204635
mae:0.7999148923641392
r2:-0.4800632856652558
****************************************************************************************************
Test analysis
mse:0.9133455169227045
mae:0.7412449711787509
r2:-0.5038008551658477


### Linear Regression_Method 2-Using sklearn

In [14]:
from sklearn.linear_model import LinearRegression
ln=LinearRegression()
ln.fit(xtrain,ytrain)
ytrain_pred1=ln.predict(xtrain)
coef=ln.coef_

In [15]:
#using sklearn predict function
ytest_pred1=ln.predict(xtest)
#using coefficcient recievd from sklearn coef_
ytest_pred2=np.dot(xtest,coef.T)

In [16]:
mse_train=mean_squared_error(ytrain,ytrain_pred1)
mae_train=mean_absolute_error(ytrain,ytrain_pred1)
r2_train=r2_score(ytrain,ytrain_pred1)

#using sklearn predict functoion
mse_test1=mean_squared_error(ytest,ytest_pred1)
mae_test1=mean_absolute_error(ytest,ytest_pred1)
r2_test1=r2_score(ytest,ytest_pred1)

#using coefficcient recievd from sklearn coef_
mse_test2=mean_squared_error(ytest,ytest_pred2)
mae_test2=mean_absolute_error(ytest,ytest_pred2)
r2_test2=r2_score(ytest,ytest_pred2)

In [17]:
print("Train analysis")
print(f"mse:{mse_train}")
print(f"mae:{mae_train}")
print(f"r2:{r2_train}")

#using sklearn predict functoion
print("*"*100)
print("Test analysis-#using sklearn predict functoion")
print(f"mse:{mse_test1}")
print(f"mae:{mae_test1}")
print(f"r2:{r2_test1}")

#using coefficcient recievd from sklearn coef_
print("*"*100)
print("Test analysis-using coefficcient recievd from sklearn coef_")
print(f"mse:{mse_test2}")
print(f"mae:{mae_test2}")
print(f"r2:{r2_test2}")

Train analysis
mse:0.08083881467611534
mae:0.23376094829799096
r2:0.8855746085064423
****************************************************************************************************
Test analysis-#using sklearn predict functoion
mse:0.13668579024197416
mae:0.2944793177372995
r2:0.7749502193349072
****************************************************************************************************
Test analysis-using coefficcient recievd from sklearn coef_
mse:15.234964918004058
mae:3.8860352410269665
r2:-24.083993787265804
