In [290]:
# Dependencies
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import StratifiedKFold

In [291]:
# Load data
data = pd.read_csv('Data/Iris.csv')

In [292]:
# Data Preprocessing
data = data.drop('Id', axis=1)
data['Species'] = data['Species'].map(lambda x: 1 if x == 'Iris-setosa' else -1 if x == 'Iris-versicolor' else 0)

In [293]:
# Performing Linear Regression
def get_coeffs(Y,X):
    X_t = np.transpose(X)
    M = X_t @ X
    if np.linalg.det(M) == 0:
        return 'Not a single solution to the problem'
    return np.linalg.inv(M) @ X_t @ Y

In [294]:
# Train Test Split
X_train, X_test, Y_train, Y_test = train_test_split(data.drop('Species', axis=1), data['Species'], test_size=0.2)

In [295]:
betas = get_coeffs(np.array(Y_train), np.array(X_train))

In [296]:
# Predictions
def predict(X, betas):
    return X @ betas

In [297]:
predictions_train = predict(X_train, betas)
predictions_test = predict(X_test, betas)

In [298]:
# Evaluation
def MSE(Y, Y_pred):
    return np.mean((Y - Y_pred) ** 2)

In [299]:
print("Train MSE : ",MSE(Y_train, predictions_train))
print("Test MSE : ",MSE(Y_test, predictions_test))

Train MSE :  0.3124469402132547
Test MSE :  0.2713111516559914


In [300]:
# Cross validation
def cross_validation(X,y,k):
    skf = StratifiedKFold(n_splits=k)
    MSE_min = 100000
    betas_min = []
    for train_index, test_index in skf.split(X,y):
        X_train, X_test = X[train_index], X[test_index]
        Y_train, Y_test = y[train_index], y[test_index]
        betas = get_coeffs(Y_train, X_train)
        predictions = predict(X_test, betas)
        if MSE_min > MSE(Y_test, predictions):
            MSE_min = MSE(Y_test, predictions)
            betas_min = betas
    return MSE_min, betas_min

In [301]:
X_train.index = range(len(X_train))
Y_train.index = range(len(Y_train))
MSE_train, betas = cross_validation(np.array(X_train), np.array(Y_train), 5)
print("Training Accuracy : ", MSE_train)
print("Test Accuracy : ", MSE(Y_test,predict(X_test,betas)))

Training Accuracy :  0.2940421379925799
Test Accuracy :  0.26880273836636887


In [302]:
# Using Sklearn
model = LinearRegression()
model.fit(X_train, Y_train)
predictions_train = model.predict(X_train)
predictions_test = model.predict(X_test)
print("Train MSE : ",MSE(Y_train, predictions_train))
print("Test MSE : ",MSE(Y_test, predictions_test))

Train MSE :  0.29789096638330526
Test MSE :  0.24901541725765403
