In [46]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split

In [2]:
df = pd.read_csv("bmi.csv")

In [3]:
df.isnull().sum()

Age         0
Height      0
Weight      0
Bmi         0
BmiClass    0
dtype: int64

In [25]:
x = df["BmiClass"].value_counts()

In [20]:
maps = {
    "Normal Weight" : 1,
    "Overweight" : 2,
    "Underweight" : 3,
    "Obese Class 3 " : 4,
    "Obese Class 2" : 5,
    "Obese Class 1" : 6
    
}

In [23]:
df["BmiClass"] = df["BmiClass"].map(maps)

In [24]:
df

Unnamed: 0,Age,Height,Weight,Bmi,BmiClass
0,61,1.85,109.30,31.935720,6.0
1,60,1.71,79.02,27.023700,2.0
2,60,1.55,74.70,31.092612,6.0
3,60,1.46,35.90,16.841809,3.0
4,60,1.58,97.10,38.896010,5.0
...,...,...,...,...,...
736,34,1.86,95.70,27.662157,2.0
737,44,1.91,106.90,29.302925,2.0
738,25,1.82,88.40,26.687598,2.0
739,35,1.88,98.50,27.868945,2.0


In [28]:
df["BmiClass"].value_counts()

BmiClass
1.0    342
2.0    166
3.0     96
5.0     55
6.0     20
Name: count, dtype: int64

In [29]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 741 entries, 0 to 740
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Age       741 non-null    int64  
 1   Height    741 non-null    float64
 2   Weight    741 non-null    float64
 3   Bmi       741 non-null    float64
 4   BmiClass  679 non-null    float64
dtypes: float64(4), int64(1)
memory usage: 29.1 KB


In [75]:
class Linear(BaseEstimator, ClassifierMixin):
    def __init__(self, n_iteration, alpha):
        self.n_iteration = n_iteration
        self.alpha = alpha
        self.cost_list = []
        
    def fit(self, X, y):
        X = np.c_[X, np.ones(X.shape[0])]
        self.theta = np.zeros(X.shape[1])
        m = X.shape[0]
        
        for i in range(self.n_iteration):
            
            h_x = np.dot(X, self.theta)

            cost = (1/m)* np.sum((y - h_x)**2)          

            d_cost = (-2/m)*np.dot(X.T, (y-h_x))
            
            self.cost_list.append(cost)
            self.theta -= self.alpha * d_cost
                
        return self
    
    def predict(self, X):
        
        X = np.c_[X, np.ones(X.shape[0])]
        
        h_x = np.dot(X, self.theta)
        

        return h_x 
    
    def score(self, y, y_pred):
        return np.mean(y == y_pred)
    
    def get_theta(self):
        return self.theta
    
        
        

In [76]:
pipeline = Pipeline([
    ("Standardiser", StandardScaler()),
    ("model", Linear(1000, 0.01))

])


In [49]:
X = df.drop("BmiClass", axis = 1)
y = df["BmiClass"]


X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state = 42)

In [77]:
pipeline.fit(X_train, y_train)

In [54]:
y_pred = pipeline.predict(X_test)

In [78]:
pipeline.named_steps["model"].get_theta()

array([0., 0., 0., 0., 0.])