In [227]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split

In [228]:
df = pd.read_csv("bmi.csv")

In [229]:
df.head(10)

Unnamed: 0,Age,Height,Weight,Bmi,BmiClass
0,61,1.85,109.3,31.93572,Obese Class 1
1,60,1.71,79.02,27.0237,Overweight
2,60,1.55,74.7,31.092612,Obese Class 1
3,60,1.46,35.9,16.841809,Underweight
4,60,1.58,97.1,38.89601,Obese Class 2
5,59,1.71,79.32,27.126295,Overweight
6,59,1.7,73.32,25.370242,Overweight
7,59,1.72,85.32,28.839913,Overweight
8,59,1.46,36.0,16.888722,Underweight
9,59,1.83,104.7,31.263997,Obese Class 1


In [230]:
df.isnull().sum()

Age         0
Height      0
Weight      0
Bmi         0
BmiClass    0
dtype: int64

In [231]:
maps = {
    "Normal Weight" : 1,
    "Overweight" : 2,
    "Underweight" : 3,
    "Obese Class 3" : 4,
    "Obese Class 2" : 5,
    "Obese Class 1" : 6,
    
}

In [232]:
df["BmiClass"] = df["BmiClass"].map(maps)

In [233]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 741 entries, 0 to 740
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Age       741 non-null    int64  
 1   Height    741 non-null    float64
 2   Weight    741 non-null    float64
 3   Bmi       741 non-null    float64
 4   BmiClass  741 non-null    int64  
dtypes: float64(3), int64(2)
memory usage: 29.1 KB


In [261]:
class Linear(BaseEstimator, ClassifierMixin):
    def __init__(self, n_iteration, alpha):
        self.n_iteration = n_iteration
        self.alpha = alpha
        self.cost_list = []
        
    def fit(self, X, y):
        X = np.c_[X, np.ones(X.shape[0])]
        self.theta = np.zeros(X.shape[1])
        m = X.shape[0]
        
        for i in range(self.n_iteration):
            h_x = np.dot(X, self.theta)

            cost = (1/m)* np.sum((y - h_x)**2)          

            d_cost = -(2/m)*(np.dot(X.T, (y - h_x)))

            self.cost_list.append(cost)
            self.theta -= self.alpha * d_cost
    
                
        return self
    
    def predict(self, X):
        
        X = np.c_[X, np.ones(X.shape[0])]
        
        h_x = np.dot(X, self.theta)
        

        return h_x 
    

    def get_theta(self):
        return self.theta
    
    
    
        
        

In [262]:
pipeline = Pipeline([
    ("Standardiser", StandardScaler()),
    ("model", Linear(1000, 0.01))

])


In [263]:
X = df.drop("BmiClass", axis = 1)
y = df["BmiClass"]


X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state = 42)

In [264]:
pipeline.fit(X_train, y_train)

In [265]:
y_pred = pipeline.predict(X_test)

In [267]:
y_test

120    2
196    2
335    4
581    3
394    1
      ..
69     3
223    2
131    2
44     2
70     5
Name: BmiClass, Length: 149, dtype: int64

In [270]:
np.mean(y_pred.astype(int) == y_test)

0.5838926174496645

In [266]:
pipeline.score(y_pred.astype(int), y_test)



ValueError: Expected 2D array, got 1D array instead:
array=[2. 1. 5. 2. 1. 4. 2. 4. 4. 1. 1. 2. 1. 1. 2. 2. 2. 1. 2. 2. 1. 4. 2. 1.
 1. 1. 2. 1. 1. 1. 2. 2. 1. 1. 3. 3. 0. 1. 2. 2. 1. 1. 1. 1. 2. 1. 0. 2.
 1. 2. 1. 4. 1. 5. 1. 2. 1. 1. 2. 2. 2. 3. 1. 3. 2. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 2. 2. 1. 1. 2. 0. 1. 1. 2. 1. 2. 2. 1. 1. 1. 1. 2. 1. 1. 3. 1.
 3. 1. 5. 2. 1. 1. 2. 1. 3. 1. 4. 2. 2. 2. 2. 3. 2. 2. 2. 1. 1. 1. 2. 2.
 0. 1. 2. 1. 1. 1. 2. 1. 1. 1. 1. 2. 1. 2. 2. 1. 2. 3. 1. 4. 2. 0. 1. 1.
 2. 1. 2. 2. 4.].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.