In [2]:
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.base import BaseEstimator, ClassifierMixin
from sklearn.model_selection import train_test_split
import seaborn as sns

In [3]:
df = pd.read_csv("bmi.csv")

In [4]:
df.head(10)

Unnamed: 0,Age,Height,Weight,Bmi,BmiClass
0,61,1.85,109.3,31.93572,Obese Class 1
1,60,1.71,79.02,27.0237,Overweight
2,60,1.55,74.7,31.092612,Obese Class 1
3,60,1.46,35.9,16.841809,Underweight
4,60,1.58,97.1,38.89601,Obese Class 2
5,59,1.71,79.32,27.126295,Overweight
6,59,1.7,73.32,25.370242,Overweight
7,59,1.72,85.32,28.839913,Overweight
8,59,1.46,36.0,16.888722,Underweight
9,59,1.83,104.7,31.263997,Obese Class 1


In [5]:
df.isnull().sum()

Age         0
Height      0
Weight      0
Bmi         0
BmiClass    0
dtype: int64

In [6]:
maps = {
    "Normal Weight" : 1,
    "Overweight" : 2,
    "Underweight" : 3,
    "Obese Class 3" : 4,
    "Obese Class 2" : 5,
    "Obese Class 1" : 6,
    
}

In [7]:
df["BmiClass"] = df["BmiClass"].map(maps)

In [8]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 741 entries, 0 to 740
Data columns (total 5 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Age       741 non-null    int64  
 1   Height    741 non-null    float64
 2   Weight    741 non-null    float64
 3   Bmi       741 non-null    float64
 4   BmiClass  741 non-null    int64  
dtypes: float64(3), int64(2)
memory usage: 29.1 KB


In [9]:
df = df.drop("BmiClass", axis = 1)

In [10]:
class Linear(BaseEstimator, ClassifierMixin):
    def __init__(self, n_iteration, alpha):
        self.n_iteration = n_iteration
        self.alpha = alpha
        self.cost_list = []
        
    def fit(self, X, y):
        X = np.c_[X, np.ones(X.shape[0])]
        self.theta = np.zeros(X.shape[1])
        m = X.shape[0]
        
        for i in range(self.n_iteration):
            h_x = np.dot(X, self.theta)

            cost = (1/m)* np.sum((y - h_x)**2)          

            d_cost = -(2/m)*(np.dot(X.T, (y - h_x)))

            self.cost_list.append(cost)
            self.theta -= self.alpha * d_cost
            
    
                
        return self
    
    def predict(self, X):
        
        X = np.c_[X, np.ones(X.shape[0])]
        
        h_x = np.dot(X, self.theta)
        

        return h_x 
    
    def score_metrics(self, X, y):
        
        y_pred = self.predict(X)
        
        print(f"R-squared = {1 - ((np.sum((y - y_pred)**2))/ (np.sum((y - np.mean(y))**2)))}")
        print(f"MSE = {(1/len(y))* np.sum((y-y_pred)**2)}")
        
    
    def score(self, X, y):
        
        y_pred = self.predict(X)
        
        return (1 - ((np.sum((y - y_pred)**2))/ (np.sum((y - np.mean(y))**2))))
        
        
        
        
        
    

    def get_theta(self):
        return self.theta
    
    
    
        
        

In [11]:
from sklearn.compose import TransformedTargetRegressor

pipeline = Pipeline([
    ("Standardiser", StandardScaler()),
    ("model", TransformedTargetRegressor(
        regressor=Linear(1000, 0.01),
        transformer=StandardScaler()))

])


In [12]:
X = df.drop("Bmi", axis = 1)
y = df["Bmi"]


X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.8, random_state = 42)

In [13]:
pipeline.fit(X_train, y_train)

In [14]:
pipeline.score(X_test, y_test)

0.9814169499771737

In [15]:
pipeline.predict(pd.DataFrame([[24, 1.75, 61]]))



array([19.62014358])

In [19]:
df[df["Age"]] == 24]

Unnamed: 0,Age,Height,Weight,Bmi
0,,,,
1,,,,
2,,,,
3,,,,
4,,,,
...,...,...,...,...
736,,,,
737,,,,
738,,,,
739,,,,


In [None]:
param_grid = {
    "n_iteration" : [500, 1000, 1500, 2000, 2500, 3000],
    "alpha": [0.1, 0.01, 0.001, 0.0001]
}

In [None]:
def grid(pipeline, param_grid, X_train, X_test, y_train, y_test):
    
    best_params = None
    best_score = -np.inf
    result = []
    
    for iteration in param_grid["n_iteration"]:
        for alpha in param_grid["alpha"]:
            
            
            
            pipeline.set_params(
                model__alpha = alpha,
                model__n_iteration = iteration
            )
            
            pipeline.fit(X_train, y_train)
            
            score= pipeline.score(X_test, y_test)
            result.append((iteration, alpha, score))
            
        
            
            if score >= best_score:
                best_score = score
                best_params ={
                    "n_iteration" : iteration,
                    "alpha": alpha
                }
                
    return result, best_params
    

In [None]:
grid(pipeline, param_grid, X_train, X_test, y_train, y_test)

In [None]:
pipeline.named_steps["model"].predict(pd.DataFrame([[24, 1.75, 61]]))

In [None]:
pipeline.predict(pd.DataFrame([[24, 1.75, 61]]))

In [None]:
y

In [None]:
y_pred = pipeline.named_steps["model"].predict(X_test)

In [None]:
y_pred

In [None]:
pipeline.score(X_test, y_test)

In [None]:
pipeline.score_metrics(X_test, y_test)

In [None]:
df[df["Age"] == 24]

In [None]:
df.corr()

In [None]:
sns.pairplot(df.corr())

In [None]:
np.argmax([0,0.0001])

In [None]:
z = np.zeros(4)

In [None]:
z = z.reshape((4,1))

In [None]:
z.shape

In [None]:
z