In [1]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.linear_model import SGDClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score,classification_report,confusion_matrix

In [2]:
data = pd.read_csv('bmi.csv')
data.head()

Unnamed: 0,Gender,Height,Weight,Index
0,Male,174,96,4
1,Male,189,87,2
2,Female,185,110,4
3,Female,195,104,3
4,Male,149,61,3


In [3]:
data.shape

(500, 4)

# Check whether data is null or not

In [4]:
data.isnull().sum()

Gender    0
Height    0
Weight    0
Index     0
dtype: int64

In [5]:
data['Index'].value_counts()

5    198
4    130
2     69
3     68
1     22
0     13
Name: Index, dtype: int64

# Convert categorical data into numerical using label encoding

In [6]:
le = LabelEncoder()
encoded = le.fit_transform(data['Gender'])

In [7]:
data.drop("Gender", axis=1, inplace=True)
data["Gender"] = encoded

In [8]:
data.head() # 0 : Female , 1 : Male

Unnamed: 0,Height,Weight,Index,Gender
0,174,96,4,1
1,189,87,2,1
2,185,110,4,0
3,195,104,3,0
4,149,61,3,1


# Variable of index :

### 0 - Extremely Week
### 1 - Weak
### 2 - Normal
### 3 - Overweight
### 4 - Obesity
### 5 - Extremely Obesity

# Applying Machine Learning Classification Algorithms

## Logistic Regression

In [9]:
X = data.drop('Index', axis=1)
y = data['Index']

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [11]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
logreg_classifier = LogisticRegression(random_state=42)

In [13]:
logreg_classifier.fit(X_train, y_train)

In [14]:
y_pred = logreg_classifier.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.75      0.75      0.75         4
           2       0.89      0.89      0.89        18
           3       0.78      0.88      0.82         8
           4       0.97      0.93      0.95        30
           5       0.95      0.97      0.96        39

    accuracy                           0.92       100
   macro avg       0.72      0.74      0.73       100
weighted avg       0.91      0.92      0.92       100



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


## Decision Tree

In [15]:
dt_classifier = DecisionTreeClassifier(random_state=42)

In [16]:
dt_classifier.fit(X_train, y_train)

In [17]:
y_pred = dt_classifier.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.50      1.00      0.67         1
           1       1.00      1.00      1.00         4
           2       0.85      0.94      0.89        18
           3       0.78      0.88      0.82         8
           4       0.91      0.70      0.79        30
           5       0.88      0.95      0.91        39

    accuracy                           0.87       100
   macro avg       0.82      0.91      0.85       100
weighted avg       0.88      0.87      0.87       100



# Random Forest

In [18]:
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=21)

In [19]:
rf_classifier.fit(X_train, y_train)

In [20]:
y_pred = rf_classifier.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         1
           1       0.67      1.00      0.80         4
           2       0.94      0.83      0.88        18
           3       0.70      0.88      0.78         8
           4       0.92      0.77      0.84        30
           5       0.88      0.95      0.91        39

    accuracy                           0.87       100
   macro avg       0.85      0.90      0.87       100
weighted avg       0.88      0.87      0.87       100



## SVM

In [21]:
svm_classifier = SVC(kernel='linear', random_state=42)

In [22]:
svm_classifier.fit(X_train, y_train)

In [23]:
y_pred = svm_classifier.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.00      0.00      0.00         1
           1       0.67      1.00      0.80         4
           2       1.00      0.94      0.97        18
           3       0.73      1.00      0.84         8
           4       0.93      0.87      0.90        30
           5       0.97      0.95      0.96        39

    accuracy                           0.92       100
   macro avg       0.72      0.79      0.75       100
weighted avg       0.92      0.92      0.92       100



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [24]:
data.to_csv('Task9_Rohan_bmi.csv')