In [1]:
# Step 1: Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Step 2: Load the dataset
# Colab-only: files.upload() opens a browser file picker and returns a dict
# keyed by file name whose values are the uploaded bytes.
import io
from google.colab import files
uploaded = files.upload()

# Read the CSV that was actually uploaded rather than assuming its name:
# parsing the returned bytes means a differently named upload is not ignored
# in favour of an older file on disk. If several files were uploaded, the
# first one is used. If nothing was uploaded, fall back to a file named
# 'Obesity prediction.csv' in the working directory.
if uploaded:
    csv_name, csv_bytes = next(iter(uploaded.items()))
    df = pd.read_csv(io.BytesIO(csv_bytes))
else:
    df = pd.read_csv('Obesity prediction.csv')

# Step 3: Data preprocessing
# Replace every object-dtype (text) column with integer codes. Each fitted
# encoder is kept under its column name so the same mapping can be applied to
# new rows, or inverted to recover the original labels.
label_encoders = {}
text_columns = df.select_dtypes(include=['object']).columns
for column in text_columns:
    le = LabelEncoder()
    encoded_values = le.fit_transform(df[column])
    df[column] = encoded_values
    label_encoders[column] = le

# Separate the target ('Obesity') from the feature columns
target_column = 'Obesity'
y = df[target_column]
X = df.drop(columns=[target_column])

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features: the scaler is fitted on the training rows only,
# then applied to both the training and the test rows
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Step 4: Train the model
# Random forest with 100 trees; the fixed random_state makes the fit repeatable.
model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)

# Step 5: Evaluate the model
# Score the held-out test rows and print overall accuracy plus per-class metrics.
y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print("Accuracy:", test_accuracy)
print("Classification Report:\n", test_report)

# Step 6: Save the model and its preprocessing objects (optional)
# The model was trained on label-encoded, standardized features, so the fitted
# encoders and scaler are persisted next to it; without them the pickled model
# cannot be given correctly prepared inputs in another session.
import joblib
joblib.dump(label_encoders, 'label_encoders.pkl')
joblib.dump(scaler, 'scaler.pkl')
# Kept as the last expression so the cell still displays ['obesity_model.pkl'].
joblib.dump(model, 'obesity_model.pkl')

Saving Obesity prediction.csv to Obesity prediction.csv
Accuracy: 0.9550827423167849
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.96      0.97        56
           1       0.89      0.90      0.90        62
           2       0.99      0.97      0.98        78
           3       0.97      0.98      0.97        58
           4       1.00      1.00      1.00        63
           5       0.88      0.89      0.88        56
           6       0.98      0.96      0.97        50

    accuracy                           0.96       423
   macro avg       0.95      0.95      0.95       423
weighted avg       0.96      0.96      0.96       423



['obesity_model.pkl']

In [4]:
# Step 1: Create a sample data point
# Build one example record with the same columns as the training data:
# [Gender, Age, Height, Weight, family_history, FAVC, FCVC, NCP, CAEC, SMOKE,
#  CH2O, SCC, FAF, TUE, CALC, MTRANS]
# Values are written in raw, human-readable form; the steps below encode and
# scale them with the encoders and scaler fitted during training.
# LabelEncoder numbers labels in sorted order (uppercase sorts before
# lowercase). The codes noted below assume the labels shown are the only ones
# in that column - confirm with label_encoders[<column>].classes_.

sample_data = {
    'Gender': 'Female',  # Female = 0, Male = 1 after encoding
    'Age': 25,
    'Height': 1.65,
    'Weight': 70,
    'family_history': 'yes',  # no = 0, yes = 1
    'FAVC': 'no',  # no = 0, yes = 1
    'FCVC': 2,  # Frequency of consuming vegetables (scale)
    'NCP': 3,  # Number of main meals
    'CAEC': 'Sometimes',  # Always = 0, Frequently = 1, Sometimes = 2 after encoding
    'SMOKE': 'no',  # no = 0, yes = 1
    'CH2O': 2,  # Daily water consumption (scale)
    'SCC': 'no',  # no = 0, yes = 1
    'FAF': 1,  # Physical activity frequency (scale)
    'TUE': 1,  # Time using technology devices (scale)
    'CALC': 'no',  # 'no' sorts after 'Frequently' and 'Sometimes', so it gets the highest code, not 0
    'MTRANS': 'Public_Transportation',  # Automobile = 0, Bike = 1, Motorbike = 2, Public_Transportation = 3, Walking = 4 after encoding
}

# Step 2: Convert the sample data to a DataFrame
sample_df = pd.DataFrame([sample_data])

# Put the columns in the same order as the training features (X) so that each
# value lines up with the column the scaler and model were fitted on, however
# the dict above is ordered. A missing feature raises a KeyError here.
sample_df = sample_df[X.columns]

# Step 3: Encode categorical variables using the same label encoders
# A label that was not present in the training data makes transform() raise.
for column in sample_df.select_dtypes(include=['object']).columns:
    if column in label_encoders:
        sample_df[column] = label_encoders[column].transform(sample_df[column])

# Step 4: Scale the features using the same scaler
sample_scaled = scaler.transform(sample_df)

# Step 5: Predict the obesity level using the trained model
prediction = model.predict(sample_scaled)

# Step 6: Decode the prediction to get the actual label
# The target column was label-encoded too, so its encoder maps the predicted
# integer code back to the original class name.
predicted_label = label_encoders['Obesity'].inverse_transform(prediction)

print("Predicted Obesity Level:", predicted_label[0])

Predicted Obesity Level: Normal_Weight


In [None]:
import joblib
from google.colab import files

# 'obesity_model.pkl' is written by the training cell. The encoders and the
# scaler are written here from the current kernel objects, so each download
# below has a file to send and that file matches the model in memory.
joblib.dump(label_encoders, 'label_encoders.pkl')
joblib.dump(scaler, 'scaler.pkl')

# Colab-only: triggers a browser download for each artifact.
for artifact_path in ('obesity_model.pkl', 'label_encoders.pkl', 'scaler.pkl'):
    files.download(artifact_path)