In [1]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [2]:
# Train a Random Forest pipeline on the user-fitness CSV and predict an
# activity for a single example user.

# Configuration: values a reader may want to tune.
DATA_PATH = "/content/UserFitnessStats.csv"  # absolute path; adjust when running elsewhere
TARGET_COLUMN = "Activities Completed"
SEED = 42  # fixed seed so the forest (and the printed prediction) is reproducible across re-runs

# Read the data from the CSV file
df = pd.read_csv(DATA_PATH)

# Separate the input features (X) and target variable (y)
X = df.drop(TARGET_COLUMN, axis=1)
y = df[TARGET_COLUMN]

# Define the preprocessing steps for categorical and numerical features.
# Only the columns listed here reach the model: ColumnTransformer's default
# remainder='drop' discards any other column still present in X.
categorical_features = ["Fitness Goals"]
numerical_features = ["BMI", "Height"]

categorical_transformer = Pipeline(steps=[
    # handle_unknown='ignore': a category not seen during fit is encoded as
    # all zeros at predict time instead of raising an error.
    ('onehot', OneHotEncoder(handle_unknown='ignore'))
])

numerical_transformer = Pipeline(steps=[
    ('scaler', StandardScaler())
])

preprocessor = ColumnTransformer(transformers=[
    ('cat', categorical_transformer, categorical_features),
    ('num', numerical_transformer, numerical_features)
])

# Create an instance of the Random Forest classifier with preprocessor.
# random_state pins the forest's internal randomness; without it each run
# can produce a different model and therefore a different recommendation.
clf = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('classifier', RandomForestClassifier(random_state=SEED))
])

# Fit the classifier to the data.
# NOTE: the model is fit on every row of X, so this cell produces no
# held-out estimate of how well it generalizes.
clf.fit(X, y)

# Predict the recommended activities for new data.
# Columns are matched by name, so their order here need not match the CSV.
new_data = pd.DataFrame({"BMI": [25.0], "Height": [170], "Fitness Goals": ["Weight loss"]})
recommended_activities = clf.predict(new_data)
print("Recommended Activities:", recommended_activities)

Recommended Activities: ['Yoga']
