# Data Preprocessing for Gym Churn Risk Prediction

This notebook is used for data preprocessing tasks, including data cleaning, feature engineering, and preparing the dataset for model training.

In [1]:
import pandas as pd

# Read the three input tables (users, subscription plans, gym locations)
# in one pass; the paths are relative to the notebook's working directory.
users_data, subscription_plans, gym_locations = map(
    pd.read_csv,
    (
        '../data/USERS_DATA.CSV',
        '../data/SUBSCRIPTION_PLANS.CSV',
        '../data/GYM_LOCATIONS_DATA.CSV',
    ),
)

# Preview the top rows of the user table
users_data.head()

In [2]:
# Data cleaning: count nulls per column, then show only the columns
# that actually contain at least one missing value.
missing_values = users_data.isna().sum()
missing_values.loc[missing_values.gt(0)]

In [3]:
# Fill missing values or drop rows/columns as necessary.
# Missing ages are imputed with the median age. The result is assigned back to
# the column instead of calling fillna(inplace=True) on the selected Series:
# that chained in-place form emits a FutureWarning in pandas 2.x and, under
# Copy-on-Write (the default from pandas 3.0), leaves users_data unchanged.
users_data['age'] = users_data['age'].fillna(users_data['age'].median())

# Check the data types
users_data.dtypes

In [4]:
# Feature engineering: derive the account age from the sign-up date.

# Parse sign_up_date into datetimes and store the parsed column back
signup_ts = pd.to_datetime(users_data['sign_up_date'])
users_data['sign_up_date'] = signup_ts

# account_age_days = whole days elapsed between sign-up and "today".
# NOTE(review): the reference point is the clock at execution time, so this
# feature changes from run to run — confirm that is intended.
elapsed = pd.to_datetime('today') - signup_ts
users_data['account_age_days'] = elapsed.dt.days

# Preview the frame with the new column
users_data.head()

In [5]:
# Merge with subscription plans to get price information.
# how='left' keeps every user row, including users whose plan has no match
# in subscription_plans (their plan columns become NaN).
# validate='many_to_one' makes pandas raise MergeError if 'subscription_plan'
# is duplicated in subscription_plans, instead of silently duplicating user rows.
users_data = users_data.merge(
    subscription_plans,
    on='subscription_plan',
    how='left',
    validate='many_to_one',
)

# Display the updated dataframe
users_data.head()

In [6]:
# Persist the preprocessed frame as CSV for the model-training step;
# index=False leaves the row index out of the file.
users_data.to_csv(
    path_or_buf='../data/preprocessed_users_data.csv',
    index=False,
)