In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.ensemble import AdaBoostClassifier  # Import AdaBoostClassifier
from sklearn.metrics import classification_report
import joblib


In [5]:
# Data loading
def load_data(file_path):
    """Load a CSV file and return a pandas DataFrame."""
    return pd.read_csv(file_path)

# Data Preprocessing
def preprocess_data(df):
    """Preprocess the DataFrame by selecting relevant features and handling missing values."""
    # Select relevant feature columns
    feature_cols = [
        'acc_X', 'acc_Y', 'acc_Z',
        'mag_X', 'mag_Y', 'mag_Z',
        'gyro_X', 'gyro_Y', 'gyro_Z'
    ]
    selected_data = df[feature_cols]

    # Handle missing values (fill with zeros)
    selected_data = selected_data.fillna(0)

    # Extract the target label column
    labels = df['activity']

    return selected_data, labels


In [6]:
# Data Normalization
def normalize_data(data):
    """Normalize the data using MinMaxScaler."""
    scaler = MinMaxScaler()
    normalized_data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)
    return normalized_data

# Feature extraction
def extract_features(window):
    """Extract statistical features from a window of data."""
    features = {}
    for col in window.columns:
        if len(window[col].unique()) > 1:  # Avoid constant columns
            features[f'{col}_mean'] = window[col].mean()
            features[f'{col}_std'] = window[col].std()
            features[f'{col}_min'] = window[col].min()
            features[f'{col}_max'] = window[col].max()
        else:
            # For constant columns
            features[f'{col}_mean'] = window[col].mean()
            features[f'{col}_std'] = 0
            features[f'{col}_min'] = window[col].min()
            features[f'{col}_max'] = window[col].max()
    return features

In [7]:
def create_feature_dataset(data, labels, window_size=50, step_size=25):
    """Create a dataset of features and corresponding labels from sliding windows."""
    X, y = [], []
    for start in range(0, len(data) - window_size, step_size):
        end = start + window_size
        window = data.iloc[start:end]
        label_window = labels.iloc[start:end]

        # Extract features from the window
        features = extract_features(window)
        X.append(features)

        # Assign the most frequent label in the window as the target label
        label = label_window.mode().iloc[0]  # Most frequent label
        y.append(label)

    # Convert to DataFrame and Series
    X = pd.DataFrame(X)
    y = pd.Series(y)

    return X, y

In [12]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.preprocessing import LabelEncoder
import joblib

def train_adaboost_model(X, y, save_path='movement_detection_adaboost_model.pkl', label_encoder_path='adaboost_label_encoder.pkl'):
    """Train and save an AdaBoost model."""

    # Encode labels
    label_encoder = LabelEncoder()
    y_encoded = label_encoder.fit_transform(y)

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

    # Train AdaBoost classifier
    clf = AdaBoostClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    # Evaluate model
    y_pred = clf.predict(X_test)
    print("\n--- AdaBoost Classification Report ---\n")
    print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))

    # Save model and encoder
    joblib.dump(clf, save_path)
    joblib.dump(label_encoder, label_encoder_path)
    print(f"AdaBoost model saved as '{save_path}'")
    print(f"Label encoder saved as '{label_encoder_path}'")

    return clf, label_encoder


In [13]:
# Balancing Data using SMOTE
def balance_data(X, y):
    """Balance the dataset using SMOTE."""
    smote = SMOTE(random_state=42)
    X_balanced, y_balanced = smote.fit_resample(X, y)
    return X_balanced, y_balanced

In [14]:
# Save the model
def save_model(model, label_encoder, output_path):
    """Save the trained model and label encoder to files."""
    joblib.dump(model, output_path)
    joblib.dump(label_encoder, output_path.replace('.pkl', '_label_encoder.pkl'))
    print(f"Model saved as '{output_path}'")
    print(f"Label encoder saved as '{output_path.replace('.pkl', '_label_encoder.pkl')}'")


In [25]:
# Main workflow
def main(file_path):
    # Load and preprocess data
    print("Loading data...")
    df = load_data(file_path)
    print("Data loaded successfully.")

    # Display unique activity labels
    unique_activities = df['activity'].unique()
    print("Unique activity labels:", unique_activities)

    data, labels = preprocess_data(df)
    print("Data preprocessed successfully.")

    # Normalize data
    print("Normalizing data...")
    data_normalized = normalize_data(data)
    print("Data normalized successfully.")

    # Balance data
    print("Balancing data using SMOTE...")
    X_balanced, y_balanced = balance_data(data_normalized, labels)
    print("Data balanced successfully.")

    # Extract features and labels
    print("Extracting features...")
    X, y = create_feature_dataset(X_balanced, y_balanced)
    print("Feature extraction completed.")

    # Train the model
    print("Training the model...")
    model_adaboost, label_encoder = train_adaboost_model(X, y)



    # Save the model
    save_model(model_adaboost, label_encoder, 'movement_detection_adaboost_model.pkl')

    return model_adaboost, label_encoder



In [23]:
# Run the main workflow
main('.idea/df.csv')

Loading data...
Data loaded successfully.
Unique activity labels: ['downstairs' 'running' 'standing' 'upstairs' 'walking']
Data preprocessed successfully.
Normalizing data...
Data normalized successfully.
Balancing data using SMOTE...
Data balanced successfully.
Extracting features...
Feature extraction completed.
Training the model...

--- AdaBoost Classification Report ---

              precision    recall  f1-score   support

  downstairs       0.48      0.39      0.43       680
     running       0.87      0.79      0.83       631
    standing       0.83      0.79      0.81       702
    upstairs       0.55      0.62      0.58       712
     walking       0.58      0.69      0.63       705

    accuracy                           0.65      3430
   macro avg       0.66      0.66      0.66      3430
weighted avg       0.66      0.65      0.65      3430

AdaBoost model saved as 'movement_detection_adaboost_model.pkl'
Label encoder saved as 'adaboost_label_encoder.pkl'
Model saved as '

(AdaBoostClassifier(n_estimators=100, random_state=42), LabelEncoder())

In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from imblearn.over_sampling import SMOTE
from sklearn.metrics import classification_report
import joblib

# Load dataset from CSV
def read_csv_data(path):
    return pd.read_csv(path)

# Select and clean data
def select_features_and_labels(dataframe):
    sensors = ['acc_X', 'acc_Y', 'acc_Z', 'mag_X', 'mag_Y', 'mag_Z', 'gyro_X', 'gyro_Y', 'gyro_Z']
    features = dataframe[sensors].fillna(0)
    targets = dataframe['activity']
    return features, targets

# Normalize the sensor data
def scale_features(features):
    scaler = MinMaxScaler()
    return pd.DataFrame(scaler.fit_transform(features), columns=features.columns)

# Create statistical features per time window
def get_window_features(window):
    stats = {}
    for col in window.columns:
        values = window[col]
        stats[f'{col}_mean'] = values.mean()
        stats[f'{col}_std'] = values.std() if values.std() != np.nan else 0
        stats[f'{col}_min'] = values.min()
        stats[f'{col}_max'] = values.max()
    return stats

# Segment data and aggregate features
def generate_sliding_windows(data, labels, window_size=50, step=25):
    features, targets = [], []
    for i in range(0, len(data) - window_size, step):
        segment = data.iloc[i:i+window_size]
        segment_label = labels.iloc[i:i+window_size].mode().iloc[0]
        features.append(get_window_features(segment))
        targets.append(segment_label)
    return pd.DataFrame(features), pd.Series(targets)

# Apply SMOTE to balance the dataset
def resample_data(X, y):
    return SMOTE(random_state=42).fit_resample(X, y)

# Train the AdaBoost classifier and save components
def fit_and_store_model(X, y, model_file='movement_detection_adaboost_model.pkl'):
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

    model = AdaBoostClassifier(n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)
    print("\n--- AdaBoost Model Evaluation ---\n")
    print(classification_report(y_test, predictions, target_names=le.classes_))

    joblib.dump(model, model_file)
    joblib.dump(le, model_file.replace('.pkl', '_label_encoder.pkl'))
    print(f"Saved model to {model_file} and label encoder.")

    return model, le

# Main control function
def execute_pipeline(csv_path):
    print("Starting pipeline...")
    df = read_csv_data(csv_path)
    X_raw, y_raw = select_features_and_labels(df)
    X_scaled = scale_features(X_raw)
    print("Preprocessing complete.")

    X_balanced, y_balanced = resample_data(X_scaled, y_raw)
    print("Balancing done.")

    X_final, y_final = generate_sliding_windows(X_balanced, y_balanced)
    print("Feature extraction complete.")

    model, encoder = fit_and_store_model(X_final, y_final)
    print("Training complete.")
    return model, encoder

# Trigger the workflow
execute_pipeline('.idea/df.csv')


Starting pipeline...
Preprocessing complete.
Balancing done.
Feature extraction complete.

--- AdaBoost Model Evaluation ---

              precision    recall  f1-score   support

  downstairs       0.48      0.39      0.43       680
     running       0.87      0.79      0.83       631
    standing       0.83      0.79      0.81       702
    upstairs       0.55      0.62      0.58       712
     walking       0.58      0.69      0.63       705

    accuracy                           0.65      3430
   macro avg       0.66      0.66      0.66      3430
weighted avg       0.66      0.65      0.65      3430

Saved model to movement_detection_adaboost_model.pkl and label encoder.
Training complete.


(AdaBoostClassifier(n_estimators=100, random_state=42), LabelEncoder())

In [11]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
import joblib

# 1. Load CSV Data
def load_csv(path):
    return pd.read_csv(path)

# 2. Select and clean features
def select_features(df):
    features = ['acc_X', 'acc_Y', 'acc_Z', 'mag_X', 'mag_Y', 'mag_Z', 'gyro_X', 'gyro_Y', 'gyro_Z']
    X = df[features].fillna(0)
    y = df['activity']
    return X, y

# 3. Normalize data
def scale_data(X):
    scaler = MinMaxScaler()
    return pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# 4. Windowed feature extraction
def extract_stat_features(window):
    stats = {}
    for col in window.columns:
        stats[f'{col}_mean'] = window[col].mean()
        stats[f'{col}_std'] = window[col].std() if window[col].nunique() > 1 else 0
        stats[f'{col}_min'] = window[col].min()
        stats[f'{col}_max'] = window[col].max()
    return stats

# 5. Sliding window feature creation
def generate_sliding_windows(X, y, size=50, step=25):
    X_feat, y_labels = [], []
    for start in range(0, len(X) - size, step):
        window = X.iloc[start:start + size]
        label_window = y.iloc[start:start + size]
        stats = extract_stat_features(window)
        X_feat.append(stats)
        y_labels.append(label_window.mode()[0])
    return pd.DataFrame(X_feat), pd.Series(y_labels)

# 6. Balance the dataset
def rebalance(X, y):
    sm = SMOTE(random_state=42)
    return sm.fit_resample(X, y)

# 7. Train AdaBoost and save
def fit_and_store_model(X, y, model_file='adaboost_model.pkl', encoder_file='label_encoder.pkl'):
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

    model = AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=5),
                               n_estimators=100, random_state=42)
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)

    print("\n--- AdaBoost Model Evaluation ---\n")
    print(classification_report(y_test, predictions, target_names=le.classes_))

    joblib.dump(model, model_file)
    joblib.dump(le, encoder_file)
    print(f"Model saved to: {model_file}")
    print(f"Label encoder saved to: {encoder_file}")

    return model, le

# 8. Main runner
def execute_pipeline(csv_path):
    print("Loading and preparing data...")
    df = load_csv(csv_path)
    X_raw, y_raw = select_features(df)
    X_scaled = scale_data(X_raw)

    print("Balancing dataset using SMOTE...")
    X_balanced, y_balanced = rebalance(X_scaled, y_raw)

    print("Extracting windowed features...")
    X_final, y_final = generate_sliding_windows(X_balanced, y_balanced)
    print("Feature extraction complete.")

    model, encoder = fit_and_store_model(X_final, y_final)
    print("Training complete.")
    return model, encoder

# Run the pipeline
execute_pipeline('.idea/df.csv')


Loading and preparing data...
Balancing dataset using SMOTE...
Extracting windowed features...
Feature extraction complete.

--- AdaBoost Model Evaluation ---

              precision    recall  f1-score   support

  downstairs       0.98      0.95      0.97       680
     running       0.99      0.98      0.98       631
    standing       0.99      0.99      0.99       702
    upstairs       0.94      0.97      0.95       712
     walking       0.95      0.95      0.95       705

    accuracy                           0.97      3430
   macro avg       0.97      0.97      0.97      3430
weighted avg       0.97      0.97      0.97      3430

Model saved to: adaboost_model.pkl
Label encoder saved to: label_encoder.pkl
Training complete.


(AdaBoostClassifier(estimator=DecisionTreeClassifier(max_depth=5),
                    n_estimators=100, random_state=42),
 LabelEncoder())

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
import joblib

# 1. Load CSV Data
def load_csv(path):
    return pd.read_csv(path)

# 2. Select and clean features
def select_features(df):
    features = ['acc_X', 'acc_Y', 'acc_Z', 'mag_X', 'mag_Y', 'mag_Z', 'gyro_X', 'gyro_Y', 'gyro_Z']
    X = df[features].fillna(0)
    y = df['activity']
    return X, y

# 3. Normalize data
def scale_data(X):
    scaler = MinMaxScaler()
    return pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# 4. Extract statistical features for a single window
def extract_stat_features(window):
    stats = {}
    for col in window.columns:
        stats[f'{col}_mean'] = window[col].mean()
        stats[f'{col}_std'] = window[col].std()
        stats[f'{col}_min'] = window[col].min()
        stats[f'{col}_max'] = window[col].max()
    return stats

# 5. Fixed-size sliding window (in samples)
def generate_windows_by_sample_count(X, y, size=10, step=5):
    X_feat, y_labels = [], []
    for start in range(0, len(X) - size, step):
        window = X.iloc[start:start + size]
        label_window = y.iloc[start:start + size]
        stats = extract_stat_features(window)
        X_feat.append(stats)
        y_labels.append(label_window.mode()[0])  # Assign most common label in window
    return pd.DataFrame(X_feat), pd.Series(y_labels)

# 6. Balance dataset using SMOTE
def rebalance(X, y):
    sm = SMOTE(random_state=42)
    return sm.fit_resample(X, y)

# 7. Train and save AdaBoost model
def fit_and_store_model(X, y, model_file='adaboost_model.pkl', encoder_file='label_encoder.pkl'):
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

    model = AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=5),
        n_estimators=100,
        random_state=42
    )
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)

    print("\n--- AdaBoost Model Evaluation ---\n")
    print(classification_report(y_test, predictions, target_names=le.classes_))

    joblib.dump(model, model_file)
    joblib.dump(le, encoder_file)
    print(f"✅ Model saved to: {model_file}")
    print(f"✅ Label encoder saved to: {encoder_file}")

    return model, le

# 8. Main pipeline
def execute_pipeline(csv_path, window_size=10, window_step=5):
    print("📥 Loading and preparing data...")
    df = load_csv(csv_path)
    X_raw, y_raw = select_features(df)
    X_scaled = scale_data(X_raw)

    print("🔁 Balancing dataset using SMOTE...")
    X_balanced, y_balanced = rebalance(X_scaled, y_raw)

    print(f"🪟 Creating sliding windows of {window_size} samples with step {window_step}...")
    X_final, y_final = generate_windows_by_sample_count(X_balanced, y_balanced, size=window_size, step=window_step)
    print("📊 Feature extraction complete.")

    model, encoder = fit_and_store_model(X_final, y_final)
    print("✅ Training complete.")
    return model, encoder

# 🚀 Run the pipeline
if __name__ == "__main__":
    execute_pipeline(".idea/df.csv", window_size=50, window_step=5)


📥 Loading and preparing data...
🔁 Balancing dataset using SMOTE...
🪟 Creating sliding windows of 50 samples with step 5...
📊 Feature extraction complete.

--- AdaBoost Model Evaluation ---

              precision    recall  f1-score   support

  downstairs       0.99      0.96      0.98      3413
     running       1.00      0.99      0.99      3393
    standing       1.00      1.00      1.00      3531
    upstairs       0.96      0.98      0.97      3346
     walking       0.98      0.98      0.98      3467

    accuracy                           0.98     17150
   macro avg       0.98      0.98      0.98     17150
weighted avg       0.98      0.98      0.98     17150

✅ Model saved to: adaboost_model.pkl
✅ Label encoder saved to: label_encoder.pkl
✅ Training complete.


In [6]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report
import joblib

# 1. Load CSV Data
def load_csv(path):
    return pd.read_csv(path)

# 2. Select and clean features
def select_features(df):
    features = ['acc_X', 'acc_Y', 'acc_Z', 'mag_X', 'mag_Y', 'mag_Z', 'gyro_X', 'gyro_Y', 'gyro_Z']
    X = df[features].fillna(0)
    y = df['activity']
    return X, y

# 3. Normalize data
def scale_data(X):
    scaler = MinMaxScaler()
    return pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# 4. Extract statistical features for a single window
def extract_stat_features(window):
    stats = {}
    for col in window.columns:
        stats[f'{col}_mean'] = window[col].mean()
        stats[f'{col}_std'] = window[col].std()
        stats[f'{col}_min'] = window[col].min()
        stats[f'{col}_max'] = window[col].max()
    return stats

# 5. Fixed-size sliding window (in samples)
def generate_windows_by_sample_count(X, y, size=10, step=5):
    X_feat, y_labels = [], []
    for start in range(0, len(X) - size, step):
        window = X.iloc[start:start + size]
        label_window = y.iloc[start:start + size]
        stats = extract_stat_features(window)
        X_feat.append(stats)
        y_labels.append(label_window.mode()[0])  # Assign most common label in window
    return pd.DataFrame(X_feat), pd.Series(y_labels)

# 6. Train and save AdaBoost model
def fit_and_store_model(X, y, model_file='adaboost_model.pkl', encoder_file='label_encoder.pkl'):
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

    model = AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=5),
        n_estimators=100,
        random_state=42
    )
    model.fit(X_train, y_train)

    predictions = model.predict(X_test)

    print("\n--- AdaBoost Model Evaluation ---\n")
    print(classification_report(y_test, predictions, target_names=le.classes_))

    joblib.dump(model, model_file)
    joblib.dump(le, encoder_file)
    print(f"✅ Model saved to: {model_file}")
    print(f"✅ Label encoder saved to: {encoder_file}")

    return model, le

# 7. Main pipeline
def execute_pipeline(csv_path, window_size=10, window_step=5):
    print("📥 Loading and preparing data...")
    df = load_csv(csv_path)
    X_raw, y_raw = select_features(df)
    X_scaled = scale_data(X_raw)

    print(f"🪟 Creating sliding windows of {window_size} samples with step {window_step}...")
    X_final, y_final = generate_windows_by_sample_count(X_scaled, y_raw, size=window_size, step=window_step)
    print("📊 Feature extraction complete.")

    model, encoder = fit_and_store_model(X_final, y_final)
    print("✅ Training complete.")
    return model, encoder

# 🚀 Run the pipeline
if __name__ == "__main__":
    execute_pipeline(".idea/df.csv", window_size=50, window_step=5)


📥 Loading and preparing data...
🪟 Creating sliding windows of 50 samples with step 5...
📊 Feature extraction complete.

--- AdaBoost Model Evaluation ---

              precision    recall  f1-score   support

  downstairs       0.98      0.97      0.98      3282
     running       1.00      0.99      1.00      1918
    standing       1.00      1.00      1.00      3275
    upstairs       0.94      0.98      0.96      2885
     walking       0.98      0.96      0.97      3483

    accuracy                           0.98     14843
   macro avg       0.98      0.98      0.98     14843
weighted avg       0.98      0.98      0.98     14843

✅ Model saved to: adaboost_model.pkl
✅ Label encoder saved to: label_encoder.pkl
✅ Training complete.


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
import joblib

# 1. Load Data
def load_csv(path):
    return pd.read_csv(path)

# 2. Feature Selection
def select_features(df):
    features = ['acc_X', 'acc_Y', 'acc_Z', 'mag_X', 'mag_Y', 'mag_Z', 'gyro_X', 'gyro_Y', 'gyro_Z']
    return df[features].fillna(0), df['activity']

# 3. Normalize
def scale_data(X):
    scaler = MinMaxScaler()
    return pd.DataFrame(scaler.fit_transform(X), columns=X.columns)

# 4. Windowed Data Generator for LSTM
def create_lstm_windows(X, y, window_size=50, step=25):
    X_seq, y_seq = [], []
    for start in range(0, len(X) - window_size, step):
        end = start + window_size
        segment = X.iloc[start:end].values
        label_window = y.iloc[start:end]
        label = label_window.mode()[0]
        X_seq.append(segment)
        y_seq.append(label)
    return np.array(X_seq), np.array(y_seq)

# 5. Apply SMOTE to 2D features before reshaping for LSTM
def rebalance(X, y):
    sm = SMOTE(random_state=42)
    X_2d = X.reshape((X.shape[0], -1))  # Flatten for SMOTE
    X_bal, y_bal = sm.fit_resample(X_2d, y)
    X_bal = X_bal.reshape((-1, 50, 9))  # Reshape back
    return X_bal, y_bal

# 6. Build and Train LSTM Model
def build_and_train_lstm(X, y, model_path='lstm_model.h5', encoder_path='lstm_label_encoder.pkl'):
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    y_categorical = to_categorical(y_encoded)

    X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)

    model = Sequential([
        LSTM(64, input_shape=(X.shape[1], X.shape[2]), return_sequences=False),
        Dropout(0.4),
        Dense(64, activation='relu'),
        Dense(y_categorical.shape[1], activation='softmax')
    ])

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=15, batch_size=64, validation_split=0.2, verbose=1)

    # Evaluate
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true_classes = np.argmax(y_test, axis=1)
    print("\n--- LSTM Model Evaluation ---\n")
    print(classification_report(y_true_classes, y_pred_classes, target_names=le.classes_))

    # Save model and encoder
    model.save(model_path)
    joblib.dump(le, encoder_path)
    print(f"LSTM model saved to '{model_path}'")
    print(f"Label encoder saved to '{encoder_path}'")

    return model, le

# 7. Full Runner
def run_lstm_pipeline(path):
    print("Loading and preparing data...")
    df = load_csv(path)
    X_raw, y_raw = select_features(df)
    X_scaled = scale_data(X_raw)

    print("Creating LSTM windows...")
    X_windowed, y_windowed = create_lstm_windows(X_scaled, y_raw)

    print("Balancing data with SMOTE...")
    X_balanced, y_balanced = rebalance(X_windowed, y_windowed)

    print("Training LSTM model...")
    model, encoder = build_and_train_lstm(X_balanced, y_balanced)
    print("LSTM training complete.")
    return model, encoder

# 🔥 Execute the pipeline
run_lstm_pipeline('.idea/df.csv')


In [7]:
import numpy as np
import pandas as pd
import joblib

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from imblearn.over_sampling import SMOTE

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping

# --- Step 1: Load and preprocess data ---
def load_and_preprocess_data(csv_path):
    df = pd.read_csv(csv_path)
    df = df.dropna()

    # Separate features and label
    X = df.drop('activity', axis=1)
    y = df['activity']

    # Normalize features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y

# --- Step 2: Apply SMOTE to balance classes ---
def rebalance(X, y):
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    return X_resampled, y_resampled

# --- Step 3: Generate sliding windows and statistical features ---
def generate_sliding_windows(X, y, window_size=50, step_size=25):
    X = np.array(X)
    y = np.array(y)

    X_windows = []
    y_windows = []

    for start in range(0, len(X) - window_size, step_size):
        end = start + window_size
        window = X[start:end]
        label = y[start:end]

        # Use the most frequent label in the window
        unique, counts = np.unique(label, return_counts=True)
        dominant_label = unique[np.argmax(counts)]

        # Reshape to (window_size, num_features)
        X_windows.append(window)
        y_windows.append(dominant_label)

    return np.array(X_windows), np.array(y_windows)

# --- Step 4: Build and train LSTM model ---
def build_and_train_lstm(X, y, model_path='lstm_model.h5', encoder_path='lstm_label_encoder.pkl'):
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    y_categorical = to_categorical(y_encoded)

    X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)

    model = Sequential([
        Input(shape=(X.shape[1], X.shape[2])),
        LSTM(128, return_sequences=True),
        Dropout(0.3),
        LSTM(64),
        Dropout(0.3),
        Dense(64, activation='relu'),
        Dense(y_categorical.shape[1], activation='softmax')
    ])

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )

    early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    model.fit(
        X_train, y_train,
        epochs=25,
        batch_size=64,
        validation_split=0.2,
        callbacks=[early_stop],
        verbose=1
    )

    # Evaluation
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true_classes = np.argmax(y_test, axis=1)

    print("\n--- Improved LSTM Model Evaluation ---\n")
    print(classification_report(y_true_classes, y_pred_classes, target_names=le.classes_))

    model.save(model_path)
    joblib.dump(le, encoder_path)
    print(f"LSTM model saved to '{model_path}'")
    print(f"Label encoder saved to '{encoder_path}'")

    return model, le

# --- Step 5: Run the full pipeline ---
def run_lstm_pipeline(path):
    print("Loading and preprocessing data...")
    X_raw, y_raw = load_and_preprocess_data(path)

    print("Rebalancing classes using SMOTE...")
    X_balanced, y_balanced = rebalance(X_raw, y_raw)

    print("Generating sliding windows and extracting features...")
    X_windowed, y_windowed = generate_sliding_windows(X_balanced, y_balanced)

    print("Training LSTM model...")
    model, encoder = build_and_train_lstm(X_windowed, y_windowed)

    print("LSTM training complete.")
    return model, encoder

# 🔥 Execute the pipeline
run_lstm_pipeline('.idea/df.csv')


Loading and preprocessing data...
Rebalancing classes using SMOTE...
Generating sliding windows and extracting features...
Training LSTM model...
Epoch 1/25
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 93ms/step - accuracy: 0.5215 - loss: 1.1679 - val_accuracy: 0.7598 - val_loss: 0.6901
Epoch 2/25
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 89ms/step - accuracy: 0.7697 - loss: 0.6480 - val_accuracy: 0.7945 - val_loss: 0.5959
Epoch 3/25
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 104ms/step - accuracy: 0.8046 - loss: 0.5664 - val_accuracy: 0.8243 - val_loss: 0.5128
Epoch 4/25
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 95ms/step - accuracy: 0.8349 - loss: 0.4760 - val_accuracy: 0.8513 - val_loss: 0.4058
Epoch 5/25
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 89ms/step - accuracy: 0.8593 - loss: 0.4079 - val_accuracy: 0.8724 - val_loss: 0.3698
Epoch 6/25
[1m172/172[0m [32m━




--- Improved LSTM Model Evaluation ---

              precision    recall  f1-score   support

  downstairs       0.88      0.89      0.88       680
     running       0.95      0.90      0.93       631
    standing       0.93      0.97      0.95       702
    upstairs       0.88      0.88      0.88       712
     walking       0.85      0.84      0.84       705

    accuracy                           0.90      3430
   macro avg       0.90      0.90      0.90      3430
weighted avg       0.90      0.90      0.90      3430

LSTM model saved to 'lstm_model.h5'
Label encoder saved to 'lstm_label_encoder.pkl'
LSTM training complete.


(<Sequential name=sequential_1, built=True>, LabelEncoder())

In [10]:
import numpy as np
import pandas as pd
import joblib

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from imblearn.over_sampling import SMOTE

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input, LayerNormalization, MultiHeadAttention, GlobalAveragePooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping


# --- Step 1: Load and preprocess data ---
def load_and_preprocess_data(csv_path):
    df = pd.read_csv(csv_path)
    df = df.dropna()

    # Separate features and label
    X = df.drop('activity', axis=1)
    y = df['activity']

    # Normalize features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y


# --- Step 2: Apply SMOTE to balance classes ---
def rebalance(X, y):
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    return X_resampled, y_resampled


# --- Step 3: Generate sliding windows and statistical features ---
def generate_sliding_windows(X, y, window_size=50, step_size=25):
    X = np.array(X)
    y = np.array(y)

    X_windows = []
    y_windows = []

    for start in range(0, len(X) - window_size, step_size):
        end = start + window_size
        window = X[start:end]
        label = y[start:end]

        # Use the most frequent label in the window
        unique, counts = np.unique(label, return_counts=True)
        dominant_label = unique[np.argmax(counts)]

        # Reshape to (window_size, num_features)
        X_windows.append(window)
        y_windows.append(dominant_label)

    return np.array(X_windows), np.array(y_windows)


# --- Step 4: Build and train Transformer model ---
def build_and_train_transformer(X, y, model_path='transformer_model.h5', encoder_path='transformer_label_encoder.pkl'):
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    y_categorical = to_categorical(y_encoded)

    X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)

    # Define Transformer model
    inputs = Input(shape=(X.shape[1], X.shape[2]))

    # Multi-head self-attention
    x = MultiHeadAttention(num_heads=4, key_dim=64)(inputs, inputs)
    x = Dropout(0.3)(x)
    x = LayerNormalization()(x)

    # Global Average Pooling
    x = GlobalAveragePooling1D()(x)

    # Fully connected layers
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.3)(x)
    outputs = Dense(y_categorical.shape[1], activation='softmax')(x)

    # Build the model
    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy']
    )

    early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    model.fit(
        X_train, y_train,
        epochs=25,
        batch_size=64,
        validation_split=0.2,
        callbacks=[early_stop],
        verbose=1
    )

    # Evaluation
    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true_classes = np.argmax(y_test, axis=1)

    print("\n--- Improved Transformer Model Evaluation ---\n")
    print(classification_report(y_true_classes, y_pred_classes, target_names=le.classes_))

    model.save(model_path)
    joblib.dump(le, encoder_path)
    print(f"Transformer model saved to '{model_path}'")
    print(f"Label encoder saved to '{encoder_path}'")

    return model, le


# --- Step 5: Convert to TFLite ---
def convert_to_tflite(model, tflite_model_path='transformer_model.tflite'):
    # Convert the model to TFLite format
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    tflite_model = converter.convert()

    # Save the TFLite model
    with open(tflite_model_path, 'wb') as f:
        f.write(tflite_model)
    print(f"TFLite model saved to '{tflite_model_path}'")


# --- Step 6: Run the full pipeline ---
def run_transformer_pipeline(path):
    print("Loading and preprocessing data...")
    X_raw, y_raw = load_and_preprocess_data(path)

    print("Rebalancing classes using SMOTE...")
    X_balanced, y_balanced = rebalance(X_raw, y_raw)

    print("Generating sliding windows and extracting features...")
    X_windowed, y_windowed = generate_sliding_windows(X_balanced, y_balanced)

    print("Training Transformer model...")
    model, encoder = build_and_train_transformer(X_windowed, y_windowed)

    print("Converting model to TFLite format...")
    convert_to_tflite(model)

    print("Transformer training complete.")
    return model, encoder


# 🔥 Execute the pipeline
run_transformer_pipeline('.idea/df.csv')


Loading and preprocessing data...
Rebalancing classes using SMOTE...
Generating sliding windows and extracting features...
Training Transformer model...
Epoch 1/25
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.4242 - loss: 1.3716 - val_accuracy: 0.6297 - val_loss: 0.9767
Epoch 2/25
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 40ms/step - accuracy: 0.6210 - loss: 0.9831 - val_accuracy: 0.6571 - val_loss: 0.8681
Epoch 3/25
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 39ms/step - accuracy: 0.6756 - loss: 0.8782 - val_accuracy: 0.6840 - val_loss: 0.8116
Epoch 4/25
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 38ms/step - accuracy: 0.6864 - loss: 0.8397 - val_accuracy: 0.7208 - val_loss: 0.7593
Epoch 5/25
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 38ms/step - accuracy: 0.7164 - loss: 0.7828 - val_accuracy: 0.7216 - val_loss: 0.7321
Epoch 6/25
[1m172/172[0m [32m




--- Improved Transformer Model Evaluation ---

              precision    recall  f1-score   support

  downstairs       0.70      0.75      0.72       680
     running       0.94      0.82      0.88       631
    standing       0.89      0.95      0.92       702
    upstairs       0.79      0.77      0.78       712
     walking       0.75      0.75      0.75       705

    accuracy                           0.81      3430
   macro avg       0.81      0.81      0.81      3430
weighted avg       0.81      0.81      0.81      3430

Transformer model saved to 'transformer_model.h5'
Label encoder saved to 'transformer_label_encoder.pkl'
Converting model to TFLite format...
INFO:tensorflow:Assets written to: C:\Users\EGYPT\AppData\Local\Temp\tmpejojj_09\assets


INFO:tensorflow:Assets written to: C:\Users\EGYPT\AppData\Local\Temp\tmpejojj_09\assets


Saved artifact at 'C:\Users\EGYPT\AppData\Local\Temp\tmpejojj_09'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 50, 9), dtype=tf.float32, name='keras_tensor_12')
Output Type:
  TensorSpec(shape=(None, 5), dtype=tf.float32, name=None)
Captures:
  2156304991632: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156331958352: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156331958544: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156331959888: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156331960848: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156331960656: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156331959120: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156331960272: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156385694736: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156385695120: TensorSpec(shape=(), dtype=tf.resource, name=None)
  21563856

(<Functional name=functional_2, built=True>, LabelEncoder())

In [11]:
import numpy as np
import pandas as pd
import joblib

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from imblearn.over_sampling import SMOTE

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Dense, Dropout, LayerNormalization,
    MultiHeadAttention, GlobalAveragePooling1D
)
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical

# --- Step 1: Load and preprocess data ---
def load_and_preprocess_data(csv_path):
    df = pd.read_csv(csv_path)
    df = df.dropna()

    X = df.drop('activity', axis=1)
    y = df['activity']

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    return X_scaled, y

# --- Step 2: Apply SMOTE to balance classes ---
def rebalance(X, y):
    smote = SMOTE(random_state=42)
    X_resampled, y_resampled = smote.fit_resample(X, y)
    return X_resampled, y_resampled

# --- Step 3: Generate sliding windows ---
def generate_sliding_windows(X, y, window_size=50, step_size=25):
    X = np.array(X)
    y = np.array(y)

    X_windows = []
    y_windows = []

    for start in range(0, len(X) - window_size, step_size):
        end = start + window_size
        window = X[start:end]
        label = y[start:end]

        # Use the most frequent label in the window
        unique, counts = np.unique(label, return_counts=True)
        dominant_label = unique[np.argmax(counts)]

        X_windows.append(window)
        y_windows.append(dominant_label)

    return np.array(X_windows), np.array(y_windows)

# --- Step 4: Positional Encoding ---
class PositionalEncoding(tf.keras.layers.Layer):
    def __init__(self, sequence_len, d_model):
        super().__init__()
        self.pos_encoding = self.positional_encoding(sequence_len, d_model)

    def get_angles(self, pos, i, d_model):
        angle_rates = 1 / np.power(10000, (2 * (i//2)) / np.float32(d_model))
        return pos * angle_rates

    def positional_encoding(self, sequence_len, d_model):
        angle_rads = self.get_angles(np.arange(sequence_len)[:, np.newaxis],
                                     np.arange(d_model)[np.newaxis, :],
                                     d_model)
        angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
        angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])
        return tf.cast(angle_rads[np.newaxis, ...], dtype=tf.float32)

    def call(self, inputs):
        return inputs + self.pos_encoding[:, :tf.shape(inputs)[1], :]

# --- Step 5: Transformer Encoder Block ---
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    # Multi-head self-attention
    x = MultiHeadAttention(num_heads=num_heads, key_dim=head_size)(inputs, inputs)
    x = Dropout(dropout)(x)
    x = LayerNormalization(epsilon=1e-6)(x + inputs)

    # Feed-forward
    ffn = tf.keras.Sequential([
        Dense(ff_dim, activation='relu'),
        Dropout(dropout),
        Dense(inputs.shape[-1])
    ])
    x2 = ffn(x)
    x = LayerNormalization(epsilon=1e-6)(x + x2)
    return x

# --- Step 6: Build Transformer Model ---
def build_transformer_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    x = PositionalEncoding(input_shape[0], input_shape[1])(inputs)

    for _ in range(2):  # Stack multiple transformer blocks
        x = transformer_encoder(x, head_size=64, num_heads=4, ff_dim=128, dropout=0.3)

    x = GlobalAveragePooling1D()(x)
    x = Dropout(0.3)(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation='softmax')(x)

    model = Model(inputs, outputs)
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# --- Step 7: Train & Save Transformer Model ---
def build_and_train_transformer(X, y, model_path='transformer_model.h5', encoder_path='transformer_label_encoder.pkl'):
    le = LabelEncoder()
    y_encoded = le.fit_transform(y)
    y_categorical = to_categorical(y_encoded)

    X_train, X_test, y_train, y_test = train_test_split(X, y_categorical, test_size=0.2, random_state=42)

    model = build_transformer_model(X.shape[1:], y_categorical.shape[1])

    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=64,
        validation_split=0.2,
        callbacks=[early_stop],
        verbose=1
    )

    y_pred = model.predict(X_test)
    y_pred_classes = np.argmax(y_pred, axis=1)
    y_true_classes = np.argmax(y_test, axis=1)

    print("\n--- Transformer Model Evaluation ---\n")
    print(classification_report(y_true_classes, y_pred_classes, target_names=le.classes_))

    model.save(model_path)
    joblib.dump(le, encoder_path)
    print(f"Transformer model saved to '{model_path}'")
    print(f"Label encoder saved to '{encoder_path}'")

    return model, le

# --- Step 8: Run Full Pipeline ---
def run_transformer_pipeline(path):
    print("Loading and preprocessing data...")
    X_raw, y_raw = load_and_preprocess_data(path)

    print("Rebalancing classes using SMOTE...")
    X_balanced, y_balanced = rebalance(X_raw, y_raw)

    print("Generating sliding windows...")
    X_windowed, y_windowed = generate_sliding_windows(X_balanced, y_balanced)

    print("Training Transformer model...")
    model, encoder = build_and_train_transformer(X_windowed, y_windowed)

    print("Converting model to TFLite format...")
    converter = tf.lite.TFLiteConverter.from_keras_model(model)
    tflite_model = converter.convert()

    with open("transformer_model.tflite", "wb") as f:
        f.write(tflite_model)
    print("TFLite model saved to 'transformer_model.tflite'")

    return model, encoder

# 🔥 Run it!
run_transformer_pipeline('.idea/df.csv')


Loading and preprocessing data...
Rebalancing classes using SMOTE...
Generating sliding windows...
Training Transformer model...






Epoch 1/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 86ms/step - accuracy: 0.3119 - loss: 1.5384 - val_accuracy: 0.6013 - val_loss: 1.0471
Epoch 2/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 89ms/step - accuracy: 0.5918 - loss: 1.0745 - val_accuracy: 0.7099 - val_loss: 0.7816
Epoch 3/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 82ms/step - accuracy: 0.6744 - loss: 0.8926 - val_accuracy: 0.7609 - val_loss: 0.6904
Epoch 4/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 80ms/step - accuracy: 0.7299 - loss: 0.7744 - val_accuracy: 0.7642 - val_loss: 0.6588
Epoch 5/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 81ms/step - accuracy: 0.7441 - loss: 0.7358 - val_accuracy: 0.7773 - val_loss: 0.6345
Epoch 6/50
[1m172/172[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 76ms/step - accuracy: 0.7537 - loss: 0.7162 - val_accuracy: 0.7843 - val_loss: 0.6111
Epoch 7/50
[1m1




--- Transformer Model Evaluation ---

              precision    recall  f1-score   support

  downstairs       0.68      0.88      0.77       680
     running       0.95      0.87      0.91       631
    standing       0.93      0.94      0.93       702
    upstairs       0.81      0.75      0.78       712
     walking       0.86      0.76      0.81       705

    accuracy                           0.84      3430
   macro avg       0.85      0.84      0.84      3430
weighted avg       0.85      0.84      0.84      3430

Transformer model saved to 'transformer_model.h5'
Label encoder saved to 'transformer_label_encoder.pkl'
Converting model to TFLite format...
INFO:tensorflow:Assets written to: C:\Users\EGYPT\AppData\Local\Temp\tmpf8a_mjr4\assets


INFO:tensorflow:Assets written to: C:\Users\EGYPT\AppData\Local\Temp\tmpf8a_mjr4\assets


Saved artifact at 'C:\Users\EGYPT\AppData\Local\Temp\tmpf8a_mjr4'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 50, 9), dtype=tf.float32, name='keras_tensor_20')
Output Type:
  TensorSpec(shape=(None, 5), dtype=tf.float32, name=None)
Captures:
  2156521755792: TensorSpec(shape=(1, 50, 9), dtype=tf.float32, name=None)
  2156304990288: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156521752144: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156521750608: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156521760976: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156521752336: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156521761360: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156521760784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156521760016: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2156521758864: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2

(<Functional name=functional_5, built=True>, LabelEncoder())

In [9]:
import pandas as pd
import numpy as np
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn.preprocessing import LabelEncoder
import joblib
import os

# 1. Load HAR dataset
def load_har_dataset(data_dir):
    # Features
    X_train = pd.read_csv(os.path.join(data_dir, "train", "X_train.txt"), delim_whitespace=True, header=None)
    X_test = pd.read_csv(os.path.join(data_dir, "test", "X_test.txt"), delim_whitespace=True, header=None)

    # Labels
    y_train = pd.read_csv(os.path.join(data_dir, "train", "y_train.txt"), delim_whitespace=True, header=None)[0]
    y_test = pd.read_csv(os.path.join(data_dir, "test", "y_test.txt"), delim_whitespace=True, header=None)[0]

    return X_train, X_test, y_train, y_test

# 2. Train AdaBoost model
def train_adaboost(X_train, y_train):
    print("🎯 Training AdaBoost model...")
    model = AdaBoostClassifier(
        estimator=DecisionTreeClassifier(max_depth=5),
        n_estimators=100,
        random_state=42
    )

    model.fit(X_train, y_train)
    return model

# 3. Evaluate model
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    print("\n📈 Classification Report:\n")
    print(classification_report(y_test, y_pred))
    print(f"✅ Accuracy: {accuracy_score(y_test, y_pred):.4f}")

# 4. Save model
def save_model(model, path="har_adaboost_model.pkl"):
    joblib.dump(model, path)
    print(f"💾 Model saved to: {path}")

# 5. Main
def main():
    data_dir = "./UCI HAR Dataset"
    X_train, X_test, y_train, y_test = load_har_dataset(data_dir)

    model = train_adaboost(X_train, y_train)
    evaluate_model(model, X_test, y_test)
    save_model(model)

if __name__ == "__main__":
    main()


  X_train = pd.read_csv(os.path.join(data_dir, "train", "X_train.txt"), delim_whitespace=True, header=None)
  X_test = pd.read_csv(os.path.join(data_dir, "test", "X_test.txt"), delim_whitespace=True, header=None)
  y_train = pd.read_csv(os.path.join(data_dir, "train", "y_train.txt"), delim_whitespace=True, header=None)[0]
  y_test = pd.read_csv(os.path.join(data_dir, "test", "y_test.txt"), delim_whitespace=True, header=None)[0]


🎯 Training AdaBoost model...

📈 Classification Report:

              precision    recall  f1-score   support

           1       0.94      0.96      0.95       496
           2       0.90      0.94      0.92       471
           3       0.98      0.92      0.95       420
           4       0.95      0.87      0.91       491
           5       0.89      0.96      0.92       532
           6       1.00      1.00      1.00       537

    accuracy                           0.94      2947
   macro avg       0.94      0.94      0.94      2947
weighted avg       0.94      0.94      0.94      2947

✅ Accuracy: 0.9427
💾 Model saved to: har_adaboost_model.pkl


In [6]:
import pandas as pd
import numpy as np
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import joblib

# Paths (Save directly in the current directory)
PKL_PATH = "har_rf_model.pkl"
ONNX_PATH = "har_rf_model.onnx"
DATASET_DIR = "C:\\Users\\EGYPT\\Downloads\\src\\Mobile\\UCI HAR Dataset"

def load_signal(sensor, axis, split="train"):
    path = f"{DATASET_DIR}/{split}/Inertial Signals/{sensor}_{axis}_{split}.txt"
    return pd.read_csv(path, delim_whitespace=True, header=None)

def extract_features_and_labels(split="train"):
    sensors = ["body_acc", "body_gyro", "total_acc"]
    axes = ["x", "y", "z"]
    signals = [load_signal(s, a, split) for s in sensors for a in axes]

    features = []
    for i in range(signals[0].shape[0]):
        f = []
        for sig in signals:
            row = sig.iloc[i]
            f.extend([row.mean(), row.std(), row.min(), row.max()])
        features.append(f)

    labels_path = f"{DATASET_DIR}/{split}/y_{split}.txt"
    labels = pd.read_csv(labels_path, delim_whitespace=True, header=None)[0]
    labels = labels - 1  # Shift to 0–5
    return np.array(features), labels

# Load data
X_train, y_train = extract_features_and_labels("train")
X_test, y_test = extract_features_and_labels("test")

# Normalize
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train Random Forest
model = RandomForestClassifier(
    n_estimators=100,
    max_depth=15,
    class_weight='balanced',
    random_state=42
)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("📊 Random Forest Report:\n", classification_report(y_test, y_pred))
print(f"✅ Accuracy: {accuracy_score(y_test, y_pred):.4f}")

# Save model in current directory
joblib.dump(model, PKL_PATH)

# Export to ONNX in current directory
initial_type = [('float_input', FloatTensorType([None, 36]))]
onnx_model = convert_sklearn(model, initial_types=initial_type)
with open(ONNX_PATH, "wb") as f:
    f.write(onnx_model.SerializeToString())

print(f"✅ Random Forest model saved to: {PKL_PATH}")
print(f"✅ Random Forest ONNX saved to: {ONNX_PATH}")


  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  labels = pd.read_csv(labels_path, delim_whitespace=True, header=None)[0]
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace

📊 Random Forest Report:
               precision    recall  f1-score   support

           0       0.67      0.73      0.70       496
           1       0.70      0.72      0.71       471
           2       0.89      0.78      0.83       420
           3       0.89      0.81      0.85       491
           4       0.84      0.91      0.87       532
           5       1.00      1.00      1.00       537

    accuracy                           0.83      2947
   macro avg       0.83      0.82      0.83      2947
weighted avg       0.83      0.83      0.83      2947

✅ Accuracy: 0.8297
✅ Random Forest model saved to: har_rf_model.pkl
✅ Random Forest ONNX saved to: har_rf_model.onnx


In [9]:
import pandas as pd
import numpy as np
import os
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, f1_score
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import joblib

# Paths
PKL_PATH = "har_adaboost_model.pkl"
ONNX_PATH = "har_adaboost_model.onnx"
DATASET_DIR = "C:\\Users\\EGYPT\\Downloads\\src\\Mobile\\UCI HAR Dataset"

def load_signal(sensor, axis, split="train"):
    path = f"{DATASET_DIR}/{split}/Inertial Signals/{sensor}_{axis}_{split}.txt"
    return pd.read_csv(path, delim_whitespace=True, header=None)

def extract_features_and_labels(split="train"):
    sensors = ["body_acc", "body_gyro", "total_acc"]
    axes = ["x", "y", "z"]
    signals = [load_signal(s, a, split) for s in sensors for a in axes]

    features = []
    for i in range(signals[0].shape[0]):
        f = []
        for sig in signals:
            row = sig.iloc[i]
            f.extend([
                row.mean(),
                row.std(),
                row.min(),
                row.max(),
                np.sum(row**2)/len(row),        # Energy
                np.sum(np.abs(row))/len(row)    # SMA
            ])
        features.append(f)

    labels_path = f"{DATASET_DIR}/{split}/y_{split}.txt"
    labels = pd.read_csv(labels_path, delim_whitespace=True, header=None)[0]
    labels = labels - 1  # classes 0–5
    return np.array(features), labels

# Load data
X_train, y_train = extract_features_and_labels("train")
X_test, y_test = extract_features_and_labels("test")

# Normalize
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train AdaBoost
model = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=100,
    learning_rate=1.0,
    random_state=42
)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("📊 AdaBoost Report:\n", classification_report(y_test, y_pred))
print(f"✅ Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"🎯 F1 Macro Score: {f1_score(y_test, y_pred, average='macro'):.4f}")

# Save model
joblib.dump(model, PKL_PATH)

# Export to ONNX
initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))]
onnx_model = convert_sklearn(model, initial_types=initial_type)
with open(ONNX_PATH, "wb") as f:
    f.write(onnx_model.SerializeToString())

print(f"✅ AdaBoost model saved to: {PKL_PATH}")
print(f"✅ AdaBoost ONNX saved to: {ONNX_PATH}")


  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  labels = pd.read_csv(labels_path, delim_whitespace=True, header=None)[0]
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace=True, header=None)
  return pd.read_csv(path, delim_whitespace

📊 AdaBoost Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00       496
           1       0.44      0.86      0.58       471
           2       0.75      0.82      0.78       420
           3       0.75      0.01      0.01       491
           4       0.52      1.00      0.68       532
           5       1.00      1.00      1.00       537

    accuracy                           0.62      2947
   macro avg       0.58      0.61      0.51      2947
weighted avg       0.58      0.62      0.51      2947

✅ Accuracy: 0.6179
🎯 F1 Macro Score: 0.5099
✅ AdaBoost model saved to: har_adaboost_model.pkl
✅ AdaBoost ONNX saved to: har_adaboost_model.onnx


In [18]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LayerNormalization, Dropout, MultiHeadAttention, GlobalAveragePooling1D, Add
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import os

DATASET_DIR = "C:\\Users\\EGYPT\\Downloads\\src\\Mobile\\UCI HAR Dataset"

def load_signal(sensor, axis, split="train"):
    path = f"{DATASET_DIR}/{split}/Inertial Signals/{sensor}_{axis}_{split}.txt"
    return pd.read_csv(path, delim_whitespace=True, header=None).values

def load_data(split="train"):
    sensors = ["body_acc", "body_gyro", "total_acc"]
    axes = ["x", "y", "z"]
    signals = [load_signal(s, a, split) for s in sensors for a in axes]
    X = np.stack(signals, axis=-1)  # Shape: (samples, time_steps, features)

    labels_path = f"{DATASET_DIR}/{split}/y_{split}.txt"
    y = pd.read_csv(labels_path, delim_whitespace=True, header=None)[0] - 1
    return X, to_categorical(y, num_classes=6)

# Load and scale data
X_train, y_train = load_data("train")
X_test, y_test = load_data("test")

scaler = StandardScaler()
nsamples, ntimesteps, nfeatures = X_train.shape
X_train = scaler.fit_transform(X_train.reshape(-1, nfeatures)).reshape(nsamples, ntimesteps, nfeatures)
X_test = scaler.transform(X_test.reshape(-1, nfeatures)).reshape(X_test.shape[0], ntimesteps, nfeatures)

# Transformer block
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0.1):
    x = LayerNormalization(epsilon=1e-6)(inputs)
    x = MultiHeadAttention(key_dim=head_size, num_heads=num_heads, dropout=dropout)(x, x)
    x = Dropout(dropout)(x)
    res = Add()([x, inputs])

    x = LayerNormalization(epsilon=1e-6)(res)
    x = Dense(ff_dim, activation="relu")(x)
    x = Dropout(dropout)(x)
    x = Dense(inputs.shape[-1])(x)
    return Add()([x, res])

# Build model
def build_transformer_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    x = transformer_encoder(inputs, head_size=64, num_heads=4, ff_dim=128)
    x = transformer_encoder(x, head_size=64, num_heads=4, ff_dim=128)
    x = GlobalAveragePooling1D()(x)
    x = Dropout(0.3)(x)
    x = Dense(64, activation="relu")(x)
    x = Dropout(0.3)(x)
    outputs = Dense(num_classes, activation="softmax")(x)
    return Model(inputs, outputs)

model = build_transformer_model(input_shape=(X_train.shape[1], X_train.shape[2]), num_classes=6)
model.compile(loss="categorical_crossentropy", optimizer=Adam(1e-4), metrics=["accuracy"])
model.summary()

# Train
model.fit(X_train, y_train, validation_split=0.2, epochs=15, batch_size=64)

# Evaluate
y_pred = model.predict(X_test)
y_pred_labels = np.argmax(y_pred, axis=1)
y_true = np.argmax(y_test, axis=1)

print("📊 Transformer Classifier Report:\n", classification_report(y_true, y_pred_labels))
print(f"✅ Accuracy: {accuracy_score(y_true, y_pred_labels):.4f}")

model.save("har_transformer_model.h5")
print("✅ Transformer Keras model saved to har_transformer_model.h5")


import tf2onnx
import onnx

spec = (tf.TensorSpec(model.input.shape, tf.float32, name="input"),)
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=spec, opset=13)

onnx.save(onnx_model, "har_transformer_model.onnx")
print("✅ Transformer ONNX model saved to har_transformer_model.onnx")



  return pd.read_csv(path, delim_whitespace=True, header=None).values
  return pd.read_csv(path, delim_whitespace=True, header=None).values
  return pd.read_csv(path, delim_whitespace=True, header=None).values
  return pd.read_csv(path, delim_whitespace=True, header=None).values
  return pd.read_csv(path, delim_whitespace=True, header=None).values
  return pd.read_csv(path, delim_whitespace=True, header=None).values
  return pd.read_csv(path, delim_whitespace=True, header=None).values
  return pd.read_csv(path, delim_whitespace=True, header=None).values
  return pd.read_csv(path, delim_whitespace=True, header=None).values
  y = pd.read_csv(labels_path, delim_whitespace=True, header=None)[0] - 1
  return pd.read_csv(path, delim_whitespace=True, header=None).values
  return pd.read_csv(path, delim_whitespace=True, header=None).values
  return pd.read_csv(path, delim_whitespace=True, header=None).values
  return pd.read_csv(path, delim_whitespace=True, header=None).values
  return pd.read

Epoch 1/15
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 261ms/step - accuracy: 0.1647 - loss: 1.8407 - val_accuracy: 0.4650 - val_loss: 1.4793
Epoch 2/15
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 254ms/step - accuracy: 0.4308 - loss: 1.4641 - val_accuracy: 0.5969 - val_loss: 1.2207
Epoch 3/15
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m23s[0m 249ms/step - accuracy: 0.5100 - loss: 1.2780 - val_accuracy: 0.6322 - val_loss: 1.0689
Epoch 4/15
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 276ms/step - accuracy: 0.5917 - loss: 1.1070 - val_accuracy: 0.6615 - val_loss: 0.9781
Epoch 5/15
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 277ms/step - accuracy: 0.6215 - loss: 0.9946 - val_accuracy: 0.6635 - val_loss: 0.9082
Epoch 6/15
[1m92/92[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 307ms/step - accuracy: 0.6629 - loss: 0.9020 - val_accuracy: 0.6791 - val_loss: 0.8511
Epoch 7/15
[1m92/92[

In [19]:
model.save("har_transformer_model.h5")
print("✅ Transformer Keras model saved to har_transformer_model.h5")




✅ Transformer Keras model saved to har_transformer_model.h5


In [20]:
import tf2onnx
import onnx

spec = (tf.TensorSpec(model.input.shape, tf.float32, name="input"),)
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=spec, opset=13)

onnx.save(onnx_model, "har_transformer_model.onnx")
print("✅ Transformer ONNX model saved to har_transformer_model.onnx")


ERROR:tf2onnx.tfonnx:rewriter <function rewrite_constant_fold at 0x00000276D3BA74C0>: exception `np.cast` was removed in the NumPy 2.0 release. Use `np.asarray(arr, dtype=dtype)` instead.


✅ Transformer ONNX model saved to har_transformer_model.onnx


In [22]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, LayerNormalization, MultiHeadAttention, Dropout, GlobalAveragePooling1D
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import joblib
import tf2onnx
import tensorflow as tf

# Paths
DATASET_DIR = "C:\\Users\\EGYPT\\Downloads\\src\\Mobile\\UCI HAR Dataset"
WINDOW_SIZE = 50
STEP = 25
N_FEATURES = 9
PKL_PATH = "transformer_model.pkl"
ONNX_PATH = "transformer_model.onnx"

# Load raw signal data
def load_signal(sensor, axis, split):
    file_path = f"{DATASET_DIR}/{split}/Inertial Signals/{sensor}_{axis}_{split}.txt"
    return pd.read_csv(file_path, delim_whitespace=True, header=None).values

# Create windowed dataset
def create_windowed_data(split="train"):
    sensors = ["body_acc", "body_gyro", "total_acc"]
    axes = ["x", "y", "z"]
    signals = [load_signal(s, a, split) for s in sensors for a in axes]
    signals = np.stack(signals, axis=-1)  # shape: (samples, 128, 9)

    X, y = [], []
    labels_path = f"{DATASET_DIR}/{split}/y_{split}.txt"
    labels = pd.read_csv(labels_path, delim_whitespace=True, header=None).values.flatten() - 1

    for i in range(signals.shape[0]):
        sequence = signals[i]
        label = labels[i]
        for start in range(0, sequence.shape[0] - WINDOW_SIZE + 1, STEP):
            window = sequence[start:start + WINDOW_SIZE]
            X.append(window)
            y.append(label)

    return np.array(X), np.array(y)

# Load and scale data
X_train, y_train = create_windowed_data("train")
X_test, y_test = create_windowed_data("test")

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train.reshape(-1, N_FEATURES)).reshape(-1, WINDOW_SIZE, N_FEATURES)
X_test = scaler.transform(X_test.reshape(-1, N_FEATURES)).reshape(-1, WINDOW_SIZE, N_FEATURES)
joblib.dump(scaler, "transformer_scaler.pkl")

# Encode labels
encoder = LabelEncoder()
y_train = encoder.fit_transform(y_train)
y_test = encoder.transform(y_test)
joblib.dump(encoder, "transformer_label_encoder.pkl")

# Build Transformer model
def build_transformer_model(input_shape, num_classes):
    inputs = Input(shape=input_shape)
    x = LayerNormalization()(inputs)
    x = MultiHeadAttention(num_heads=4, key_dim=64)(x, x)
    x = Dropout(0.3)(x)
    x = LayerNormalization()(x)
    x = GlobalAveragePooling1D()(x)
    x = Dense(128, activation="relu")(x)
    outputs = Dense(num_classes, activation="softmax")(x)
    return Model(inputs, outputs)

model = build_transformer_model((WINDOW_SIZE, N_FEATURES), len(np.unique(y_train)))
model.compile(optimizer=Adam(1e-4), loss="sparse_categorical_crossentropy", metrics=["accuracy"])

model.fit(
    X_train, y_train,
    validation_split=0.2,
    epochs=20,
    batch_size=64,
    callbacks=[EarlyStopping(patience=3, restore_best_weights=True)],
    verbose=1
)

# Evaluate
y_pred = np.argmax(model.predict(X_test), axis=1)
print("📊 Transformer Report:\n", classification_report(y_test, y_pred))
print(f"✅ Accuracy: {accuracy_score(y_test, y_pred):.4f}")

# Save model and encoder
model.save("transformer_model.h5")
joblib.dump(model, PKL_PATH)

# Convert to ONNX
spec = (tf.TensorSpec((None, WINDOW_SIZE, N_FEATURES), tf.float32, name="input"),)
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=spec, output_path=ONNX_PATH)
print(f"✅ Saved Transformer model to {PKL_PATH} and {ONNX_PATH}")


  return pd.read_csv(file_path, delim_whitespace=True, header=None).values
  return pd.read_csv(file_path, delim_whitespace=True, header=None).values
  return pd.read_csv(file_path, delim_whitespace=True, header=None).values
  return pd.read_csv(file_path, delim_whitespace=True, header=None).values
  return pd.read_csv(file_path, delim_whitespace=True, header=None).values
  return pd.read_csv(file_path, delim_whitespace=True, header=None).values
  return pd.read_csv(file_path, delim_whitespace=True, header=None).values
  return pd.read_csv(file_path, delim_whitespace=True, header=None).values
  return pd.read_csv(file_path, delim_whitespace=True, header=None).values
  labels = pd.read_csv(labels_path, delim_whitespace=True, header=None).values.flatten() - 1
  return pd.read_csv(file_path, delim_whitespace=True, header=None).values
  return pd.read_csv(file_path, delim_whitespace=True, header=None).values
  return pd.read_csv(file_path, delim_whitespace=True, header=None).values
  retur

Epoch 1/20
[1m368/368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 35ms/step - accuracy: 0.5649 - loss: 1.4282 - val_accuracy: 0.7428 - val_loss: 0.8797
Epoch 2/20
[1m368/368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 38ms/step - accuracy: 0.7833 - loss: 0.8133 - val_accuracy: 0.8035 - val_loss: 0.6118
Epoch 3/20
[1m368/368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 37ms/step - accuracy: 0.8568 - loss: 0.5497 - val_accuracy: 0.8674 - val_loss: 0.4530
Epoch 4/20
[1m368/368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 35ms/step - accuracy: 0.8959 - loss: 0.3820 - val_accuracy: 0.8861 - val_loss: 0.3835
Epoch 5/20
[1m368/368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 35ms/step - accuracy: 0.9105 - loss: 0.2957 - val_accuracy: 0.8944 - val_loss: 0.3370
Epoch 6/20
[1m368/368[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 35ms/step - accuracy: 0.9167 - loss: 0.2539 - val_accuracy: 0.9043 - val_loss: 0.3384
Epoch 7/20
[1m3



📊 Transformer Report:
               precision    recall  f1-score   support

           0       0.84      0.82      0.83      1984
           1       0.85      0.81      0.83      1884
           2       0.77      0.87      0.81      1680
           3       0.84      0.75      0.79      1964
           4       0.80      0.86      0.83      2128
           5       1.00      0.97      0.98      2148

    accuracy                           0.85     11788
   macro avg       0.85      0.85      0.85     11788
weighted avg       0.85      0.85      0.85     11788

✅ Accuracy: 0.8497


ERROR:tf2onnx.tfonnx:rewriter <function rewrite_constant_fold at 0x00000276D3BA74C0>: exception `np.cast` was removed in the NumPy 2.0 release. Use `np.asarray(arr, dtype=dtype)` instead.


✅ Saved Transformer model to transformer_model.pkl and transformer_model.onnx


In [23]:
import tf2onnx
import onnx

spec = (tf.TensorSpec(model.input.shape, tf.float32, name="input"),)
onnx_model, _ = tf2onnx.convert.from_keras(model, input_signature=spec, opset=13)

onnx.save(onnx_model, "har_transformer_model.onnx")
print("✅ Transformer ONNX model saved to har_transformer_model.onnx")


ERROR:tf2onnx.tfonnx:rewriter <function rewrite_constant_fold at 0x00000276D3BA74C0>: exception `np.cast` was removed in the NumPy 2.0 release. Use `np.asarray(arr, dtype=dtype)` instead.


✅ Transformer ONNX model saved to har_transformer_model.onnx


In [2]:
import pandas as pd
import numpy as np
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
import joblib
from scipy.stats import skew, kurtosis
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import GridSearchCV

# Paths
DATASET_DIR = "C:\\Users\\EGYPT\\Downloads\\src\\Mobile\\UCI HAR Dataset"
PKL_PATH = "../Models/har_rf_model_v2.pkl"
ONNX_PATH = "../Models/har_rf_model_v2.onnx"

def load_signal(sensor, axis, split="train"):
    path = f"{DATASET_DIR}/{split}/Inertial Signals/{sensor}_{axis}_{split}.txt"
    return pd.read_csv(path, sep='\s+', header=None)

def extract_features_and_labels(split="train"):
    sensors = ["body_acc", "body_gyro", "total_acc"]
    axes = ["x", "y", "z"]
    signals = [load_signal(s, a, split) for s in sensors for a in axes]

    features = []
    for i in range(signals[0].shape[0]):
        f = []
        for sig in signals:
            row = sig.iloc[i]
            # Add 9 stats per signal: mean, std, min, max, median, energy, range, skewness, kurtosis
            vals = row.values
            f.extend([
                np.mean(vals),
                np.std(vals),
                np.min(vals),
                np.max(vals),
                np.median(vals),
                np.sum(vals ** 2) / len(vals),  # energy
                np.max(vals) - np.min(vals),    # range
                skew(vals),
                kurtosis(vals)
            ])
        features.append(f)

    labels_path = f"{DATASET_DIR}/{split}/y_{split}.txt"
    labels = pd.read_csv(labels_path, sep='\s+', header=None)[0]
    labels = labels - 1  # Shift to 0-5
    return np.array(features), labels

# Load data
X_train, y_train = extract_features_and_labels("train")
X_test, y_test = extract_features_and_labels("test")

# Normalize features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Handle class imbalance with SMOTE
smote = SMOTE(random_state=42)
X_train, y_train = smote.fit_resample(X_train, y_train)

# Hyperparameter tuning
param_grid = {
    'n_estimators': [300, 500],
    'max_depth': [20, 30, None],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2],
    'class_weight': ['balanced']
}

rf = RandomForestClassifier(random_state=42)
grid_search = GridSearchCV(rf, param_grid, cv=3, scoring='accuracy', n_jobs=-1)
grid_search.fit(X_train, y_train)

model = grid_search.best_estimator_

# Evaluate
y_pred = model.predict(X_test)
print("📊 AdaBoost Report:\n", classification_report(y_test, y_pred))
print(f"✅ Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print(f"🎯 F1 Macro Score: {f1_score(y_test, y_pred, average='macro'):.4f}")

# Save model
joblib.dump(model, PKL_PATH)

# Export to ONNX
initial_type = [('float_input', FloatTensorType([None, X_train.shape[1]]))]
onnx_model = convert_sklearn(model, initial_types=initial_type)
with open(ONNX_PATH, "wb") as f:
    f.write(onnx_model.SerializeToString())

print(f"✅ AdaBoost model saved to: {PKL_PATH}")
print(f"✅ AdaBoost ONNX saved to: {ONNX_PATH}")

  return pd.read_csv(path, sep='\s+', header=None)
  labels = pd.read_csv(labels_path, sep='\s+', header=None)[0]


📊 AdaBoost Report:
               precision    recall  f1-score   support

           0       0.69      0.68      0.68       496
           1       0.70      0.75      0.72       471
           2       0.91      0.85      0.88       420
           3       0.88      0.84      0.86       491
           4       0.86      0.89      0.88       532
           5       1.00      1.00      1.00       537

    accuracy                           0.84      2947
   macro avg       0.84      0.84      0.84      2947
weighted avg       0.84      0.84      0.84      2947

✅ Accuracy: 0.8392


  return pd.read_csv(path, sep='\s+', header=None)
  labels = pd.read_csv(labels_path, sep='\s+', header=None)[0]


NameError: name 'f1_score' is not defined

In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
from imblearn.over_sampling import SMOTE
import joblib
import matplotlib.pyplot as plt

# Paths
DATASET_DIR = "/src/Mobile/UCI HAR Dataset"
PKL_PATH = "../Models/har_rf_model_op1_smote.pkl"
ONNX_PATH = "../Models/har_rf_model_op1_smote.onnx"

def load_signal(sensor, axis, split="train"):
    path = f"{DATASET_DIR}/{split}/Inertial Signals/{sensor}_{axis}_{split}.txt"
    return pd.read_csv(path, sep='\s+', header=None)

def extract_features_and_labels(split="train"):
    sensors = ["body_acc", "body_gyro", "total_acc"]
    axes = ["x", "y", "z"]
    signals = [load_signal(s, a, split) for s in sensors for a in axes]

    features = []
    for i in range(signals[0].shape[0]):
        f = []
        for sig in signals:
            row = sig.iloc[i]
            f.extend([row.mean(), row.std(), row.min(), row.max()])
        features.append(f)

    labels_path = f"{DATASET_DIR}/{split}/y_{split}.txt"
    labels = pd.read_csv(labels_path, sep='\s+', header=None)[0]
    labels = labels - 1  # Convert to 0–5
    return np.array(features), labels

# Load data
X_train, y_train = extract_features_and_labels("train")
X_test, y_test = extract_features_and_labels("test")

# Normalize
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Apply SMOTE
print("🔁 Applying SMOTE...")
smote = SMOTE(random_state=42)
X_train_bal, y_train_bal = smote.fit_resample(X_train, y_train)
print(f"✅ Resampled shape: {X_train_bal.shape}, {np.bincount(y_train_bal)}")

# Optimized Random Forest
model = RandomForestClassifier(
    n_estimators=300,
    max_depth=30,
    min_samples_leaf=2,
    min_samples_split=4,
    class_weight=None,  # No need for class_weight with SMOTE
    random_state=42,
    n_jobs=-1
)
model.fit(X_train_bal, y_train_bal)

# Evaluate
y_pred = model.predict(X_test)
print("📊 Random Forest + SMOTE Report:\n", classification_report(y_test, y_pred))
print(f"✅ Accuracy: {accuracy_score(y_test, y_pred):.4f}")
print("🔍 Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

# Feature Importance Plot (Optional)
importances = model.feature_importances_
indices = np.argsort(importances)[::-1]
plt.figure(figsize=(10, 5))
plt.title("Feature Importances")
plt.bar(range(X_train.shape[1]), importances[indices])
plt.xticks(range(X_train.shape[1]), indices, rotation=90)
plt.tight_layout()
plt.show()

# Save model and scaler
os.makedirs(os.path.dirname(PKL_PATH), exist_ok=True)
joblib.dump(scaler, PKL_PATH.replace(".pkl", "_scaler.pkl"))
joblib.dump(model, PKL_PATH)

# Export to ONNX
initial_type = [('float_input', FloatTensorType([None, 36]))]
onnx_model = convert_sklearn(model, initial_types=initial_type)
with open(ONNX_PATH, "wb") as f:
    f.write(onnx_model.SerializeToString())

print(f"✅ RF + SMOTE ONNX saved to: {ONNX_PATH}")