In [4]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.linear_model import LogisticRegression
    

In [9]:

# Load the dataset
import os

# Only these five columns are read from the CSV; everything else is skipped.
selected_columns = ['sub', 'act', 'alx', 'aly', 'alz']

# dataset.csv is expected one directory above the current working directory,
# so the result depends on where the kernel was started.
path_parts = (os.getcwd(), '..', 'dataset.csv')
file_path = os.path.join(*path_parts)

data = pd.read_csv(filepath_or_buffer=file_path, usecols=selected_columns)
print(f"Dataset loaded successfully from {file_path}")

Dataset loaded successfully from d:\AI\Intelligence\Infra\human_activity_recognition\Experiment\..\dataset.csv


In [None]:

# Rename columns
# Replace the abbreviated headers with descriptive names; later cells refer to
# 'activity' and 'subject'.
column_aliases = {'act': 'activity', 'sub': 'subject'}
data = data.rename(columns=column_aliases)
print("Columns renamed: 'act' -> 'activity', 'sub' -> 'subject'")
    

In [None]:

# Remove duplicate rows
# Rows that are identical across every loaded column are collapsed to one; the
# number discarded is reported together with the remaining row count.
rows_before = len(data)
data = data.drop_duplicates()
rows_removed = rows_before - len(data)
print(f"Removed {rows_removed} duplicate rows. Remaining rows: {len(data)}.")
    

In [None]:

# Handle missing values by removing rows with NaN in 'subject' or 'activity' columns
# A row is dropped when either of the two key columns is missing; gaps in the
# remaining columns are left in place here.
data = data.dropna(axis='index', how='any', subset=['subject', 'activity'])
print("Rows with NaN in 'subject' or 'activity' columns have been removed.")
    

In [None]:

# Handle missing values for other columns using the mean strategy
# First find the columns that still contain at least one missing value, then
# replace each gap with that column's own mean.
# NOTE(review): the mean is taken over the whole frame before the train/test
# split performed later in the notebook, so test rows contribute to the fill
# value — confirm this is acceptable.
columns_with_gaps = [name for name in data.columns if data[name].isnull().any()]
for name in columns_with_gaps:
    column_mean = data[name].mean()
    data[name] = data[name].fillna(column_mean)
print("Missing values handled using mean strategy.")
    

In [None]:

# Fix incorrect datatypes
# Show the dtypes, cast 'subject' to the pandas 'string' dtype, then show the
# dtypes again so the change is visible in the cell output.
print("---------------------------------------Before fixing the datatypes------------------------")
print(data.dtypes)

data = data.astype({'subject': 'string'})
print("Column 'subject' converted to string.")

print("---------------------------------------After fixing the datatypes------------------------")
print(data.dtypes)
    

In [None]:

# Plot the distribution of all activities
# One bar per distinct value in 'activity', with the bar height giving the row count.
fig, ax = plt.subplots(figsize=(10, 8))
ax.set_title("Barplot of All Activities", fontsize=16)
sns.countplot(data=data, x='activity', ax=ax)

# Rotate the category labels so long activity names do not overlap.
plt.setp(ax.get_xticklabels(), rotation=90, fontsize=12)

# Axis labels are set after countplot, which otherwise supplies its own defaults.
ax.set_xlabel('Activity', fontsize=14)
ax.set_ylabel('Count', fontsize=14)
plt.show()
    

In [None]:

# Filter selected activities
# Only rows whose activity is one of these five labels are kept for modelling.
selected_activities = ["Jogging", "Walking", "Jumping", "Cycling", "Sitting"]

# .copy() detaches the filtered frame from the one it was sliced from, so later
# column assignments (here and in the label-encoding cell) write to a real
# DataFrame instead of triggering pandas' SettingWithCopyWarning.
#
# The former `.apply(lambda x: x if x in selected_activities else "0")` step was
# dropped: after the isin() filter every remaining value is already in
# `selected_activities`, so it could never change anything.
data = data[data['activity'].isin(selected_activities)].copy()

activity_counts = data['activity'].value_counts()
print(f"Counts of selected activities:{activity_counts}")
    

In [None]:

# Plot the distribution of selected activities
# Same bar chart as before, drawn from the frame as it stands after filtering.
fig, ax = plt.subplots(figsize=(10, 8))
ax.set_title("Barplot of Selected Activities", fontsize=16)
sns.countplot(data=data, x='activity', ax=ax)

# Rotate the category labels so they stay legible.
plt.setp(ax.get_xticklabels(), rotation=90, fontsize=12)

# Axis labels are set after countplot, which otherwise supplies its own defaults.
ax.set_xlabel('Activity', fontsize=14)
ax.set_ylabel('Count', fontsize=14)
plt.show()
    

In [None]:

# Encode categorical data
# Learn the set of activity labels, then replace each label with its integer
# code. The fitted encoder stays available as `le` for mapping codes back.
le = LabelEncoder()
le.fit(data['activity'])
data['activity'] = le.transform(data['activity'])
    

In [None]:

# Split the dataset into features and labels
# The target is the encoded 'activity' column.
y = data['activity']
# Everything except the target and the 'subject' identifier is used as a feature.
X = data.drop(['activity', 'subject'], axis=1)
    

In [None]:

# Split the dataset into training and testing sets
# The fraction and seed are named once so the log line below is built from the
# value actually passed to train_test_split and cannot drift from it.
test_fraction = 0.2
split_seed = 0

# NOTE(review): rows are assigned to the two sets individually and 'subject' is
# not part of X, so samples from one subject can land in both sets — confirm
# this is the intended evaluation protocol.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=test_fraction, random_state=split_seed
)
print(f"Dataset split into training and testing sets. Test size: {test_fraction}")
    

In [None]:

# Scale the features
# The scaler's statistics come from the training split only; the same fitted
# transform is then applied to both splits.
# Note: X_train / X_test are overwritten, so re-running this cell alone would
# scale already-scaled data.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
print("Features scaled successfully.")
    

In [None]:

# Train logistic regression model
# Fit on the scaled training split, allowing the solver up to 1000 iterations.
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
print("Logistic Regression model trained successfully.")
    

In [None]:

# Evaluate the model
# Predict on the test split and report overall accuracy as a percentage.
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# The targets were integer-encoded by `le`, so without help the report would
# list classes as bare codes. Recover the activity names from the encoder and
# pass the full list of codes explicitly: this keeps the matrix and report
# aligned with `le.classes_` even if some class is absent from the test split.
class_ids = list(range(len(le.classes_)))
class_names = [str(name) for name in le.classes_]

# Rows are true classes and columns are predicted classes, both in
# `class_names` order.
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred, labels=class_ids))
print("Classification Report:")
print(classification_report(y_test, y_pred, labels=class_ids, target_names=class_names))
    