### Pre-Processing

In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix

In [25]:

# Load the CSV file
file_path = "cleaned_data.csv"  # Replace with the path to your CSV file
df = pd.read_csv(file_path)

In [26]:
# Data Preprocessing
# Keep relevant columns for human presence detection (adjust as needed)
columns_to_keep = ['rssi', 'rate', 'noise_floor', 'data', 'local_time', 'channel']
df_cleaned = df[columns_to_keep]

In [27]:
# Handling missing values (if any)
df_cleaned['rssi'] = df_cleaned['rssi'].fillna(df_cleaned['rssi'].mean())
df_cleaned['rate'] = df_cleaned['rate'].fillna(df_cleaned['rate'].mean())
df_cleaned['noise_floor'] = df_cleaned['noise_floor'].fillna(df_cleaned['noise_floor'].mean())

In [5]:
# Feature engineering: Convert 'data' column to usable format (e.g., take the mean of the list)
df_cleaned['data_mean'] = df_cleaned['data'].apply(lambda x: eval(x) if isinstance(x, str) else x)
df_cleaned['data_mean'] = df_cleaned['data_mean'].apply(lambda x: sum(x)/len(x) if x else 0)

In [28]:
# Drop the original 'data' column after extracting the feature
df_cleaned = df_cleaned.drop(columns=['data'])

In [29]:
# For simplicity, let's create a binary target (human_present)
df_cleaned['human_present'] = (df_cleaned['rssi'] > -70).astype(int)  # Example threshold for presence

In [16]:
# For simplicity, let's create a binary target (human_present), you need labeled data for this
# Assuming presence of human is detected based on some threshold (e.g., signal strength, noise level)
# You may need labeled data for this task; here we're creating an example target
df_cleaned['human_present'] = (df_cleaned['rssi'] > -70).astype(int)  # Example threshold for presence


In [30]:
# Class distribution before handling imbalance
print("Class distribution before balancing:")
print(df_cleaned['human_present'].value_counts())

Class distribution before balancing:
human_present
1    4594
Name: count, dtype: int64


In [33]:
# Check if the minority class has enough samples
if df_cleaned['human_present'].value_counts().min() < 1:
    print("Not enough samples in the dataset to proceed.")
else:
    # Under-sampling the majority class
    df_majority = df_cleaned[df_cleaned['human_present'] == 1]
    df_minority = df_cleaned[df_cleaned['human_present'] == 0]

    # Randomly select the same number of samples from the majority class as in the minority class
    df_majority_downsampled = df_majority.sample(n=len(df_minority), random_state=42)

    # Combine minority class with downsampled majority class
    df_balanced = pd.concat([df_majority_downsampled, df_minority])

    # Shuffle the dataset
    df_balanced = df_balanced.sample(frac=1, random_state=42).reset_index(drop=True)

    # Re-split the balanced data into features and target variable
    X_balanced = df_balanced.drop(columns=['human_present'])
    y_balanced = df_balanced['human_present']

    # Train-test split
    if len(X_balanced) > 1:  # Ensure there are enough samples to split
        X_train, X_test, y_train, y_test = train_test_split(X_balanced, y_balanced, test_size=0.2, random_state=42)

        # Train the Logistic Regression model
        model = LogisticRegression()
        model.fit(X_train, y_train)

        # Make predictions on the test set
        y_pred = model.predict(X_test)

        # Evaluate the model
        accuracy = accuracy_score(y_test, y_pred)
        conf_matrix = confusion_matrix(y_test, y_pred)

        print(f"Model Accuracy: {accuracy * 100:.2f}%")
        print("Confusion Matrix:")
        print(conf_matrix)
    else:
        print("Not enough samples to perform train-test split.")

Not enough samples to perform train-test split.
