<a href="https://colab.research.google.com/github/Propa-Punam/Wifi-RSS-Crowdsensing/blob/main/understand/part_1_understanding_phase.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
import pandas as pd
from io import StringIO

# ---------------------------------------------------------------------------
# Walk-through of a winner-take-all (competitive learning) zone classifier on
# a tiny hand-written Wi-Fi RSSI data set. Every intermediate value is printed
# so the algorithm can be followed step by step.
# ---------------------------------------------------------------------------

# Raw CSV text. Columns: StudentID, room label, then one RSSI reading per
# access point. Three rows for room 203 followed by three rows for room 204.
train_data_str = """StudentID,room,CSE-306,CSE-304,CSE-401,CSE-104
2005037,203,-100,-71,-100,-83
2005026,203,-94,-58,-87,-80
2005006,203,-100,-68,-100,-74
2005074,204,-100,-61,-100,-82
2005084,204,-94,-58,-87,-80
2005085,204,-100,-68,-100,-74"""

# Held-out rows with the same layout (three per room).
test_data_str = """StudentID,room,CSE-306,CSE-304,CSE-401,CSE-104
2005004,203,-100,-52,-100,-79
2005014,203,-77,-60,-87,-64
2005045,203,-72,-67,-77,-70
2005038,204,-72,-67,-77,-70
2005108,204,-88,-54,-82,-73
2005102,204,-72,-60,-81,-63"""

# ---- Parse both CSV strings into DataFrames --------------------------------
print("Loading training data:")
train_df = pd.read_csv(StringIO(train_data_str))
print("Training DataFrame:", train_df, sep="\n")

print("\nLoading test data:")
test_df = pd.read_csv(StringIO(test_data_str))
print("Test DataFrame:", test_df, sep="\n")

# ---- Split into feature matrices and label vectors -------------------------
# Columns 0 and 1 are StudentID and room; everything from column 2 on is RSSI.
# Room labels are turned into strings so they compare equal to `zones` below.
print("\nExtracting training features (X_train) and labels (y_train):")
X_train = train_df.iloc[:, 2:].values
y_train = train_df['room'].astype(str).values
print("X_train (RSSI values):", X_train, sep="\n")
print("y_train (room labels):", y_train, sep="\n")

print("\nExtracting test features (X_test) and labels (y_test):")
X_test = test_df.iloc[:, 2:].values
y_test = test_df['room'].astype(str).values
print("X_test (RSSI values):", X_test, sep="\n")
print("y_test (room labels):", y_test, sep="\n")

# ---- Zones and problem dimensions ------------------------------------------
zones = ['203', '204']
print("\nDefined zones:", zones, sep="\n")

# Each zone owns one column of the weight matrix; this maps label -> column.
zone_to_idx = dict(zip(zones, range(len(zones))))
print("Zone to index mapping:", zone_to_idx, sep="\n")

n_zones = len(zones)
n_samples, n_aps = X_train.shape  # rows are samples, columns are access points
print("Number of zones (n_zones):", n_zones)
print("Number of APs (n_aps):", n_aps)
print("Number of training samples (n_samples):", n_samples)

# ---- Tunable parameters ----------------------------------------------------
learning_rate = 0.1               # step size of each winner update
print("Learning rate:", learning_rate)

n_iterations = 50                 # full passes over the training rows
print("Number of iterations:", n_iterations)

samples_per_zone_for_init = 2     # leading rows per zone averaged into the start weights
print("Samples per zone for initialization:", samples_per_zone_for_init)

# ---- Weight initialisation -------------------------------------------------
# `weights` has shape (n_aps, n_zones); it is printed transposed so that each
# printed row is the weight vector of one zone.
weights = np.zeros((n_aps, n_zones))
print("\nInitial Weights (before initialization):", weights.T, sep="\n")

for zone in zones:
    zone_idx = zone_to_idx[zone]
    print(f"\nInitializing weights for zone {zone} (index {zone_idx}):")

    zone_samples = X_train[y_train == zone]
    print(f"Samples for zone {zone}:", zone_samples, sep="\n")

    # A slice already stops at the end of the array, so no explicit clamp to
    # the number of available rows is needed.
    init_samples = zone_samples[:samples_per_zone_for_init]
    print(f"Initial samples (up to {samples_per_zone_for_init}):", init_samples, sep="\n")

    # The zone's starting weight vector is the per-AP mean of those rows.
    weights[:, zone_idx] = init_samples.mean(axis=0)
    print(f"Updated weights for zone {zone} (mean of initial samples):",
          weights[:, zone_idx], sep="\n")

print("\nInitial Weights (after initialization):", weights.T, sep="\n")

# ---- Training --------------------------------------------------------------
# For every sample: score each zone with the dot product of its weight vector
# and the input, pick the highest-scoring zone as the winner, and move only
# the winner's weights a fraction `learning_rate` of the way towards the
# input. The labels in y_train are not consulted during this loop.
#
# NOTE(review): the inputs and the weights are all negative here, so the dot
# product rewards large magnitudes rather than closeness to the input. Confirm
# that this is the intended winner criterion.
print("\nStarting training...")
for iteration in range(n_iterations):
    print(f"\nIteration {iteration + 1}:")
    print("Weights at start of iteration:", weights.T, sep="\n")

    for i, input_vector in enumerate(X_train):
        print(f"\n  Sample {i + 1}:")
        print(f"  Input vector: {input_vector}")

        val_j = np.dot(weights.T, input_vector)
        print(f"  VAL_j (dot products for each zone): {val_j}")

        winner_idx = np.argmax(val_j)
        winner_zone = zones[winner_idx]
        print(f"  Winner index: {winner_idx} (Zone {winner_zone})")

        # Keep a copy of the pre-update weights so both can be printed.
        old_weights = weights[:, winner_idx].copy()
        weights[:, winner_idx] = old_weights + learning_rate * (input_vector - old_weights)
        print(f"  Old weights for winner (Zone {winner_zone}): {old_weights}")
        print(f"  New weights for winner (Zone {winner_zone}): {weights[:, winner_idx]}")
        print(f"  Updated weights after sample {i + 1}:", weights.T, sep="\n")

print("\nFinal Trained Weights:", weights.T, sep="\n")

# ---- Testing ---------------------------------------------------------------
# Same scoring rule as in training, but the weights stay fixed; the
# best-scoring zone is the prediction.
print("\nStarting testing...")
correct_predictions = 0
predictions = []

for i, (input_vector, true_zone) in enumerate(zip(X_test, y_test)):
    print(f"\nTest Sample {i + 1}:")
    print(f"  Input vector: {input_vector}")
    print(f"  True zone: {true_zone}")

    val_j = np.dot(weights.T, input_vector)
    print(f"  VAL_j (dot products for each zone): {val_j}")

    predicted_idx = np.argmax(val_j)
    predicted_zone = zones[predicted_idx]
    print(f"  Predicted index: {predicted_idx}")
    print(f"  Predicted zone: {predicted_zone}")

    predictions.append(predicted_zone)
    is_hit = predicted_zone == true_zone
    correct_predictions += 1 if is_hit else 0
    print("  Correct: True" if is_hit else "  Correct: False")

# ---- Summary ---------------------------------------------------------------
# Accuracy is the percentage of test rows whose predicted zone equals the label.
accuracy = correct_predictions / len(X_test) * 100
print(f"\nAccuracy: {accuracy:.2f}% ({correct_predictions}/{len(X_test)} correct)")

# Side-by-side table of true and predicted zones.
results_df = pd.DataFrame({'True Zone': y_test, 'Predicted Zone': predictions})
print("\nResults:", results_df, sep="\n")

Loading training data:
Training DataFrame:
   StudentID  room  CSE-306  CSE-304  CSE-401  CSE-104
0    2005037   203     -100      -71     -100      -83
1    2005026   203      -94      -58      -87      -80
2    2005006   203     -100      -68     -100      -74
3    2005074   204     -100      -61     -100      -82
4    2005084   204      -94      -58      -87      -80
5    2005085   204     -100      -68     -100      -74

Loading test data:
Test DataFrame:
   StudentID  room  CSE-306  CSE-304  CSE-401  CSE-104
0    2005004   203     -100      -52     -100      -79
1    2005014   203      -77      -60      -87      -64
2    2005045   203      -72      -67      -77      -70
3    2005038   204      -72      -67      -77      -70
4    2005108   204      -88      -54      -82      -73
5    2005102   204      -72      -60      -81      -63

Extracting training features (X_train) and labels (y_train):
X_train (RSSI values):
[[-100  -71 -100  -83]
 [ -94  -58  -87  -80]
 [-100  -68 -100  -7