<a href="https://colab.research.google.com/github/Propa-Punam/Wifi-RSS-Crowdsensing/blob/main/correct%20so%20far/without_grey_code_correct_so_far.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder

class SimplifiedCPNWifiLocalizer:
    def __init__(self, learning_rate=0.1):
        self.beta = learning_rate
        self.kohonen_weights = None
        self.zones = None
        self.n_features = None
        self.label_encoder = LabelEncoder()

    def _initialize_centroids(self, X, y_encoded, original_labels):
        """Initialize centroids using mean RSSI vector for each zone"""
        unique_zones = np.unique(y_encoded)
        self.zones = unique_zones
        self.n_features = X.shape[1]

        print("Initializing centroids for zones:")
        for i, zone in enumerate(unique_zones):
            original_zone = original_labels[y_encoded == zone][0]
            print(f"Zone {original_zone} (encoded: {zone})")

        # Initialize Kohonen layer weights with zone means
        self.kohonen_weights = np.zeros((len(unique_zones), self.n_features))
        for i, zone in enumerate(unique_zones):
            zone_samples = X[y_encoded == zone]
            self.kohonen_weights[i] = np.mean(zone_samples, axis=0)
            original_zone = original_labels[y_encoded == zone][0]
            print(f"Zone {original_zone} centroid initialized with {len(zone_samples)} samples")

    def _compute_activation(self, rss_vector):
        """Compute activation values for each zone neuron"""
        return np.sum(rss_vector * self.kohonen_weights, axis=1)

    def train(self, X, y, epochs=100):
        """Train the CPN model using only Kohonen layer"""
        # Convert string labels to numeric
        original_labels = y.copy()
        y_encoded = self.label_encoder.fit_transform(y)

        # Initialize centroids
        self._initialize_centroids(X, y_encoded, original_labels)

        print("\nStarting training...")
        for epoch in range(epochs):
            epoch_accuracy = 0
            for i in range(len(X)):
                # Get current sample
                rss_vector = X[i]
                true_zone = y_encoded[i]

                # Compute activation values
                activation_values = self._compute_activation(rss_vector)

                # Find winning neuron
                winner_idx = np.argmax(activation_values)

                # Update Kohonen weights for winning neuron
                self.kohonen_weights[winner_idx] += self.beta * (
                    rss_vector - self.kohonen_weights[winner_idx]
                )

                # Track accuracy during training
                true_zone_idx = np.where(self.zones == true_zone)[0][0]
                if winner_idx == true_zone_idx:
                    epoch_accuracy += 1

            if (epoch + 1) % 10 == 0:
                print(f"Epoch {epoch + 1}/{epochs}, Training Accuracy: {epoch_accuracy/len(X)*100:.2f}%")

    def predict(self, X):
        """Predict zones for new RSSI measurements"""
        predictions = []
        for rss_vector in X:
            # Compute activation values
            activation_values = self._compute_activation(rss_vector)

            # Find winning neuron directly maps to zone
            winner_idx = np.argmax(activation_values)
            predicted_zone = self.zones[winner_idx]
            predictions.append(predicted_zone)

        # Convert numeric predictions back to original labels
        return self.label_encoder.inverse_transform(predictions)

def prepare_data(df):
    """Prepare data by separating features and labels"""
    X = df.iloc[:, 2:].values.astype(float)  # Convert RSSI values to float
    y = df['room'].astype(str).values  # Ensure room numbers are strings
    return X, y

def main():
    # Load data
    train_df = pd.read_csv('/content/train.csv')
    test_df = pd.read_csv('/content/test.csv')

    # Convert room columns to string type
    train_df['room'] = train_df['room'].astype(str)
    test_df['room'] = test_df['room'].astype(str)

    print("Data loaded:")
    print(f"Training samples: {len(train_df)}")
    print(f"Test samples: {len(test_df)}")
    print(f"Unique rooms in training: {train_df['room'].unique()}")
    print(f"Unique rooms in test: {test_df['room'].unique()}\n")

    # Prepare training and test data
    X_train, y_train = prepare_data(train_df)
    X_test, y_test = prepare_data(test_df)

    # Initialize and train the model
    model = SimplifiedCPNWifiLocalizer(learning_rate=0.1)
    model.train(X_train, y_train, epochs=100)

    # Make predictions on test set
    y_pred = model.predict(X_test)

    # Calculate and display results
    accuracy = accuracy_score(y_test, y_pred)
    print(f"\nTest Accuracy: {accuracy * 100:.2f}%")

    # Create results DataFrame
    results_df = pd.DataFrame({
        'StudentID': test_df['StudentID'],
        'Actual Room': y_test,
        'Predicted Room': y_pred,
        'Correct?': y_test == y_pred
    })

    # Print prediction summary
    print("\nPrediction Summary:")
    unique_rooms = sorted(set(np.concatenate([y_test, y_pred])))
    room_summary = pd.DataFrame({
        'Room': unique_rooms,
        'Total Cases': [sum(y_test == room) for room in unique_rooms],
        'Correct Predictions': [sum((y_test == room) & (y_pred == room)) for room in unique_rooms]
    })
    room_summary['Accuracy'] = (room_summary['Correct Predictions'] / room_summary['Total Cases'] * 100).round(2)
    print(room_summary)

    # Save results
    results_df.to_csv('prediction_results.csv', index=False)
    print("\nDetailed results have been saved to 'prediction_results.csv'")

if __name__ == "__main__":
    main()

Data loaded:
Training samples: 25
Test samples: 23
Unique rooms in training: ['203' '204' 'l1' 'l2' 'l3']
Unique rooms in test: ['203' '204']

Initializing centroids for zones:
Zone 203 (encoded: 0)
Zone 204 (encoded: 1)
Zone l1 (encoded: 2)
Zone l2 (encoded: 3)
Zone l3 (encoded: 4)
Zone 203 centroid initialized with 12 samples
Zone 204 centroid initialized with 10 samples
Zone l1 centroid initialized with 1 samples
Zone l2 centroid initialized with 1 samples
Zone l3 centroid initialized with 1 samples

Starting training...
Epoch 10/100, Training Accuracy: 40.00%
Epoch 20/100, Training Accuracy: 16.00%
Epoch 30/100, Training Accuracy: 36.00%
Epoch 40/100, Training Accuracy: 40.00%
Epoch 50/100, Training Accuracy: 48.00%
Epoch 60/100, Training Accuracy: 48.00%
Epoch 70/100, Training Accuracy: 48.00%
Epoch 80/100, Training Accuracy: 48.00%
Epoch 90/100, Training Accuracy: 48.00%
Epoch 100/100, Training Accuracy: 48.00%

Test Accuracy: 52.17%

Prediction Summary:
  Room  Total Cases  Corr