<a href="https://colab.research.google.com/github/Propa-Punam/Wifi-RSS-Crowdsensing/blob/main/temporary/has_potential.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from google.colab import drive
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Make Google Drive reachable from the Colab runtime; the CSV is read from it.
drive.mount('/content/drive')

# Load the table. The rest of the cell expects a 'room' column holding the
# location label of every row.
file_path = '/content/drive/My Drive/ss.csv'
data = pd.read_csv(file_path)

# Quick overview of what was loaded: dimensions, column names, room labels.
loaded_shape = data.shape
loaded_columns = data.columns.tolist()
print("Initial data shape:", loaded_shape)
print("Available columns:", loaded_columns)
print("\nUnique rooms:", pd.unique(data['room']))

# Per-room train/test split.
# Rows of the rooms listed in SPLIT_ROOMS are divided 50/50 between the training
# and the test set; rows of every other room (the outside locations) are used
# for training only.
rooms = data['room'].unique()

# Room labels may have been parsed as strings or as integers, so both forms
# are accepted.
SPLIT_ROOMS = ('203', '204', 203, 204)

# Debug prints
print("\nBefore splitting:")
for room in rooms:
    print(f"Room {room} count:", len(data[data['room'] == room]))

# Collect the per-room pieces in lists and concatenate once at the end.
# Growing a DataFrame with pd.concat inside the loop re-copies all accumulated
# rows on every iteration, and starting from an empty DataFrame relies on
# pandas' deprecated handling of empty entries in concat (FutureWarning), which
# may leave the result with object-dtype columns.
train_parts = []
test_parts = []
for room in rooms:
    room_data = data[data['room'] == room]
    # train_test_split cannot produce two non-empty halves from a single row
    # and raises in that case; such a room is kept entirely for training.
    if room in SPLIT_ROOMS and len(room_data) >= 2:
        train, test = train_test_split(room_data, test_size=0.5, random_state=42)
        train_parts.append(train)
        test_parts.append(test)
    else:
        # outside locations go to training
        train_parts.append(room_data)

# Fall back to an empty frame with the original columns and dtypes so that
# later column lookups and the "no test data" check keep working.
train_data = pd.concat(train_parts) if train_parts else data.iloc[0:0].copy()
test_data = pd.concat(test_parts) if test_parts else data.iloc[0:0].copy()

# Debug prints
print("\nAfter splitting:")
print("Train data shape:", train_data.shape)
print("Test data shape:", test_data.shape)
print("Train data rooms:", train_data['room'].unique())
print("Test data rooms:", test_data['room'].unique())

# Features: every column except the identifier ('StudentID') and the label ('room').
feature_columns = [col for col in data.columns if col not in ['StudentID', 'room']]
print("\nUsing features:", feature_columns)

# Verify columns exist in both datasets
print("\nColumns in train_data:", train_data.columns.tolist())
print("Columns in test_data:", test_data.columns.tolist())

# Room labels can be strings or integers; compare them as strings everywhere.
train_data['room'] = train_data['room'].astype(str)
test_data['room'] = test_data['room'].astype(str)

# to_numpy() always returns a plain NumPy array. The explicit float dtype makes
# the feature matrix numeric even if the columns were stored with object dtype,
# and fails here (instead of later, inside the scaler) on non-numeric values.
X_train = train_data[feature_columns].to_numpy(dtype=float)
y_train = train_data['room'].to_numpy()

# Without test rows nothing below can be evaluated. Raise instead of calling
# exit(): inside a notebook exit() shuts the kernel/runtime down and discards
# all state, whereas an exception stops execution and shows what went wrong.
if len(test_data) == 0:
    raise ValueError("No test data available! Cannot evaluate the model without test samples.")

X_test = test_data[feature_columns].to_numpy(dtype=float)
y_test = test_data['room'].to_numpy()

# Standardise the features. The scaler is fitted on the training rows only and
# the same fitted transformation is then applied to both train and test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

# Kohonen-style layer: one weight (prototype) vector per room label that occurs
# in the training labels.
rooms_in_train = np.unique(y_train)
learning_rate = 0.1  # step size of every prototype update

# Every prototype starts at the mean of its own room's training samples.
weight_vectors = {
    label: X_train[y_train == label].mean(axis=0)
    for label in rooms_in_train
}

# One pass over the training set, in row order: each sample pulls the prototype
# of its own room a fraction `learning_rate` of the way towards itself.
for sample, room in zip(X_train, y_train):
    weight_vectors[room] += learning_rate * (sample - weight_vectors[room])
# Prediction
def predict_room(sample):
    """Return the room whose weight vector is closest to ``sample``.

    Closeness is the Euclidean norm of ``sample - weight_vectors[room]``, where
    ``weight_vectors`` is the module-level mapping of room label to weight
    vector. When several rooms are equally close, the one that comes first in
    the mapping wins. Returns ``None`` if no room yields a distance smaller
    than infinity (for example when the mapping is empty).
    """
    best_room, best_dist = None, float('inf')
    for candidate, prototype in weight_vectors.items():
        candidate_dist = np.linalg.norm(sample - prototype)
        # Only a strictly smaller distance replaces the current best.
        if not candidate_dist < best_dist:
            continue
        best_room, best_dist = candidate, candidate_dist
    return best_room

# Classify every test sample with the trained weight vectors.
y_pred = list(map(predict_room, X_test))

# Share of test samples whose predicted room equals the actual room.
accuracy = accuracy_score(y_test, y_pred)
print(f"\nAccuracy: {accuracy:.2f}")

# Actual and predicted labels side by side; show the first few rows.
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print("\nSample Predictions:")
first_rows = results.head()
print(first_rows)

# Cross-tabulation of actual room (rows) against predicted room (columns).
print("\nPrediction Distribution:")
prediction_table = pd.crosstab(results['Actual'], results['Predicted'])
print(prediction_table)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Initial data shape: (48, 12)
Available columns: ['StudentID', 'room', 'CSE-104', 'DataLab@BUET', 'Galaxy M124213', 'Hall of Fame', 'CSE-206', 'CSE-303', 'CSE-205', 'CSE-204', 'CSE-304', 'CSE-202']

Unique rooms: ['203' '204' 'l1' 'l2' 'l3']

Before splitting:
Room 203 count: 24
Room 204 count: 21
Room l1 count: 1
Room l2 count: 1
Room l3 count: 1

After splitting:
Train data shape: (25, 12)
Test data shape: (23, 12)
Train data rooms: ['203' '204' 'l1' 'l2' 'l3']
Test data rooms: ['203' '204']

Using features: ['CSE-104', 'DataLab@BUET', 'Galaxy M124213', 'Hall of Fame', 'CSE-206', 'CSE-303', 'CSE-205', 'CSE-204', 'CSE-304', 'CSE-202']

Columns in train_data: ['StudentID', 'room', 'CSE-104', 'DataLab@BUET', 'Galaxy M124213', 'Hall of Fame', 'CSE-206', 'CSE-303', 'CSE-205', 'CSE-204', 'CSE-304', 'CSE-202']
Columns in test_data: ['StudentID', 'room', 'CSE-104', 