<a href="https://colab.research.google.com/github/Propa-Punam/Wifi-RSS-Crowdsensing/blob/main/main%20data%20process%20/generating_student_vector_of_ssid_and_strength.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
from google.colab import drive
import pandas as pd
import os

# Mount Google Drive at /content/drive (Colab only; `drive` comes from
# google.colab). The CSV inputs are read from, and the result is written to,
# "/content/drive/My Drive" further down in this cell.
drive.mount('/content/drive')

# SSIDs that get a slot in every student vector. Scan rows whose SSID is not
# listed here are skipped when the vectors are built.
allowed_ssids = {
    "CSE-104",
    "CSE-202",
    "CSE-204",
    "CSE-205",
    "CSE-206",
    "CSE-214",
    "CSE-303",
    "CSE-304",
    "CSE-306",
    "CSE-401",
    "CSE-G04",
    "DataLab@BUET",
}

def create_simple_vectors(base_path="/content/drive/My Drive",
                          folders=("203", "204", "outside"),
                          ssids=None):
    """Build one room-labelled SSID strength vector per student.

    Every folder under ``base_path`` is expected to contain ``entries.csv``
    (columns ``StudentID`` and ``entryId``) and ``scan_lists.csv`` (columns
    ``entryId``, ``SSID`` and ``Strength``). A folder where either file is
    missing is skipped with a printed warning.

    Scans are matched to entries *within the same folder only*, so identical
    ``entryId`` values appearing in different folders' exports cannot leak
    one student's readings into another student's vector.

    Args:
        base_path: Directory that contains the per-room folders.
        folders: Folder names to read; the folder name doubles as the
            student's ``room`` label. If a student appears in several
            folders, the last folder listed wins.
        ssids: Iterable of SSIDs to keep. Defaults to the module-level
            ``allowed_ssids``.

    Returns:
        dict mapping each student id to
        ``{'room': <folder>, <ssid>: <strength>, ...}``. SSIDs never seen for
        a student keep the placeholder value -100. When an SSID is read more
        than once for a student, the reading processed last (folder order,
        then entry order, then scan-file order) is kept.
    """
    # Keep the caller's iteration order for the vector keys, and a set for
    # fast membership tests.
    ssid_order = list(allowed_ssids if ssids is None else ssids)
    ssid_lookup = set(ssid_order)

    student_vectors = {}

    for folder in folders:
        folder_path = os.path.join(base_path, folder)
        try:
            entries = pd.read_csv(os.path.join(folder_path, "entries.csv"))
            scans = pd.read_csv(os.path.join(folder_path, "scan_lists.csv"))
        except FileNotFoundError:
            print(f"Warning: Could not find CSV files in {folder}")
            continue

        # Index this folder's relevant scans by entryId once, instead of
        # re-filtering the whole scan table for every entry.
        wanted_scans = scans[scans["SSID"].isin(ssid_lookup)]
        readings_by_entry = {}
        for entry_id, ssid, strength in zip(
            wanted_scans["entryId"].tolist(),
            wanted_scans["SSID"].tolist(),
            wanted_scans["Strength"].tolist(),
        ):
            readings_by_entry.setdefault(entry_id, []).append((ssid, strength))

        for student_id, entry_id in zip(
            entries["StudentID"].tolist(), entries["entryId"].tolist()
        ):
            vector = student_vectors.get(student_id)
            if vector is None:
                # 'room' goes first, followed by one -100 placeholder per SSID.
                vector = {"room": folder, **{ssid: -100 for ssid in ssid_order}}
                student_vectors[student_id] = vector
            else:
                # Student already seen in an earlier folder: latest folder wins.
                vector["room"] = folder

            for ssid, strength in readings_by_entry.get(entry_id, ()):
                vector[ssid] = strength

    return student_vectors

# Build the per-student vectors from the CSV exports on Drive.
student_vectors = create_simple_vectors()

# Fail with a clear message instead of an opaque KeyError on 'room' below.
if not student_vectors:
    raise ValueError(
        "No student data was loaded; check that the entries.csv and "
        "scan_lists.csv files exist under the Drive folders."
    )

# One row per student; the dict keys (student ids) become the StudentID column.
student_vectors_df = (
    pd.DataFrame.from_dict(student_vectors, orient='index')
    .reset_index()
    .rename(columns={'index': 'StudentID'})
)

# Relabel "outside" entries as l1, l2, l3, ... in row order. The label list
# grows with the number of outside students, so more than three of them no
# longer causes a length-mismatch error on assignment.
outside_students = student_vectors_df[student_vectors_df['room'] == 'outside']
outside_labels = [f"l{i}" for i in range(1, len(outside_students) + 1)]
if outside_labels:
    student_vectors_df.loc[outside_students.index, 'room'] = outside_labels

# Drop SSID columns where every student still has the -100 placeholder.
# Only SSID columns are examined; StudentID and room are always kept.
ssid_columns = [col for col in student_vectors_df.columns if col not in ['StudentID', 'room']]
cols_to_drop = [col for col in ssid_columns if student_vectors_df[col].eq(-100).all()]
student_vectors_df = student_vectors_df.drop(columns=cols_to_drop)

# Column order: StudentID, room, then the remaining SSID columns.
cols = ['StudentID', 'room'] + [col for col in ssid_columns if col not in cols_to_drop]
student_vectors_df = student_vectors_df[cols]

# Save the DataFrame to a CSV file
output_path = "/content/drive/My Drive/student_vectors.csv"
student_vectors_df.to_csv(output_path, index=False)

print(f"Student vectors saved to {output_path}")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Student vectors saved to /content/drive/My Drive/student_vectors.csv
