<a href="https://colab.research.google.com/github/Propa-Punam/Wifi-RSS-Crowdsensing/blob/main/data%20preprocessing/count_strenth_number_per_macaddress.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import required libraries
import pandas as pd
from google.colab import drive
import os

# Input CSVs and the three generated reports all live in the root of the
# mounted Google Drive ("My Drive").
input_path_entries = '/content/drive/My Drive/entries.csv'
input_path_scans = '/content/drive/My Drive/scan_lists.csv'
output_path_filtered = '/content/drive/My Drive/filtered_wifi_data.csv'
output_path_unique_macs = '/content/drive/My Drive/unique_mac_addresses.csv'
output_path_strengths = '/content/drive/My Drive/mac_strengths_per_scan_203.csv'

# Google Drive has to be mounted before any of the paths above can be opened.
drive.mount('/content/drive')

# Load the entry table first, then the scan table.
# NOTE(review): the code below reads entryId/StudentID from the former and
# entryId/scanGroupIndex/SSID/MacAddress/Strength from the latter.
entries_df, scans_df = (
    pd.read_csv(csv_path)
    for csv_path in (input_path_entries, input_path_scans)
)

# SSIDs of the access points to keep; scans of any other network are dropped.
target_ssids = [
    "CSE-206", "CSE-104", "CSE-202", "CSE-205",
    "CSE-304", "CSE-204", "CSE-303", "DataLab@BUET",
    "CSE-214", "CSE-G04", "CSE-401", "CSE-306"
]

# Only scanGroupIndex values 0..19 are considered; this is also the number of
# per-MAC strength slots allocated below.
scan_group_count = 20

# Accumulators filled by the loop below:
#   filtered_data  - one dict per matching scan row (rows of the first report)
#   mac_ssid_pairs - distinct (MacAddress, SSID) tuples (second report)
#   mac_strengths  - MacAddress -> {'SSID', 'strengths', 'count'} (third report)
filtered_data = []
mac_ssid_pairs = set()
mac_strengths = {}

# Apply the SSID and scan-group filters once to the whole scan table instead
# of re-masking it for every entry and every group index.
relevant_scans = scans_df[
    scans_df['SSID'].isin(target_ssids)
    & scans_df['scanGroupIndex'].isin(list(range(scan_group_count)))
]

# A stable sort keeps the file order of rows inside each scan group, so rows
# are visited in ascending group order exactly as a 0..19 loop would.
relevant_scans = relevant_scans.sort_values('scanGroupIndex', kind='stable')

# Index the surviving scans by entry so each entry is a dictionary lookup.
# sort=False is enough here: row order inside a group is always preserved.
scans_by_entry = {
    scan_entry_id: entry_group
    for scan_entry_id, entry_group in relevant_scans.groupby('entryId', sort=False)
}

# Walk the entries in file order. Iterating the columns directly (rather than
# iterrows) keeps each column's own dtype, so integer ids never become floats.
for entry_id, student_id in zip(entries_df['entryId'], entries_df['StudentID']):
    entry_scans = scans_by_entry.get(entry_id)
    if entry_scans is None:
        # No scan of a target SSID was recorded for this entry.
        continue

    scan_fields = zip(
        entry_scans['scanGroupIndex'],
        entry_scans['MacAddress'],
        entry_scans['SSID'],
        entry_scans['Strength'],
    )
    for raw_group, mac_address, ssid, strength in scan_fields:
        # The column may have been parsed as float; it is used as a list index.
        scan_group = int(raw_group)

        # Row for the filtered report.
        filtered_data.append({
            'entryId': entry_id,
            'StudentID': student_id,
            'scanGroupIndex': scan_group,
            'MacAddress': mac_address,
            'SSID': ssid,
            'Strength': strength
        })

        # Remember the distinct MAC/SSID combination.
        mac_ssid_pairs.add((mac_address, ssid))

        # Per-MAC summary: the SSID is taken from the first sighting, the slot
        # for a scan group holds the most recently seen strength (later entries
        # overwrite earlier ones), and 'count' totals every matching row.
        mac_record = mac_strengths.setdefault(
            mac_address,
            {'SSID': ssid, 'strengths': [None] * scan_group_count, 'count': 0},
        )
        mac_record['strengths'][scan_group] = strength
        mac_record['count'] += 1

# First report: every matching scan row, ordered by entry and scan group.
# Naming the columns explicitly keeps the frame well-formed (and sortable)
# even when no scan matched and filtered_data is empty; without them an empty
# frame has no 'entryId' column and sort_values raises KeyError.
filtered_columns = [
    'entryId', 'StudentID', 'scanGroupIndex', 'MacAddress', 'SSID', 'Strength'
]
output_df_filtered = pd.DataFrame(filtered_data, columns=filtered_columns)
output_df_filtered = output_df_filtered.sort_values(['entryId', 'scanGroupIndex'])

# Second report: distinct MAC/SSID pairs. The pairs come from a set, whose
# iteration order is arbitrary, so MacAddress is used as a tie-breaker to make
# the row order reproducible between runs.
unique_macs_data = [{'MacAddress': mac, 'SSID': ssid} for mac, ssid in mac_ssid_pairs]
output_df_unique_macs = pd.DataFrame(unique_macs_data, columns=['MacAddress', 'SSID'])
output_df_unique_macs = output_df_unique_macs.sort_values(['SSID', 'MacAddress'])

# Third report: one row per MAC address with its SSID, the number of matching
# scan rows, and one strength column per scan group index (0..19).
strength_columns = [f'ScanIndex_{i}_Strength' for i in range(20)]
strengths_data = [
    {
        'MacAddress': mac_address,
        'SSID': data['SSID'],
        'StrengthCount': data['count'],
        # Slot i of the strengths list becomes column ScanIndex_i_Strength.
        **dict(zip(strength_columns, data['strengths'])),
    }
    for mac_address, data in mac_strengths.items()
]

output_df_strengths = pd.DataFrame(
    strengths_data,
    columns=['MacAddress', 'SSID', 'StrengthCount', *strength_columns],
)

# Write the three reports to Google Drive, without the DataFrame index column.
for report_frame, report_path in (
    (output_df_filtered, output_path_filtered),
    (output_df_unique_macs, output_path_unique_macs),
    (output_df_strengths, output_path_strengths),
):
    report_frame.to_csv(report_path, index=False)

# Summary: where each report was written and how many rows it holds.
print(
    f"Filtered data has been saved to {output_path_filtered}",
    f"Total records in filtered data: {len(output_df_filtered)}",
    sep="\n",
)
print(
    f"\nUnique MAC addresses have been saved to {output_path_unique_macs}",
    f"Total unique MAC-SSID pairs: {len(output_df_unique_macs)}",
    sep="\n",
)
print(
    f"\nStrengths per scan group have been saved to {output_path_strengths}",
    f"Total MAC addresses in strengths data: {len(output_df_strengths)}",
    sep="\n",
)

# Preview: the first rows of each report, in the same order as above.
print("\nFirst few rows of filtered data:", output_df_filtered.head(), sep="\n")
print("\nFirst few rows of unique MAC addresses:", output_df_unique_macs.head(), sep="\n")
print("\nFirst few rows of strengths per scan group:", output_df_strengths.head(), sep="\n")

Mounted at /content/drive
Filtered data has been saved to /content/drive/My Drive/filtered_wifi_data.csv
Total records in filtered data: 1930

Unique MAC addresses have been saved to /content/drive/My Drive/unique_mac_addresses.csv
Total unique MAC-SSID pairs: 13

Strengths per scan group have been saved to /content/drive/My Drive/mac_strengths_per_scan_203.csv
Total MAC addresses in strengths data: 13

First few rows of filtered data:
   entryId  StudentID  scanGroupIndex         MacAddress          SSID  \
0        1    2005045               0  bc:22:28:21:09:24       CSE-205   
1        1    2005045               0  bc:22:28:21:09:21       CSE-205   
2        1    2005045               0  30:b5:c2:ea:5b:44  DataLab@BUET   
3        1    2005045               0  60:63:4c:31:37:40       CSE-303   
4        1    2005045               1  bc:22:28:21:09:24       CSE-205   

   Strength  
0       -50  
1       -63  
2       -68  
3       -58  
4       -50  

First few rows of unique MAC a