In [None]:
# Generate AP and timestamp sequences from cluster 1 (the code reads kth/cluster1.csv)

import pandas as pd
import csv

# Load the association log. The code below reads its 'client', 'timestamp'
# and 'AP' columns.
cluster1_df = pd.read_csv('kth/cluster1.csv')

# Parse the timestamps so a calendar date can be derived for every row.
cluster1_df['timestamp'] = pd.to_datetime(cluster1_df['timestamp'])

# Both dictionaries are keyed by (client, date) and hold parallel lists:
# entry i of the timestamp list is the time at which entry i of the
# (building, AP) list was appended.
user_building_ap_sequences = {}
user_timestamp_sequences = {}

# NOTE(review): rows are consumed in file order; this presumably expects the
# CSV to be sorted by timestamp per client -- confirm against the data.
# The three columns are iterated directly instead of via DataFrame.iterrows(),
# which builds a full Series object for every row.
for user_id, timestamp, ap_label in zip(
    cluster1_df['client'], cluster1_df['timestamp'], cluster1_df['AP']
):
    # Rows with a missing AP value carry no association; skip them.
    if pd.isna(ap_label):
        continue

    # Sequences are grouped per calendar day, so only the date part is kept.
    date = timestamp.date()

    # The label is split on 'Bldg' and then on 'AP', i.e. it is expected to
    # look like '<prefix>Bldg<building>AP<ap>'. A label without 'Bldg', or
    # without exactly one 'AP' after it, raises here (unchanged behaviour).
    # NOTE(review): anything before 'Bldg' is discarded, so labels that differ
    # only in that prefix map to the same building id -- confirm intended.
    building_id, ap_id = ap_label.split('Bldg')[1].split('AP')

    # abs() drops a leading minus sign from either number; one call each
    # replaces the original's duplicated "if < 0" + abs() handling.
    ap_tuple = (abs(int(building_id)), abs(int(ap_id)))

    user_date_key = (user_id, date)
    ap_sequence = user_building_ap_sequences.setdefault(user_date_key, [])
    timestamp_sequence = user_timestamp_sequences.setdefault(user_date_key, [])

    # Record only transitions: consecutive rows on the same (building, AP)
    # are collapsed into the first occurrence.
    if not ap_sequence or ap_sequence[-1] != ap_tuple:
        ap_sequence.append(ap_tuple)
        timestamp_sequence.append(timestamp)

# Save the building and AP sequences to a new CSV file: one row per
# (client, date), followed by that day's '(building,ap)' entries.
building_ap_output_filename = 'kth/building_ap_sequences.csv'

# The longest sequence determines how many 'sequenceN' header columns are
# needed. default=0 yields a header-only file when there are no sequences,
# instead of max() raising ValueError after the file was already truncated.
max_sequence_length = max(
    (len(seq) for seq in user_building_ap_sequences.values()), default=0
)

with open(building_ap_output_filename, 'w', newline='') as f:
    writer = csv.writer(f)
    header = ['client', 'date'] + [
        f'sequence{i}' for i in range(1, max_sequence_length + 1)
    ]
    writer.writerow(header)

    # Rows shorter than the header are simply written with fewer fields.
    for (user_id, date), seq in user_building_ap_sequences.items():
        sequence_values = [f'({building},{ap})' for building, ap in seq]
        writer.writerow([user_id, date] + sequence_values)

# Save the timestamp sequences to another new CSV file: one row per
# (client, date), followed by the timestamps recorded for that day.
timestamp_output_filename = 'kth/timestamp_sequences.csv'

# The header needs one 'timestampN' column per entry of the LONGEST sequence.
# The previous code used len(user_timestamp_sequences), i.e. the number of
# (client, date) keys, which gave the wrong header width and was inconsistent
# with the building/AP sequence file. default=0 covers the empty case.
max_timestamp_count = max(
    (len(seq) for seq in user_timestamp_sequences.values()), default=0
)

with open(timestamp_output_filename, 'w', newline='') as f:
    writer = csv.writer(f)
    header = ['client', 'date'] + [
        f'timestamp{i}' for i in range(1, max_timestamp_count + 1)
    ]
    writer.writerow(header)

    for (user_id, date), seq in user_timestamp_sequences.items():
        # str() gives the textual form of each stored timestamp.
        row_data = [str(ts) for ts in seq]
        writer.writerow([user_id, date] + row_data)
