In [1]:
import pandas as pd
import numpy as np
import os

In [3]:
def lat_lon_to_grid_pos(lat, lon, min_lat, max_lat, min_lon, max_lon, grid_size):
    """
    Map a latitude/longitude pair to a single integer cell id on a square grid.

    The bounding box [min_lat, max_lat] x [min_lon, max_lon] is divided into
    ``grid_size`` equal steps along each axis. The returned id is
    ``lat_pos * grid_size + lon_pos`` (row-major, zero-based).

    Parameters
    ----------
    lat, lon : float
        Coordinates of the point to encode.
    min_lat, max_lat, min_lon, max_lon : float
        Bounding box that defines the grid.
    grid_size : int
        Number of cells along each axis.

    Returns
    -------
    int
        Cell id in ``[0, grid_size * grid_size - 1]``.

    Notes
    -----
    Points outside the bounding box are clamped to the nearest edge cell, so
    the result is always a valid cell id. A zero-width axis (min == max) maps
    every point to position 0 on that axis instead of dividing by zero.
    """
    def axis_index(value, lower, upper):
        # Width of one cell along this axis.
        step = (upper - lower) / grid_size
        if step == 0:
            # Degenerate range: there is only one possible position.
            return 0
        # Clamp on both sides: the upper clamp puts value == upper in the last
        # cell, the lower clamp keeps values below `lower` from producing a
        # negative index (which would collide with other cells' ids).
        return max(0, min(int((value - lower) / step), grid_size - 1))

    lat_pos = axis_index(lat, min_lat, max_lat)
    lon_pos = axis_index(lon, min_lon, max_lon)

    # Convert the 2D grid position to a single integer.
    return lat_pos * grid_size + lon_pos


In [4]:

def encode_locations(df, num_lat_bins, num_lon_bins):
    """
    Bin latitude and longitude into an equal-width grid and label each row.

    The grid spans the min/max of ``df['latitude']`` and ``df['longitude']``.
    Three columns are written onto ``df`` itself: ``lat_bin`` and ``lon_bin``
    (zero-based bin indices) and ``location_id``, computed as
    ``lat_bin * num_lon_bins + lon_bin + 1`` (so ids start at 1).

    Returns the same DataFrame object that was passed in.
    """
    def bin_indices(values, bin_count):
        # Equal-width edges over the observed range; include_lowest keeps the
        # minimum value inside the first bin.
        edges = np.linspace(values.min(), values.max(), bin_count + 1)
        return pd.cut(values, bins=edges, labels=False, include_lowest=True)

    df['lat_bin'] = bin_indices(df['latitude'], num_lat_bins)
    df['lon_bin'] = bin_indices(df['longitude'], num_lon_bins)

    # Row-major cell number, shifted so the first cell is 1.
    df['location_id'] = 1 + df['lon_bin'] + num_lon_bins * df['lat_bin']

    return df

In [5]:
# Load the merged reference CSV. NOTE(review): this is an absolute,
# machine-specific Windows path, so the cell only runs on the author's machine.
df = pd.read_csv(r'C:\Users\ss6365\Desktop\location_privacy_final\collected\data\merged_all_security_subset_3km.csv')
# Bare expression so the notebook renders the DataFrame as the cell output.
df

Unnamed: 0,longitude,latitude,identifier
0,-77.680333,43.083838,1
1,-77.680991,43.083803,1
2,-77.681017,43.083802,1
3,-77.681042,43.083802,1
4,-77.681090,43.083802,1
...,...,...,...
55420,-77.680445,43.083879,46
55421,-77.680442,43.083868,46
55422,-77.680441,43.083863,46
55423,-77.680442,43.083860,46


In [6]:
# Bounding box of the reference DataFrame `df`; these four values are passed
# to convert_to_location_id_and_save() further down as the grid bounds.
lat_min, lat_max = df['latitude'].min(), df['latitude'].max()
lon_min, lon_max = df['longitude'].min(), df['longitude'].max()

In [7]:
# Display the minimum latitude computed from df in the previous cell.
lat_min

43.0661955

In [13]:

def convert_to_location_id_and_save(base_directory, grid_size, min_lat, max_lat, min_lon, max_lon):
    """
    Add a ``location_id`` column to every ``.csv`` file under ``base_directory``.

    The directory tree is walked recursively. Each CSV is read, every row's
    ``latitude``/``longitude`` is converted with ``lat_lon_to_grid_pos`` using
    the given bounding box and ``grid_size``, and the file is written back to
    the same path (overwriting it, without the index column).

    Parameters
    ----------
    base_directory : str
        Root directory to search for CSV files.
    grid_size : int
        Number of grid cells along each axis.
    min_lat, max_lat, min_lon, max_lon : float
        Bounding box that defines the grid.

    Raises
    ------
    KeyError
        If a CSV has no ``latitude`` or ``longitude`` column.
    """
    # Walk through the directory tree
    for dirpath, _dirnames, filenames in os.walk(base_directory):
        for file in filenames:
            if not file.endswith('.csv'):  # Only CSV files are converted
                continue
            file_path = os.path.join(dirpath, file)
            df = pd.read_csv(file_path)
            # Iterate the two coordinate columns directly instead of using a
            # row-wise df.apply(axis=1): it avoids building a Series per row
            # and also works for a CSV that has a header but no data rows.
            df['location_id'] = [
                lat_lon_to_grid_pos(lat, lon, min_lat, max_lat, min_lon, max_lon, grid_size)
                for lat, lon in zip(df['latitude'], df['longitude'])
            ]
            # Save the updated DataFrame back to CSV without changing its name
            df.to_csv(file_path, index=False)

# Example usage. lat_min, lat_max, lon_min and lon_max must already be defined
# (they are computed from df['latitude'] / df['longitude'] in an earlier cell).
grid_size = 500
# Alternative input directory (disabled):
#base_directory = 'C:\\Users\\ss6365\\Desktop\\11111\\Synthetic\\Perturbed_Averaged\\Laplace'

base_directory = r'C:\Users\ss6365\Desktop\location_privacy_final\test\data\security'

convert_to_location_id_and_save(base_directory, grid_size, lat_min, lat_max, lon_min, lon_max)

In [14]:
def transform_csv_files(directory):
    """
    Collapse each two-point CSV in ``directory`` into a single-row A/B record.

    For every ``.csv`` file directly inside ``directory`` (sub-directories and
    other file types are ignored), rows 0 and 1 are read as points A and B and
    the file is overwritten in place with one row containing
    ``Latitude_A, Longitude_A, Latitude_B, Longitude_B, Location_ID_A,
    Location_ID_B, Identifier``. ``Identifier`` is taken from row 0.

    Files that already have all of those output columns are skipped, so
    running the function a second time is a no-op rather than an error.

    Raises
    ------
    KeyError
        If an untransformed CSV lacks one of the ``latitude``, ``longitude``,
        ``location_id`` or ``identifier`` columns, or has fewer than two rows.
    """
    output_columns = {
        'Latitude_A', 'Longitude_A', 'Latitude_B', 'Longitude_B',
        'Location_ID_A', 'Location_ID_B', 'Identifier',
    }

    for csv_file in os.listdir(directory):
        file_path = os.path.join(directory, csv_file)
        # os.listdir also returns sub-directories and non-CSV files; reading
        # those with pandas would fail, so only regular .csv files are handled.
        if not csv_file.lower().endswith('.csv') or not os.path.isfile(file_path):
            continue

        df = pd.read_csv(file_path)

        # The output overwrites the input, so a file from a previous run is
        # already in the target layout and must not be transformed again.
        if output_columns.issubset(df.columns):
            continue

        # Scalar .loc lookups (rather than whole-row access) keep each value's
        # own dtype, so integer ids are not turned into floats.
        transformed_data = {
            'Latitude_A': df.loc[0, 'latitude'],
            'Longitude_A': df.loc[0, 'longitude'],
            'Latitude_B': df.loc[1, 'latitude'],
            'Longitude_B': df.loc[1, 'longitude'],
            #'Perturbed_Latitude_A': df.loc[0, 'reported_lat'],
            #'Perturbed_Longitude_A': df.loc[0, 'reported_lon'],
            #'Perturbed_Latitude_B': df.loc[1, 'reported_lat'],
            #'Perturbed_Longitude_B': df.loc[1, 'reported_lon'],
            'Location_ID_A': df.loc[0, 'location_id'],
            'Location_ID_B': df.loc[1, 'location_id'],
            # Assuming the same identifier for both points
            'Identifier': df.loc[0, 'identifier'],
            #'Perturbed_ID_A':df.loc[0, 'P_Location_ID'],
            #'Perturbed_ID_B':df.loc[1, 'P_Location_ID'],
        }

        # Create a one-row DataFrame and write it over the original file.
        transformed_df = pd.DataFrame([transformed_data])
        transformed_df.to_csv(file_path, index=False)

# The directory holding the CSV files to transform
directory = r"C:\Users\ss6365\Desktop\location_privacy_final\test\data\security"

# Apply the transformation to the files in the directory.
# Each file is overwritten in place under its original name.
transform_csv_files(directory)

# To verify, list the files in the directory after transformation
os.listdir(directory)

['Hu_Samsung_S22_csv-1706551395849_3km_part1.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part10.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part11.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part12.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part13.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part14.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part15.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part2.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part3.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part4.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part5.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part6.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part7.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part8.csv',
 'Hu_Samsung_S22_csv-1706551395849_3km_part9.csv']