SIMPLIFY - restructures CSV contents to only contain the number of items and the shopping duration.

In [93]:
import csv

input_file = "ag3.csv"
# NOTE: same path as the input, so the raw file is replaced by its summary.
# Re-running this cell on an already simplified file will not give the same result.
output_file = "ag3.csv"


def summarize_lists(rows):
    """Collapse raw rows into one ``[item_count, last_timestamp]`` pair per list.

    A row whose second field parses to 0 starts a new list.  The rows that
    follow it (non-zero second field) belong to that list until the next
    such row or the end of the input.  The starting row is counted as an
    item as well, hence the ``+ 1``.  A starting row that is immediately
    followed by another starting row produces no output.

    Blank rows (``csv.reader`` yields them as empty lists) are skipped
    instead of raising ``IndexError``.

    Args:
        rows: Iterable of row sequences; element 1 must be convertible
            with ``float``.

    Returns:
        A list of ``[item_count, last_timestamp]`` pairs, where
        ``last_timestamp`` is element 1 of the final row of the list,
        passed through unchanged.
    """
    groups = [[]]
    for row in rows:
        if not row:
            continue
        if float(row[1]) == 0:
            # Timestamp 0: close the current group and open a new one.
            groups.append([])
        else:
            groups[-1].append(row)
    # Groups without any non-zero row are dropped.
    return [[len(group) + 1, group[-1][1]] for group in groups if group]


with open(input_file, newline='') as csvfile:
    output_data = summarize_lists(csv.reader(csvfile))

# Write the summary rows (no header) to the output file
with open(output_file, 'w', newline='') as csvfile:
    csv.writer(csvfile).writerows(output_data)


MERGE - merges three files in the simplified format into a single table.

In [94]:
import pandas as pd

# Load the three simplified files; none of them has a header row.
# NOTE(review): the first two reads use the same path ('ag3.csv'), so the
# second and third output columns come out identical - confirm this is intended.
df1 = pd.read_csv('ag3.csv', header=None)
df2 = pd.read_csv('ag3.csv', header=None)
df3 = pd.read_csv('ap3.csv', header=None)

# Output layout: the first column of df1, followed by the second column of
# df1, df2 and df3, placed side by side.
second_columns = [frame.iloc[:, 1] for frame in (df1, df2, df3)]
merged = pd.concat([df1.iloc[:, 0], *second_columns], axis=1)

# Store the combined table without index or header
merged.to_csv('3.csv', index=False, header=False)

SCALE - scales time gaps to normal speed.

In [95]:
import pandas as pd
import random as rd

# Load the merged table and label its four columns
df = pd.read_csv('3.csv', header=None, names=['L', 'NS', 'AG', 'AP'])

# One scale factor R is drawn per run (uniform between 110 and 130) and is
# shared by every row and every column.
R = rd.uniform(110, 130)

# Apply the same adjustment to each time column: subtract 10 per unit of L
# and add R per unit of L.
for column in ('NS', 'AG', 'AP'):
    df[column] = df[column] - 10 * df['L'] + R * df['L']

# Overwrite the input file with the rescaled values (no header, no index)
df.to_csv('3.csv', index=False, header=False)


SCALE II - scales time gaps to normal speed, working directly on the per-item rows.

In [28]:
import pandas as pd
import random as rd

# Load the per-item rows and label the three columns
df = pd.read_csv('ag0.csv', header=None, names=['Product', 'Dwell Time', 'Quality'])

# A row whose Dwell Time is 0 opens a new list.
starts_list = df['Dwell Time'] == 0

# For every row, count the rows since the most recent list start:
# 0 on the starting row itself, then 1, 2, 3, ... until the next start.
# Rows are grouped by how many starts have been seen so far, and the
# non-start rows are counted cumulatively inside each group.
steps_into_list = (~starts_list).astype(int).groupby(starts_list.cumsum()).cumsum()

# One scale factor per run, shared by all rows
R = rd.uniform(110,130)
df['Dwell Time'] = df['Dwell Time'] - 10 * steps_into_list + R * steps_into_list

# Overwrite the input file with the rescaled values (no header, no index)
df.to_csv('ag0.csv', index=False, header=False)


GENERATE DATA - generates random dummy data

In [None]:
import pandas as pd
import numpy as np

# Size of the synthetic sample: one row per simulated shopper
num_shoppers = 200

# Draw the three columns in the order they are written to disk
item_counts = np.random.randint(5, 21, num_shoppers)   # integers 5..20 (upper bound is exclusive)
algo1_times = np.random.uniform(10, 30, num_shoppers)  # Algo#1 time, uniform on [10, 30)
algo2_times = np.random.uniform(12, 35, num_shoppers)  # Algo#2 time, uniform on [12, 35)

data = {
    'TotalItems': item_counts,
    'Algo1Time': algo1_times,
    'Algo2Time': algo2_times,
}

# Assemble the table
df = pd.DataFrame(data)

# Persist it without index or header row
df.to_csv('random_data.csv', index=False, header=False)

# Show the generated table
print("Random Data:")
print(df)


FILTER - filters lists by type, keeping only the rows whose first column is between 7 and 14 (inclusive).

In [92]:
import pandas as pd

# Source and destination of the filter step
file_path = '4.csv'
output_file_path = '4a.csv'

# The file has no header row, so columns are addressed by position
df = pd.read_csv(file_path, header=None)

# Keep the rows whose first column lies in the closed range 7..14
first_column = df.iloc[:, 0]
filtered_df = df[first_column.between(7, 14)]

filtered_df.to_csv(output_file_path, index=False, header=False)
