In [None]:
#imports
import pandas as pd
import numpy as np
import datetime


: 

In [None]:

# Step 1: Preprocess the data

# Read the raw visit log from disk
df = pd.read_csv('data.csv')

# Missing-value audit: per-column NaN totals, then the timestamp column on its own.
# The timestamp column is still unparsed here, so this counts its missing entries.
nan_counts = df.isna().sum()
nat_count = df['device_local_date'].isnull().sum()

# Report the audit
print("NaN counts:")
print(nan_counts)
print("\nNaT count in 'device_local_date' column:")
print(nat_count)

# Parse the timestamp strings once (strict format), then derive the
# time-of-day and the weekday number (.dt.dayofweek) from the parsed values
visit_timestamps = pd.to_datetime(df['device_local_date'], format='%Y-%m-%d %H:%M:%S')
df['device_local_date'] = visit_timestamps
df['device_local_time'] = visit_timestamps.dt.time
df['day_of_week'] = visit_timestamps.dt.dayofweek

# One group per (shopping center, weekday) pair
grouped_df = df.groupby(['shopping_center_id', 'day_of_week'])

# NOTE: only the last bare expression of a cell is rendered, so the first
# preview below is computed but never displayed.
grouped_df.head()
grouped_df['device_local_date'].head()


In [None]:

# Step 2: Calculate earliest and latest visit times
opening_hours = {}
for (center_id, weekday), visits in grouped_df:
    visit_times = visits['device_local_time']

    # The earliest and latest visit of the group are stored as that
    # center's (start, end) pair for the weekday
    per_center = opening_hours.setdefault(center_id, {})
    per_center.setdefault(weekday, (visit_times.min(), visit_times.max()))

# Report the stored (start, end) pair for each shopping center and weekday
for shopping_center in opening_hours:
    print(f"Shopping Center: {shopping_center}")
    for day_of_week in opening_hours[shopping_center]:
        start_time, end_time = opening_hours[shopping_center][day_of_week]
        print(f"Day of the week: {day_of_week}")
        print(f"Opening hours: {start_time} - {end_time}\n")


In [None]:
# Step 3: Average opening duration per weekday, taken across all shopping centers
average_hours = {}  # weekday number -> pd.Timedelta (or pd.NaT when no center has data)

# datetime.time objects cannot be subtracted directly, so both ends of a
# window are attached to the same dummy date before taking the difference.
anchor_date = datetime.datetime.min.date()

for day in range(7):
    durations_sec = []  # one open-window length (in seconds) per center

    for hours_by_day in opening_hours.values():
        window = hours_by_day.get(day)
        if window is None:
            continue  # this center has no visits on this weekday

        first_visit, last_visit = window
        opened_at = datetime.datetime.combine(anchor_date, first_visit)
        closed_at = datetime.datetime.combine(anchor_date, last_visit)
        durations_sec.append((closed_at - opened_at).total_seconds())

    if durations_sec:
        average_hours[day] = pd.Timedelta(seconds=np.mean(durations_sec))
    else:
        average_hours[day] = pd.NaT


In [None]:
# Step 4: Use the per-weekday average duration to estimate a new center's opening hours

# Specify the new center ID
new_center_id = "b43e9e4f-acd1-4941-874d-e0c5650ab91e"

# Retrieve the data for the new center
new_center_data = df[df['shopping_center_id'] == new_center_id]

# Create a dictionary to store the opening hours for each day
new_center_hours = {}

# Express each weekday's average duration as a datetime.time, i.e. as an
# offset from midnight. `.seconds` is the sub-day part of the Timedelta in
# whole seconds, so the resulting time never carries a fractional part.
# Weekdays whose average is pd.NaT (no center had data) become None: NaT's
# `.seconds` is NaN, which datetime.timedelta rejects with a ValueError.
# The list position equals the weekday number because average_hours is
# filled in range(7) order.
parsed_average_hours = [
    None
    if pd.isna(average_hours[day])
    else (datetime.datetime.min + datetime.timedelta(seconds=average_hours[day].seconds)).time()
    for day in average_hours
]

# Function to calculate the time difference and find the closest hour
def time_difference(z, t):
    """Find the pair of times in ``z`` whose gap is closest to the duration ``t``.

    Every ordered pair ``(i, j)`` with ``z[i]`` strictly later than ``z[j]`` is
    considered, and the pair minimising ``abs((z[i] - z[j]) - t)`` wins. On a
    tie the first pair met in row-major ``(i, j)`` order is kept.

    Parameters
    ----------
    z : sequence of datetime.time
        Candidate times (list, tuple, or pandas Series). The values are read
        once, in iteration order, so the returned indices are positional.
    t : datetime.time
        Target duration encoded as an offset from midnight; only its hour,
        minute and second fields are used.

    Returns
    -------
    tuple
        ``(min_indices, min_diff)``. ``min_indices`` is ``(i, j)`` with
        ``z[i]`` the later and ``z[j]`` the earlier time, and ``min_diff`` is
        the absolute deviation of their gap from the target, as a
        ``datetime.timedelta``. When ``z`` holds fewer than two distinct
        times, ``min_indices`` is the empty tuple ``()`` and ``min_diff`` is
        ``datetime.timedelta.max``; callers must check for that before
        indexing into ``min_indices``.
    """
    # Convert the target time into a timedelta object
    target_gap = datetime.timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
    exact_match = datetime.timedelta(0)

    # Materialise the inputs once: this avoids a per-pair label lookup on a
    # pandas Series and rebuilding the same datetime objects inside the
    # quadratic loop.
    times = list(z)
    anchor_date = datetime.datetime.min.date()
    stamps = [datetime.datetime.combine(anchor_date, moment) for moment in times]

    # Start from the largest possible deviation and no selected pair
    min_diff = datetime.timedelta.max
    min_indices = ()

    for i, later in enumerate(times):
        for j, earlier in enumerate(times):
            # Only pairs where the first slot is strictly later than the second
            if later > earlier:
                time_diff = abs(stamps[i] - stamps[j] - target_gap)

                # Strict comparison keeps the first pair found on ties
                if time_diff < min_diff:
                    min_diff = time_diff
                    min_indices = (i, j)

                    # Nothing can beat an exact match, and ties never
                    # replace the current best, so stop here.
                    if min_diff == exact_match:
                        return min_indices, min_diff

    return min_indices, min_diff


# For each weekday, pick the pair of the new center's visit times whose gap
# is closest to that weekday's average duration.
for day in range(7):
    # Visit times of the new center on this weekday, re-indexed 0..n-1 so the
    # positions returned by time_difference can be used to index the Series
    center_hours_per_day = new_center_data.loc[
        new_center_data['day_of_week'] == day, 'device_local_time'
    ].reset_index(drop=True)

    closest_hour_index = ()
    if not center_hours_per_day.empty:
        # The returned minimum difference is not needed here
        closest_hour_index, _unused_diff = time_difference(center_hours_per_day, parsed_average_hours[day])

    if closest_hour_index:
        # time_difference returns (later, earlier) positions; store the pair
        # as (earlier time, later time)
        later_pos, earlier_pos = closest_hour_index
        closest_hour = (center_hours_per_day[earlier_pos], center_hours_per_day[later_pos])
        new_center_hours[day] = closest_hour
    else:
        # Set NaT when the day has no visits, or when no two distinct visit
        # times exist (time_difference then returns an empty index tuple,
        # which would otherwise raise IndexError when indexed)
        new_center_hours[day] = pd.NaT


Shopping Center: b43e9e4f-acd1-4941-874d-e0c5650ab91e
Day of the week: 0
Opening hours: 00:43:17 - 22:45:36

Day of the week: 1
Opening hours: 00:14:55 - 23:59:12

Day of the week: 2
Opening hours: 00:06:13 - 22:04:58

Day of the week: 3
Opening hours: 03:05:24 - 23:41:20

Day of the week: 4
Opening hours: 00:14:37 - 23:45:00

Day of the week: 5
Opening hours: 00:07:38 - 23:32:20

Day of the week: 6
Opening hours: 00:07:46 - 23:53:37

In [None]:

# Print the opening hours for the new center
print(f"Opening Hours for New Center ({new_center_id}):")
for day_of_week, hours in new_center_hours.items():
    print(f"Day of the week: {day_of_week}")

    # Days without an estimate are stored as pd.NaT, which cannot be unpacked
    if hours is pd.NaT:
        print("Opening hours: not available\n")
        continue

    # Each entry is stored as (earlier time, later time), so unpack in that
    # order to print the start before the end
    start_time, end_time = hours
    print(f"Opening hours: {start_time} - {end_time}\n")

: 