In [27]:
import pandas as pd

# Load dataset-1 from CSV into module-level names used by later cells.
# NOTE(review): this is an absolute path on one specific Windows machine, so the
# cell will fail anywhere else — consider a configurable, relative data directory.
dataset1 = pd.read_csv(r"C:\Users\manoj\OneDrive\Desktop\MapUp-Data-Assessment-F-main\datasets\dataset-1.csv")
# read_csv already returns a DataFrame; this wraps it once more and binds the result to `df`.
df = pd.DataFrame(dataset1)

In [31]:
import pandas as pd

def generate_car_matrix(df) -> pd.DataFrame:
    """
    Creates a DataFrame for id combinations.

    The frame is pivoted so that every ('id_1', 'id_2') pair becomes one cell
    holding the matching 'car' value. Pairs that are absent from `df` are
    filled with 0, and every cell whose row label equals its column label
    (the diagonal) is forced to 0.

    Args:
        df (pandas.DataFrame): Must contain the columns 'id_1', 'id_2' and 'car'.
            The frame is not modified.

    Returns:
        pandas.DataFrame: Matrix generated with 'car' values,
                          where 'id_1' and 'id_2' are used as indices and columns respectively

    Raises:
        ValueError: Propagated from DataFrame.pivot when an ('id_1', 'id_2')
            pair occurs more than once.
    """
    # Work on the frame that was passed in (not on a notebook-level global).
    matrix = df.pivot(index='id_1', columns='id_2', values='car').fillna(0)

    # Zero the diagonal by label rather than by position, so it stays correct
    # even when the set of row ids and the set of column ids differ.
    for col in matrix.columns:
        matrix.loc[matrix.index == col, col] = 0

    return matrix

In [32]:
def get_type_count(df):
    """
    Bucket the 'car' column into low / medium / high and count each bucket.

    Buckets are right-inclusive: values up to and including 15 are 'low',
    values above 15 up to and including 25 are 'medium', anything larger is
    'high'.

    Side effect: a categorical 'car_type' column is written onto `df` in place.

    Args:
        df (pandas.DataFrame): Must contain a numeric 'car' column.

    Returns:
        dict: Bucket label -> number of rows, ordered low, medium, high.
    """
    thresholds = [float('-inf'), 15, 25, float('inf')]
    categories = ['low', 'medium', 'high']

    df['car_type'] = pd.cut(df['car'], bins=thresholds, labels=categories)

    tally = df['car_type'].value_counts()
    return tally.sort_index().to_dict()


In [33]:
def get_bus_indexes(df):
    """
    List the index labels of rows whose 'bus' value is more than twice the mean.

    Args:
        df (pandas.DataFrame): Must contain a numeric 'bus' column.

    Returns:
        list: Index labels, in frame order, where 'bus' > 2 * mean('bus').
    """
    bus = df['bus']
    cutoff = bus.mean() * 2
    exceeds = bus > cutoff
    return df.loc[exceeds].index.tolist()


In [34]:
def filter_routes(df):
    """
    Return the routes whose average 'truck' value is strictly greater than 7.

    Args:
        df (pandas.DataFrame): Must contain 'route' and a numeric 'truck' column.

    Returns:
        list: Route labels, in the sorted order produced by groupby.
    """
    per_route = df.groupby('route')['truck']
    route_means = per_route.mean()
    busy = route_means.loc[lambda means: means > 7]
    return busy.index.tolist()


In [35]:
def multiply_matrix(matrix, threshold=10, factor=2):
    """
    Scale every value strictly greater than `threshold` by `factor`.

    Values that are not greater than the threshold (including NaN, for which
    the comparison is False) are returned unchanged. The input frame is not
    modified.

    Args:
        matrix (pandas.DataFrame): Numeric frame to transform.
        threshold (float): Values strictly above this are scaled. Defaults to 10.
        factor (float): Multiplier applied to the selected values. Defaults to 2.

    Returns:
        pandas.DataFrame: New frame with the same index and columns as `matrix`.
    """
    # Vectorised form of the former per-cell `applymap` lambda: `applymap` is
    # deprecated since pandas 2.1 and calls a Python function for every cell.
    return matrix.mask(matrix > threshold, matrix * factor)


In [37]:
def time_check(df):
    """
    Report, per ('id', 'id_2') pair, whether its timestamps span at least 7 days.

    The span is the difference between the latest and earliest 'timestamp'
    in the group, measured in seconds.

    Side effect: the 'timestamp' column of `df` is converted to datetimes in place.

    Args:
        df (pandas.DataFrame): Must contain 'id', 'id_2' and 'timestamp'.

    Returns:
        pandas.DataFrame: Columns 'id', 'id_2' and a boolean 'check' that is
            True when the span is >= 7 * 24 * 3600 seconds.
    """
    week_in_seconds = 7 * 24 * 3600

    def span_in_seconds(stamps):
        return (stamps.max() - stamps.min()).total_seconds()

    df['timestamp'] = pd.to_datetime(df['timestamp'])
    spans = df.groupby(['id', 'id_2'])['timestamp'].agg(span_in_seconds)
    covers_week = spans >= week_in_seconds
    return covers_week.reset_index(name='check')


In [43]:

from datetime import datetime, timedelta

def verify_timestamp_completeness(df):
    """
    Evaluate a day-of-week coverage check for every ('id', 'id_2') pair.

    Start and end timestamps are built by joining the day and time columns
    with a space and parsing the result with pd.to_datetime. A row counts as
    a "full day" when its end minus its start is at least 24 hours.

    Side effect: the columns 'start_timestamp', 'end_timestamp', 'duration',
    'full_day' and 'day_of_week' are written onto `df` in place.

    Args:
        df (pandas.DataFrame): Must contain 'id', 'id_2', 'startDay',
            'startTime', 'endDay' and 'endTime' (string columns).

    Returns:
        pandas.Series: One boolean per ('id', 'id_2') group.

    NOTE(review): the per-pair check counts rows per day_of_week and tests how
    many of those counts are 0. groupby only yields days that actually have
    rows, so no count is ever 0 and the result is True for every pair. The
    intended rule should be confirmed before this output is relied on.
    """
    start_text = df['startDay'] + ' ' + df['startTime']
    df['start_timestamp'] = pd.to_datetime(start_text)
    end_text = df['endDay'] + ' ' + df['endTime']
    df['end_timestamp'] = pd.to_datetime(end_text)

    df['duration'] = df['end_timestamp'] - df['start_timestamp']
    df['full_day'] = df['duration'] >= timedelta(days=1)
    df['day_of_week'] = df['start_timestamp'].dt.dayofweek

    def _check_pair(rows):
        rows_per_day = rows.groupby('day_of_week')['full_day'].count()
        days_with_no_rows = rows_per_day.eq(0).sum()
        return days_with_no_rows != 7

    return df.groupby(['id', 'id_2']).apply(_check_pair)
