In [294]:
import pandas as pd
import numpy as np
from datetime import datetime

### Function

In [295]:
def read_until_null_excel(file_path, sheet_name):
    df = pd.read_excel(file_path, sheet_name=sheet_name)
    null_row_index = df.isnull().all(axis=1).idxmax() if df.isnull().all(axis=1).any() else None

    if null_row_index is not None:
        df = df.iloc[:null_row_index]
    return df

def is_group_in_item(item, group):
    item_str = str(item)
    group_str = str(group)
    return group_str in item_str
def map_group_numbers(df1, df2, item_column, vendor_column, groups_column):
    result = []
    for _, row in df1.iterrows():
        item = row[item_column]
        vendor = str(row[vendor_column])
        matched_group = ''
        for _, group_row in df2.iterrows():
            group_vendor = str(group_row[vendor_column])
            if group_vendor == vendor:
                group = group_row[groups_column]
                if is_group_in_item(item, group):
                    matched_group = group
                    print(f"Match found: Item '{item}' (Vendor: '{vendor}') matches Group '{group}'")
                    break
        result.append(matched_group)
        if not matched_group:
            print(f"No match for Item '{item}' (Vendor: '{vendor}')")
    return result

def first_non_null_column_name(df, start_col, end_col, new_col_name):
    columns_to_check = df.loc[:, start_col:end_col].columns

    def find_first_non_null(row):
        for col in columns_to_check:
            if pd.notnull(row[col]) and row[col] != 0:
                return col
        return None

    df[new_col_name] = df.apply(find_first_non_null, axis=1)
    return df

#Sum pairs
def sum_pairs(df):
    # Initialize an empty list to store the results
    result_list = []
    
    # Separate numeric and non-numeric columns
    numeric_cols = df.select_dtypes(include='number').columns
    non_numeric_cols = df.select_dtypes(exclude='number').columns
    
    # Iterate through the DataFrame in steps of 2
    for i in range(0, len(df), 2):
        # Check if the next row exists
        if i + 1 < len(df):
            # Sum the current row and the next row for numeric columns
            summed_row = df.iloc[i:i+2][numeric_cols].sum()
            # Keep the non-numeric column from the first row
            non_numeric = df.iloc[i][non_numeric_cols]
            # Combine numeric and non-numeric results
            combined = pd.concat([summed_row, non_numeric])
            # Append the result to the list
            result_list.append(combined)
    
    # Convert the list of results to a DataFrame
    result = pd.DataFrame(result_list)
    
    return result

### Import File

In [296]:
df_2 = pd.read_csv('Production ScheduleDoLuong21551.csv', skiprows = 6)
df_2['Vendor #'] = df_2['Vendor #'].astype(float)

In [297]:
file_input = 'April HP 2024 Price Confirmation Roll-Upv 7.28.xlsx'
sheet_name = 'April HP 2024 Price Confirmatio'
df_1 = read_until_null_excel(file_input, sheet_name)

  warn("Workbook contains no default style, apply openpyxl's default")


### Modified df_1 (File SmartSheet)

In [298]:
vendor_take = df_1['Vendor #'].unique()
df_2 = df_2[df_2['Vendor #'].isin(vendor_take)]

In [299]:
split_columns = df_1['Group Number'].str.split(' ', expand=True)

In [300]:
if split_columns.shape == 2:
    split_columns.columns = ['Group Number Split','Additional Component']
else:
    split_columns = split_columns.rename(columns={0: 'Group Number Split'})
    split_columns['Additional Component'] = None

In [301]:
df_1 = pd.concat([df_1, split_columns], axis=1)
group_number_list = df_1['Group Number Split'].unique().tolist()
df_1.drop(columns=['Group Number'], inplace=True)
df_1 = df_1.rename(columns={'Group Number Split': 'Group Number'})

### Modified df_2 (File System)

In [302]:
df_2['Item #'] = df_2['Item #'].str.replace('="', '').str.replace('"', '')

In [303]:
df_2_sample = df_2.loc[(df_2['S/F/P'] == 'F') | (df_2['S/F/P'] == 'S')]
df_2_sample = df_2_sample.loc[:,df_2_sample.columns[4]:]

In [304]:
df_2_sample  = df_2_sample.astype(float)

In [305]:
df = pd.DataFrame(df_2_sample)
result = sum_pairs(df)

In [306]:
df_2['Item #'] = df_2['Item #'].str.replace('="', '').str.replace('"', '')
df_2 = df_2.loc[df_2['S/F/P'] == 'F']

In [307]:
df_2.iloc[:, 4:].astype(float)
df_2.iloc[:, 4:] = np.nan

In [308]:
df_2.iloc[:,4:] = result

In [309]:
item_column = 'Item #'
vendor_column = 'Vendor #'
groups_column = 'Group Number'
df_2['Group Number'] = map_group_numbers(df_2, df_1, item_column, vendor_column, groups_column)


No match for Item '01134' (Vendor: '900352.0')
No match for Item '01222' (Vendor: '900352.0')
No match for Item '01235' (Vendor: '900352.0')
No match for Item '01266' (Vendor: '900352.0')
No match for Item '01279' (Vendor: '900352.0')
No match for Item '01281' (Vendor: '900352.0')
No match for Item '01282' (Vendor: '900352.0')
No match for Item '02193' (Vendor: '900352.0')
No match for Item '03034' (Vendor: '900352.0')
No match for Item '03948' (Vendor: '900352.0')
No match for Item '04483' (Vendor: '900352.0')
No match for Item '04484' (Vendor: '900352.0')
No match for Item '09646' (Vendor: '900352.0')
No match for Item '09697' (Vendor: '900352.0')
No match for Item '100-17' (Vendor: '617616.0')
No match for Item '100-18' (Vendor: '617616.0')
No match for Item '100-62' (Vendor: '617616.0')
No match for Item '100-62' (Vendor: '617616.0')
No match for Item '100-62' (Vendor: '617616.0')
No match for Item '10516565' (Vendor: '900352.0')
No match for Item '10517693' (Vendor: '900352.0')
No

In [310]:
df_2['Group Number'].value_counts()

Group Number
            4584
D546         122
B944          84
D642          65
B895          63
D974          56
P704          42
D634          38
P572          36
D833          27
D423          24
T734          21
D496          18
D494          18
D476          17
T679          16
T632          16
D622          16
T574          14
T551          10
D501           8
T304           7
A8010384       7
A8010383       7
T313           5
T315           2
T280           2
T285           2
Name: count, dtype: int64

### Create File Check

In [311]:
df_check = df_2.loc[df_2['Group Number'] != '']

In [312]:
df_check

Unnamed: 0,Item #,Whse,Vendor #,S/F/P,7/20,7/27,8/3,8/10,8/17,8/24,...,10/12,10/19,10/26,11/2,11/9,11/16,11/23,11/30,12/7,Group Number
60490,A8010383,1,622820.0,F,0.0,0.0,0.0,0.0,0.0,133.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,A8010383
60493,A8010383,15,622820.0,F,0.0,0.0,0.0,0.0,0.0,144.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,A8010383
60496,A8010383,17,622820.0,F,0.0,0.0,0.0,0.0,0.0,140.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,A8010383
60499,A8010383,28,622820.0,F,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,A8010383
60502,A8010383,335,622820.0,F,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,A8010383
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
129982,T734-3,17,632409.0,F,0.0,0.0,0.0,200.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,T734
129985,T734-3,28,632409.0,F,0.0,0.0,0.0,0.0,0.0,100.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,T734
129988,T734-3,335,632409.0,F,0.0,0.0,0.0,0.0,0.0,60.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,T734
129991,T734-3,5,632409.0,F,0.0,0.0,80.0,0.0,0.0,92.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,T734


In [313]:
df_check.loc[:, 'Arcadia ETD System'] = np.where(df_check['Whse'].isin({'1'}), 1, 0)
df_check.loc[:, 'EC ETD System'] = np.where(df_check['Whse'].isin({'15', '17', 'ECR'}), 1, 0)
df_check.loc[:, 'WC ETD System'] = np.where(df_check['Whse'].isin({'42', '28', '5'}), 1, 0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_check.loc[:, 'Arcadia ETD System'] = np.where(df_check['Whse'].isin({'1'}), 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_check.loc[:, 'EC ETD System'] = np.where(df_check['Whse'].isin({'15', '17', 'ECR'}), 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_check.loc[:, 'WC ETD Sy

In [314]:
df_check_v2  = pd.merge(df_check, df_1[['Vendor #','Group Number','Additional Component','Arcadia ETD','EC ETD', 'WC ETD','Categories']], on =['Group Number','Vendor #'], how = 'left')

In [315]:
df_check_v2

Unnamed: 0,Item #,Whse,Vendor #,S/F/P,7/20,7/27,8/3,8/10,8/17,8/24,...,12/7,Group Number,Arcadia ETD System,EC ETD System,WC ETD System,Additional Component,Arcadia ETD,EC ETD,WC ETD,Categories
0,A8010383,1,622820.0,F,0.0,0.0,0.0,0.0,0.0,133.0,...,0.0,A8010383,1,0,0,,2024-08-24,2024-08-24,2024-09-07,Accessories
1,A8010383,15,622820.0,F,0.0,0.0,0.0,0.0,0.0,144.0,...,0.0,A8010383,0,1,0,,2024-08-24,2024-08-24,2024-09-07,Accessories
2,A8010383,17,622820.0,F,0.0,0.0,0.0,0.0,0.0,140.0,...,0.0,A8010383,0,1,0,,2024-08-24,2024-08-24,2024-09-07,Accessories
3,A8010383,28,622820.0,F,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,A8010383,0,0,1,,2024-08-24,2024-08-24,2024-09-07,Accessories
4,A8010383,335,622820.0,F,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,A8010383,0,0,0,,2024-08-24,2024-08-24,2024-09-07,Accessories
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
738,T734-3,17,632409.0,F,0.0,0.0,0.0,200.0,0.0,0.0,...,0.0,T734,0,1,0,,2024-08-10,2024-08-10,2024-08-24,Occasional
739,T734-3,28,632409.0,F,0.0,0.0,0.0,0.0,0.0,100.0,...,0.0,T734,0,0,1,,2024-08-10,2024-08-10,2024-08-24,Occasional
740,T734-3,335,632409.0,F,0.0,0.0,0.0,0.0,0.0,60.0,...,0.0,T734,0,0,0,,2024-08-10,2024-08-10,2024-08-24,Occasional
741,T734-3,5,632409.0,F,0.0,0.0,80.0,0.0,0.0,92.0,...,0.0,T734,0,0,1,,2024-08-10,2024-08-10,2024-08-24,Occasional


In [93]:
df_check_v3 = first_non_null_column_name(df_check_v2, ' 7/20', ' 12/14', 'Date_First_Value')

KeyError: ' 12/14'

In [None]:
current_year = datetime.now().strftime('%Y')
df_check_v3['Date_First_Value'] = df_check_v3['Date_First_Value'].str.strip()
df_check_v3['Date_First_Value']= df_check_v3['Date_First_Value'] + '/' + current_year

In [None]:
df_check_v3['Date_First_Value'] = pd.to_datetime(df_check_v3['Date_First_Value'], format='%m/%d/%Y', errors='coerce')
df_check_v3['Date_First_Value'] = df_check_v3['Date_First_Value'].dt.strftime('%Y-%m-%d')

In [None]:
df_check_v3['Arcadia ETD System Final'] = np.where(df_check_v3['Arcadia ETD System'] == 1, df_check_v3['Date_First_Value'],0)
df_check_v3['EC ETD System Final'] = np.where(df_check_v3['EC ETD System'] == 1, df_check_v3['Date_First_Value'],0)
df_check_v3['WC ETD System Final'] = np.where(df_check_v3['WC ETD System'] == 1, df_check_v3['Date_First_Value'],0)

In [None]:
df_filtered = df_check_v3[['Categories','Item #','Whse','Group Number','Additional Component','Vendor #','Arcadia ETD System','EC ETD System','WC ETD System','Arcadia ETD System Final','EC ETD System Final','WC ETD System Final', 'Arcadia ETD','EC ETD','WC ETD']]

In [None]:
df_filtered['Arcadia ETD'] = df_filtered['Arcadia ETD'].dt.strftime('%Y-%m-%d')
df_filtered['EC ETD'] = df_filtered['EC ETD'].dt.strftime('%Y-%m-%d')
df_filtered['WC ETD'] = df_filtered['WC ETD'].dt.strftime('%Y-%m-%d')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Arcadia ETD'] = df_filtered['Arcadia ETD'].dt.strftime('%Y-%m-%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['EC ETD'] = df_filtered['EC ETD'].dt.strftime('%Y-%m-%d')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['WC ETD'] = df_filtered['WC ETD'].dt.strfti

In [None]:
df_filtered['Arcadia ETD Smartsheet'] = np.where(df_filtered['Arcadia ETD System'] == 1, df_filtered['Arcadia ETD'], 0)
df_filtered['EC ETD Smartsheet'] = np.where(df_filtered['EC ETD System'] == 1, df_filtered['EC ETD'], 0)
df_filtered['WC ETD Smartsheet'] = np.where(df_filtered['WC ETD System'] == 1, df_filtered['WC ETD'], 0)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['Arcadia ETD Smartsheet'] = np.where(df_filtered['Arcadia ETD System'] == 1, df_filtered['Arcadia ETD'], 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['EC ETD Smartsheet'] = np.where(df_filtered['EC ETD System'] == 1, df_filtered['EC ETD'], 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-v

In [None]:
df_filtered_next = df_filtered[['Item #','Whse','Group Number','Additional Component','Vendor #','Arcadia ETD System Final','EC ETD System Final','WC ETD System Final','Arcadia ETD Smartsheet','EC ETD Smartsheet','WC ETD Smartsheet']]

In [None]:
df_filtered_next['Check True/False'] = np.where(
    (df_filtered_next['Arcadia ETD System Final'] == df_filtered_next['Arcadia ETD Smartsheet']) &
    (df_filtered_next['EC ETD System Final'] == df_filtered_next['EC ETD Smartsheet']) &
    (df_filtered_next['WC ETD System Final'] == df_filtered_next['WC ETD Smartsheet']),
    'True', 'False'
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered_next['Check True/False'] = np.where(


In [None]:
df_filtered_next

Unnamed: 0,Item #,Whse,Group Number,Additional Component,Vendor #,Arcadia ETD System Final,EC ETD System Final,WC ETD System Final,Arcadia ETD Smartsheet,EC ETD Smartsheet,WC ETD Smartsheet,Check True/False
0,A3000693,1,A3000693,,626704.0,2024-08-17,0,0,2024-08-10,0,0,False
1,A3000693,15,A3000693,,626704.0,0,2024-08-17,0,0,2024-08-10,0,False
2,A3000693,17,A3000693,,626704.0,0,2024-08-17,0,0,2024-08-10,0,False
3,A3000693,28,A3000693,,626704.0,0,0,2024-08-24,0,0,2024-08-24,True
4,A3000693,42,A3000693,,626704.0,0,0,2024-08-24,0,0,2024-08-24,True
...,...,...,...,...,...,...,...,...,...,...,...,...
2331,W769-68,28,W769-68,,609886.0,0,0,2024-08-24,0,0,2024-07-06,False
2332,W769-68,42,W769-68,,609886.0,0,0,,0,0,2024-07-06,False
2333,W769-68,5,W769-68,,609886.0,0,0,,0,0,2024-07-06,False
2334,W769-68,CNW,W769-68,,609886.0,0,0,0,0,0,0,True
