In [8]:
import pandas as pd

In [9]:
# Load the pick-note / credit-note table from pcr_v2.csv and preview its first five rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which suggests the CSV was
# written together with its index - confirm whether index_col=0 is wanted here.
pn_cn = pd.read_csv('./pcr_v2.csv')
pn_cn.head()

Unnamed: 0,PN_Date,CN_Date,Pick_Note,Credit_Note,Key,Vendor,PN_Qty,CN_Qty,PN_Remaining,CN_Remaining,Original_PN_Qty,Original_CN_Qty
0,2024-11-23,2024-11-25,PR201008895,DN201006195,322_11299_SIF1190A,AHMEDABAD MEDICAL CORPORATION,1.0,1.0,0.0,0.0,1,1.0
1,2024-11-20,2024-11-22,PR201008882,DN201006163,322_11303_SIF1510A,AHMEDABAD MEDICAL CORPORATION,1.0,1.0,0.0,0.0,1,1.0
2,2024-11-16,2024-11-19,PR201008755,DN201006186,322_1178_FHA0249,AHMEDABAD MEDICAL CORPORATION,6.0,6.0,0.0,0.0,6,6.0
3,2024-12-28,2024-12-28,PR201009347,DN201006646,322_13460_SIF1920A,AHMEDABAD MEDICAL CORPORATION,6.0,6.0,0.0,0.0,6,6.0
4,2024-11-06,2024-11-19,PR201008535,DN201006186,322_13492_BA31427,AHMEDABAD MEDICAL CORPORATION,1.0,1.0,0.0,0.0,1,1.0


In [10]:
# Count how often each Key occurs in pn_cn and keep only the keys that occur more than twice.
# NOTE(review): this comment used to say "greater than 1" while the code filtered on > 2;
# the code's threshold is kept unchanged - confirm which of the two is intended.

X = pn_cn['Key'].value_counts().loc[lambda counts: counts > 2]

# Save the selected keys with their counts as headerless "key,count" lines.
X.to_csv('keys.txt', header=False)

In [7]:
# Write the pn_cn table to MultiKey.csv without the DataFrame index.
# NOTE(review): this saves every row of pn_cn, not only the frequently occurring keys
# collected in X - confirm that is intended given the file name.
# NOTE(review): this cell's execution count (7) is lower than the cells above it, so it
# was last run out of order; re-run the notebook top to bottom before relying on the file.
pn_cn.to_csv("MultiKey.csv", index=False)

In [None]:
import pandas as pd

def _load_dated_rows(path, date_col, date_format):
    """Read ``path`` and return the rows whose ``date_col`` parses, ordered by key then date."""
    frame = pd.read_csv(path)
    # '-' entries are treated as missing; anything else that does not match
    # ``date_format`` is coerced to NaT and dropped together with them.
    frame[date_col] = pd.to_datetime(
        frame[date_col].replace('-', pd.NaT), format=date_format, errors='coerce'
    )
    return frame.dropna(subset=[date_col]).sort_values(['key', date_col])


def _split_by_key(frame):
    """Return ``{key: rows}``; each group keeps its row order and is re-indexed from 0."""
    # One pass over the frame instead of one boolean scan per key.
    # groupby skips rows whose key is NaN; the previous ``== key`` filter never
    # matched those rows either, so they were already left out of the result.
    return {
        key: rows.reset_index(drop=True)
        for key, rows in frame.groupby('key', sort=False)
    }


def _cn_only_records(key, key_cns):
    """Build one unmatched record per credit note for a key that has no pick notes."""
    records = []
    for _, cn_row in key_cns.iterrows():
        records.append({
            'PN_Date': None,
            'CN_Date': cn_row['vendor_cn_date'],
            'Pick_Note': None,
            'Credit_Note': cn_row['debit_note_number'],
            'Key': key,
            'Vendor': cn_row['vendor_name'],
            'Original_PN_Qty': 0,
            'Original_CN_Qty': cn_row['billed_quantity'],
            'PN_Qty': 0,
            'CN_Qty': cn_row['billed_quantity'],
            'PN_Remaining': 0,  # no PN exists for this key
            'CN_Remaining': cn_row['billed_quantity']  # nothing was mapped, so all of it remains
        })
    return records


def _pn_only_records(key, key_pns):
    """Build one unmatched record per pick note for a key that has no credit notes."""
    records = []
    for _, pn_row in key_pns.iterrows():
        records.append({
            'PN_Date': pn_row['date'],
            'CN_Date': None,
            'Pick_Note': pn_row['pick_note_number'],
            'Credit_Note': None,
            'Key': key,
            'Vendor': pn_row['vendor_name'],
            'Original_PN_Qty': pn_row['quantity'],
            'Original_CN_Qty': 0,
            'PN_Qty': pn_row['quantity'],
            'CN_Qty': 0,
            'PN_Remaining': pn_row['quantity'],  # nothing was mapped, so all of it remains
            'CN_Remaining': 0  # no CN exists for this key
        })
    return records


def _paired_records(key, key_pns, key_cns):
    """Pair the i-th pick note with the i-th credit note of one key, carrying leftovers forward.

    At every step the available quantity on each side is the current row's
    quantity (if that side still has a row) plus whatever was left unmatched on
    that side after the previous step. The mapped quantity is the smaller of
    the two when both are positive, otherwise 0.
    """
    records = []
    pn_qty_remaining = 0
    cn_qty_remaining = 0

    # Note numbers that contributed to the quantity currently carried forward.
    pick_notes = []
    credit_notes = []

    for i in range(max(len(key_pns), len(key_cns))):
        has_pn = i < len(key_pns)
        has_cn = i < len(key_cns)

        if has_pn:
            pn_row = key_pns.iloc[i]
            pn_qty = pn_row['quantity'] + pn_qty_remaining
            pick_notes.append(pn_row['pick_note_number'])
        else:
            pn_qty = pn_qty_remaining

        if has_cn:
            cn_row = key_cns.iloc[i]
            cn_qty = cn_row['billed_quantity'] + cn_qty_remaining
            credit_notes.append(cn_row['debit_note_number'])
        else:
            cn_qty = cn_qty_remaining

        qty_to_map = min(pn_qty, cn_qty) if (pn_qty > 0 and cn_qty > 0) else 0
        pn_left = pn_qty - qty_to_map
        cn_left = cn_qty - qty_to_map

        records.append({
            'PN_Date': pn_row['date'] if has_pn else None,
            'CN_Date': cn_row['vendor_cn_date'] if has_cn else None,
            # Copies, because the lists keep changing on later iterations.
            'Pick_Note': pick_notes.copy() if has_pn else None,
            'Credit_Note': credit_notes.copy() if has_cn else None,
            'Key': key,
            # At least one side has a row on every iteration of this loop.
            'Vendor': pn_row['vendor_name'] if has_pn else cn_row['vendor_name'],
            'Original_PN_Qty': pn_row['quantity'] if has_pn else 0,
            'Original_CN_Qty': cn_row['billed_quantity'] if has_cn else 0,
            'PN_Qty': qty_to_map,
            'CN_Qty': qty_to_map,
            'PN_Remaining': pn_left,
            'CN_Remaining': cn_left
        })

        # Written as a comparison rather than max(): a NaN leftover must reset
        # the carry to 0 instead of propagating into later rows.
        pn_qty_remaining = pn_left if pn_left > 0 else 0
        cn_qty_remaining = cn_left if cn_left > 0 else 0

        # Once a side is fully consumed its accumulated note numbers start over.
        if pn_qty_remaining == 0:
            pick_notes = []
        if cn_qty_remaining == 0:
            credit_notes = []

    return records


def process_mapping2(pn_path='pn_data.csv', cn_path='cn_data.csv',
                     output_path='complete_mapping2.csv'):
    """Map pick-note (PN) quantities to credit-note (CN) quantities per key.

    Both CSV files are loaded, rows without a parseable date are dropped, and
    the remaining rows are processed key by key in date order:

    * a key present in only one file yields one unmatched record per row;
    * a key present in both files is paired row by row, with unmatched
      quantity carried forward to the next row of the same key.

    Keys are processed in a deterministic order (PN keys in sorted order, then
    keys that occur only in the CN file in sorted order), so repeated runs on
    the same input write identical files.

    Parameters
    ----------
    pn_path : str
        CSV with the columns ``key``, ``date`` (``%d-%m-%Y``),
        ``pick_note_number``, ``vendor_name`` and ``quantity``.
    cn_path : str
        CSV with the columns ``key``, ``vendor_cn_date`` (``%Y-%m-%d``),
        ``debit_note_number``, ``vendor_name`` and ``billed_quantity``.
    output_path : str
        Destination CSV for the mapping; written without the index.

    Returns
    -------
    pandas.DataFrame
        One row per mapping record, with dates formatted as ``YYYY-MM-DD``
        strings and a ``Remarks`` column produced by ``add_remarks``.

    Notes
    -----
    ``add_remarks(pn_remaining, cn_remaining)`` is not defined in this cell; it
    must already exist in the notebook namespace when this function is called.
    """
    columns = [
        'PN_Date', 'CN_Date', 'Pick_Note', 'Credit_Note', 'Key', 'Vendor',
        'Original_PN_Qty', 'Original_CN_Qty', 'PN_Qty', 'CN_Qty',
        'PN_Remaining', 'CN_Remaining',
    ]

    pn = _load_dated_rows(pn_path, 'date', '%d-%m-%Y')
    cn = _load_dated_rows(cn_path, 'vendor_cn_date', '%Y-%m-%d')

    pn_by_key = _split_by_key(pn)
    cn_by_key = _split_by_key(cn)

    mapped_records = []
    # dict.fromkeys de-duplicates while keeping insertion order, unlike a set,
    # whose iteration order for strings changes from one kernel run to the next.
    for key in dict.fromkeys([*pn_by_key, *cn_by_key]):
        key_pns = pn_by_key.get(key)
        key_cns = cn_by_key.get(key)

        if key_pns is None:
            mapped_records.extend(_cn_only_records(key, key_cns))
        elif key_cns is None:
            mapped_records.extend(_pn_only_records(key, key_pns))
        else:
            mapped_records.extend(_paired_records(key, key_pns, key_cns))

    # Fixing the columns up front keeps the steps below working when there are
    # no records at all; for non-empty results the column order is unchanged.
    result_df = pd.DataFrame(mapped_records, columns=columns)
    for date_col in ('PN_Date', 'CN_Date'):
        result_df[date_col] = result_df[date_col].apply(
            lambda x: x.strftime('%Y-%m-%d') if pd.notnull(x) else None
        )

    # Remarks depend only on the two remaining-quantity columns, so iterate
    # those directly; this also works on an empty frame, where a row-wise
    # DataFrame.apply does not return something assignable to a column.
    result_df['Remarks'] = [
        add_remarks(pn_left, cn_left)
        for pn_left, cn_left in zip(result_df['PN_Remaining'], result_df['CN_Remaining'])
    ]

    result_df.to_csv(output_path, index=False)
    return result_df

# Run the PN/CN mapping and keep the returned DataFrame in `results`;
# the call also writes the mapping to complete_mapping2.csv.
results = process_mapping2()