In [1]:
import pandas as pd
import numpy as np
import warnings
import os
# NOTE(review): this silences every warning for the whole notebook, including
# pandas deprecation and SettingWithCopy warnings raised by later cells.
warnings.filterwarnings("ignore")

# The client workbook is resolved relative to the directory the notebook runs from.
cwd = os.getcwd()
rm_missing = os.path.join(
    cwd,
    "client_data",
    "18-04-2024_Missing_RM_Prices_Update (1).xlsx",
)

# Load the missing-price sheet and discard the 'Remarks' column straight away.
df_RM = pd.read_excel(rm_missing, sheet_name='I. Missing RM Prices').drop(columns='Remarks')

In [2]:
# Reshape the sheet from wide (one column per month) to long
# (one row per Factory / RM / month).
id_columns = ['Factory', 'RMSKUCode', 'RM SKU Description', 'UOM', 'Currency']

# Every column that is not an identifier is melted. Selecting them by name
# (instead of the positional df_RM.columns[5:]) keeps the reshape correct even
# if the identifier columns are not the first five columns of the sheet.
month_columns = [col for col in df_RM.columns if col not in id_columns]

df_melted = pd.melt(
    df_RM,
    id_vars=id_columns,
    value_vars=month_columns,
    var_name='Date',
    value_name='RM Cost',
)

In [3]:
# Exchange rates revised for year 2023 (the earlier KWD rate was 0.306001).
# 'RM Cost' is divided by the rate of the row's currency, i.e. the rates are
# local-currency units per one unit of the target currency
# (presumably USD -- TODO confirm).
exchange_rates = {
    'KWD': 0.3072,
    'SAR': 3.75000,
    'AED': 3.672500,
}

# Month labels used as column headers in the workbook -> month number.
month_mapping = {
    'Jan': 1, 'Feb': 2, 'Mar': 3, 'Apr': 4,
    'May': 5, 'Jun': 6, 'Jul': 7, 'Aug': 8,
    'Sep': 9, 'Oct': 10, 'Nov': 11, 'Dec': 12
}

# Look up the rate for every row in one vectorised pass. A currency without a
# configured rate still raises KeyError, as the per-row dictionary lookup did.
rate_per_row = df_melted['Currency'].map(exchange_rates)
rows_without_rate = rate_per_row.isna()
if rows_without_rate.any():
    unknown_currencies = sorted(df_melted.loc[rows_without_rate, 'Currency'].astype(str).unique())
    raise KeyError(f"No exchange rate defined for currency value(s): {unknown_currencies}")

# Fail loudly on a column header that is not a known month label; otherwise the
# row would get a NaN month and silently never match anything downstream.
month_number = df_melted['Date'].map(month_mapping)
if month_number.isna().any():
    unknown_labels = sorted(df_melted.loc[month_number.isna(), 'Date'].astype(str).unique())
    raise ValueError(f"Unrecognised month column(s) in the workbook: {unknown_labels}")

# Build the converted frame separately so that df_melted keeps the
# local-currency values and re-running this cell cannot divide by the rate twice.
df_converted = df_melted.copy()
df_converted['RM Cost'] = df_melted['RM Cost'] / rate_per_row
df_converted['MthNum'] = month_number
# Flag a row as retained only when a price is actually present. Blank cells in
# the workbook get 0, consistent with the previously mapped data where rows
# without a cost carry retain_flag 0.
df_converted['retain flag'] = df_converted['RM Cost'].notna().astype(int)

# Keep only the columns used downstream and align the key names with the
# previously mapped cost data.
df_Missing_RM_Cost = df_converted[['Factory', 'RMSKUCode', 'MthNum', 'RM Cost', 'retain flag']].rename(
    columns={'Factory': 'FACTCode', 'RMSKUCode': 'COMP_ITEM_CODE'}
)
df_Missing_RM_Cost

Unnamed: 0,FACTCode,COMP_ITEM_CODE,MthNum,RM Cost,retain flag
0,KFC,1006102560,1,0.003255,1
1,GFC,1006102560,1,0.007897,1
2,KFC,2013900198,1,0.097656,1
3,KFC,2013115034,1,0.129232,1
4,KFC,2013904050,1,0.141314,1
...,...,...,...,...,...
2839,NFC,2013119016,12,0.000272,1
2840,NFC,2013119017,12,0.000272,1
2841,NFC,2013900198,12,0.000272,1
2842,NFC,2013995077,12,0.000272,1


In [4]:

# Previously mapped RM costs. The item code is cast to float so that it has the
# same dtype (and string form) as the item code of the new data below.
old_data = pd.read_csv(r"RM_Cost_mapped_old.csv").astype({'COMP_ITEM_CODE': float})

# Work on an independent, float-coded copy of the newly supplied prices;
# df_Missing_RM_Cost itself is left untouched.
new_data = df_Missing_RM_Cost.astype({'COMP_ITEM_CODE': float})

In [5]:
# Show the column dtypes of the previously mapped cost data.
old_data.dtypes

FACTCode           object
COMP_ITEM_CODE    float64
MthNum              int64
RM Cost           float64
retain_flag         int64
dtype: object

In [6]:
# Show the column dtypes of the new price data. Note the flag column is named
# 'retain flag' (with a space) here, while old_data uses 'retain_flag'.
new_data.dtypes

FACTCode           object
COMP_ITEM_CODE    float64
MthNum              int64
RM Cost           float64
retain flag         int64
dtype: object

In [7]:
# Split the previously mapped data by retain_flag: rows flagged 1 are kept as
# they are, rows flagged 0 are the ones to be filled from the new prices.
# .copy() makes both parts independent frames, so columns can be added to them
# later without writing into a slice of old_data (SettingWithCopyWarning).
old_data_1 = old_data[old_data['retain_flag'] == 1].copy()
old_data_0 = old_data[old_data['retain_flag'] == 0].copy()
old_data_0

Unnamed: 0,FACTCode,COMP_ITEM_CODE,MthNum,RM Cost,retain_flag
1188,GFC,1.006103e+09,1,,0
1189,GFC,1.006103e+09,2,,0
1190,GFC,1.006103e+09,3,,0
1191,GFC,1.006103e+09,4,,0
1192,GFC,1.006103e+09,5,,0
...,...,...,...,...,...
13831,GFC,1.006106e+09,8,,0
13832,GFC,1.006106e+09,9,,0
13833,GFC,1.006106e+09,10,,0
13834,GFC,1.006106e+09,11,,0


In [8]:
# Preview the first rows of the old data that already carry retain_flag == 1.
old_data_1.head()

Unnamed: 0,FACTCode,COMP_ITEM_CODE,MthNum,RM Cost,retain_flag
0,NFC,2011104000.0,1,0.273406,1
1,NFC,2011104000.0,2,0.273406,1
2,NFC,2011104000.0,3,0.273406,1
3,NFC,2011104000.0,4,0.278191,1
4,NFC,2011104000.0,5,0.282997,1


In [9]:
# Look up, for every old row with retain_flag == 0, the newly supplied price of
# the same factory / RM / month.
# Merging on the three key columns directly replaces the hand-built string key
# and no longer writes a helper column into old_data_0 (a slice of old_data)
# or into new_data.
merge_keys = ['FACTCode', 'COMP_ITEM_CODE', 'MthNum']

df_old_0_new = pd.merge(
    old_data_0[merge_keys],
    new_data[merge_keys + ['RM Cost', 'retain flag']],
    on=merge_keys,
    how='left',  # keep every flag-0 row, even when no new price exists for it
)
df_old_0_new = df_old_0_new.rename(columns={'retain flag': 'retain_flag'})

# Rows without a match in new_data get NaN from the left join: they stay
# flagged 0. Cast back to int, because the NaNs turned the column into float.
df_old_0_new['retain_flag'] = df_old_0_new['retain_flag'].fillna(0).astype(int)

In [10]:
# Recombine the untouched flag-1 rows with the (partly) filled flag-0 rows.
group_keys = ['FACTCode', 'COMP_ITEM_CODE']
output_columns = ['FACTCode', 'COMP_ITEM_CODE', 'MthNum', 'RM Cost', 'retain_flag']

old_data_new = pd.concat([old_data_1, df_old_0_new])[output_columns]

# Reproduce the row set and ordering of the former groupby(...).apply(...):
# rows with a missing group key are dropped, groups are ordered by key, and
# rows keep their relative order within a group (stable sort).
old_data_new = (
    old_data_new
    .dropna(subset=group_keys)
    .sort_values(group_keys, kind='stable')
    .reset_index(drop=True)
)

# True for every row of a factory / RM combination in which no row is missing
# its cost. transform('all') avoids groupby.apply operating on the grouping
# columns, which is deprecated in pandas 2.2.
has_cost = old_data_new['RM Cost'].notnull()
old_data_new['Available_1_to_12'] = has_cost.groupby(
    [old_data_new[key] for key in group_keys]
).transform('all')

# NOTE(review): the row index is written as the first (unnamed) CSV column,
# unlike the other exports in this notebook which pass index=False. Left
# unchanged because other readers of this file may rely on that layout.
old_data_new.to_csv('RM_Cost_mapped_old+missing.csv')

In [11]:
### After adding the missing RM prices, re-run the QC and recreate the average RM costs (used downstream for the missing values)

In [12]:
RM_Cost_mapped = pd.read_csv(r'RM_Cost_mapped_old+missing.csv')

#####################################################################
print (" Monthly RM cost is prepared and generated !!!! QC pending .......")

# QC check: RM landed cost must not vary by more than 5% month-on-month for any RM.
#
# The check is only applied to RM-factory combinations that have a retained
# landed cost for all 12 months. If it fails, a report of the failing
# combinations is written to QC_failed.csv.
MONTHS_IN_YEAR = 12
MAX_MOM_CHANGE_PCT = 5
group_keys = ['FACTCode', 'COMP_ITEM_CODE']

# Only rows whose cost is retained take part in the QC.
filtered_RM_Cost_mapped = RM_Cost_mapped[RM_Cost_mapped['retain_flag'] == 1]

# Number of distinct months available per factory / RM combination.
grouped_counts = filtered_RM_Cost_mapped.groupby(group_keys)['MthNum'].nunique()

# Combinations covering all 12 months are checked ...
valid_groups = grouped_counts[grouped_counts == MONTHS_IN_YEAR].index
# ... combinations with fewer months are only listed.
not_applied = grouped_counts[grouped_counts < MONTHS_IN_YEAR].index

violations = []
failed_groups = []

# Leading columns of the QC report; any further columns of the input follow them.
columns = ['FACTCode', 'COMP_ITEM_CODE', 'MthNum', 'RM Cost', 'retain_flag', 'RM Cost Change']

valid_keys = set(valid_groups)
# A single groupby pass replaces re-filtering the whole frame for every combination.
for (fact_code, comp_item_code), group_rows in filtered_RM_Cost_mapped.groupby(group_keys):
    if (fact_code, comp_item_code) not in valid_keys:
        continue

    # The rows of a combination are not guaranteed to be in calendar order in
    # the file, so sort by month before comparing consecutive rows.
    # sort_values also returns a new frame, so adding a column below does not
    # write into a slice of the input.
    group_data = group_rows.sort_values('MthNum', kind='stable')

    # Percentage change versus the previous month. Gaps are forward-filled
    # explicitly, which is what the deprecated default fill_method='pad' did.
    group_data['RM Cost Change'] = group_data['RM Cost'].ffill().pct_change(fill_method=None) * 100
    # The first month has no predecessor: its change is defined as zero.
    group_data.loc[group_data['MthNum'] == group_data['MthNum'].min(), 'RM Cost Change'] = 0

    if group_data['RM Cost Change'].abs().max() > MAX_MOM_CHANGE_PCT:
        violations.append((fact_code, comp_item_code))
        failed_groups.append(group_data)

# Assemble the report once instead of appending to a DataFrame inside the loop.
if failed_groups:
    QC_failed = pd.concat(failed_groups, ignore_index=True)
    extra_columns = [col for col in QC_failed.columns if col not in columns]
    QC_failed = QC_failed[columns + extra_columns]
else:
    # No failures: still write a header-only report.
    QC_failed = pd.DataFrame(columns=columns)

# Export a report of the RMs that fail this particular QC.
QC_report_path = 'QC_failed.csv'
QC_failed.to_csv(QC_report_path, index=False)

print("RMs where the check is not applied:")
print(not_applied)

if violations:
    print("RM landed cost varies more than 5% month-on-month for RMs added in the report at:",QC_report_path )

else:
    print("RM landed cost does not vary more than 5% month-on-month for any RM.")

 Monthly RM cost is prepared and generated !!!! QC pending .......
RMs where the check is not applied:
MultiIndex([], names=['FACTCode', 'COMP_ITEM_CODE'])
RM landed cost varies more than 5% month-on-month for RMs added in the report at: QC_failed.csv


In [13]:
######################  Execute the Data Processing #########################
# Step-7: Derive the average RM cost, only for factory / RM combinations whose
# cost is available for all the months, and export the average cost data.
MONTHS_IN_YEAR = 12
group_keys = ['FACTCode', 'COMP_ITEM_CODE']

# Total of retain_flag per factory / RM combination.
retain_flag_totals = RM_Cost_mapped.groupby(group_keys)['retain_flag'].sum().to_frame()

# Attach the total to every row. Both frames have a 'retain_flag' column, so the
# row-level flag becomes 'retain_flag_RM_Cost_mapped' and the total 'retain_flag_df2'.
merged_df = pd.merge(RM_Cost_mapped, retain_flag_totals, on=group_keys, suffixes=('_RM_Cost_mapped', '_df2'))

# Keep the combinations whose flags add up to 12 (one retained cost per month).
filtered_RM_Cost_mapped = merged_df[merged_df['retain_flag_df2'] == MONTHS_IN_YEAR]
print('filtered_RM_Cost_mapped is ready')

# Mean RM cost per combination; retain_flag is set to 12 on every output row.
average_RM_cost = filtered_RM_Cost_mapped.groupby(group_keys)['RM Cost'].mean().reset_index()
average_RM_cost['retain_flag'] = MONTHS_IN_YEAR

# Duplicate check on the INPUT rows: the same factory / RM / month must not
# occur twice, otherwise a flag total of 12 does not mean 12 distinct months.
# (Checking the grouped averages can never find a duplicate, because groupby
# already returns exactly one row per combination.)
is_duplicate_row = filtered_RM_Cost_mapped.duplicated(subset=group_keys + ['MthNum'], keep=False)
duplicate_entries = filtered_RM_Cost_mapped.loc[is_duplicate_row, group_keys].drop_duplicates()

if duplicate_entries.empty:
    print("No duplicate entries found for 'FACTCode' and 'COMP_ITEM_CODE' combinations.")
    average_RM_cost.to_csv('Average RM Cost.csv',index=False)
    print( "Average RM Cost derivation is Done!!! ")
else:
    print("Duplicate entries found for the following 'FACTCode' and 'COMP_ITEM_CODE' combinations: (please check the input data)")
    print(duplicate_entries[['FACTCode', 'COMP_ITEM_CODE']].drop_duplicates())
    print( "Average RM Cost derivation is Stopped..... ")

filtered_RM_Cost_mapped is ready
No duplicate entries found for 'FACTCode' and 'COMP_ITEM_CODE' combinations.
Average RM Cost derivation is Done!!! 
