#### 🔷 Loads settings from a .env file and saves them into variables for use in the program.

In [1]:
from dotenv import load_dotenv
import os
# Read the .env file into the process environment. override=True lets values
# from .env replace variables that are already set.
load_dotenv(override=True)

# One environment variable per month; each value is later handed to
# pd.read_csv as the location of that month's CSV file.
_MONTH_KEYS = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
_month_sources = {key: os.getenv(key) for key in _MONTH_KEYS}

# Fail fast with a readable message instead of letting pd.read_csv fail on
# None (or an empty string) several cells later.
_missing_keys = [key for key, value in _month_sources.items() if not value]
if _missing_keys:
    raise RuntimeError(
        "Missing or empty environment/.env entries: " + ", ".join(_missing_keys)
    )

# NOTE: `oct` shadows the built-in oct(); the name is kept because the
# CSV-loading cell reads it.
jan, feb, mar, apr, may, jun, jul, aug, sep, oct, nov, dec = _month_sources.values()

# Folder used by the (currently commented-out) CSV export at the end.
# NOTE(review): Windows treats environment variable names case-insensitively,
# so 'Path' is the same variable as the system PATH there: without a `Path`
# entry in .env this returns the system PATH, and with one, override=True
# replaces PATH for this process. Consider a distinct key (e.g. OUTPUT_DIR).
path = os.getenv('Path')



In [2]:
import pandas as pd
import numpy as np
import random

##### 🔷 loading CSV files for each month into separate DataFrames

In [3]:
# January-April are read with pandas' default encoding; May-December are
# read as ISO-8859-1.
_LATIN1 = 'ISO-8859-1'

jan_df, feb_df, mar_df, apr_df = (
    pd.read_csv(source) for source in (jan, feb, mar, apr)
)

may_df, jun_df, jul_df, aug_df, sep_df, oct_df, nov_df, dec_df = (
    pd.read_csv(source, encoding=_LATIN1)
    for source in (may, jun, jul, aug, sep, oct, nov, dec)
)

##### 🔷 Putting all the monthly DataFrames into a list

In [4]:
# Monthly frames in calendar order; index 0 is January, index 11 is December.
df_list = [
    jan_df, feb_df, mar_df, apr_df,
    may_df, jun_df, jul_df, aug_df,
    sep_df, oct_df, nov_df, dec_df,
]

##### 🔷 finding the common columns across all your monthly DataFrames by intersecting their column sets.

In [5]:
# Start from the column labels of the first monthly frame; the intersection
# with the remaining months is taken in the next cell.
first_month_columns = df_list[0].columns
common_column = set(first_month_columns)
print(common_column)

{'Production Quantity', 'Filter damage (P)', 'Drain valve wire cut (S)', 'Date', 'Drain valve damage (S)', 'Cartridge/ Element O ring miss (R)', 'Leak through filter seam (S)', 'Head leakage through adaptor/connector (R)', 'Leak through breathing/ bleed screw (R)', 'DV leak from body (S)', 'DV leak from O-ring (S)', 'Filter dent & damage (S)', 'Filter paint not ok (P)', 'Filter screen overlap (S)', 'Filter paint not ok (S)', 'Reed valve damage (P)', 'Part No', 'Head casting leak (R)', 'Filter crimp nut leak (S)', 'Filter screen not ok (S)', 'Shift'}


In [6]:
# Keep only the labels that also occur in every later month (in-place
# intersection on the existing set).
common_column.intersection_update(*(frame.columns for frame in df_list[1:]))


In [7]:
# Report how many columns survived the intersection, then the labels themselves.
print(len(common_column), common_column, sep='\n')

10
{'Production Quantity', 'Head casting leak (R)', 'Filter dent & damage (S)', 'Date', 'Filter crimp nut leak (S)', 'Filter screen not ok (S)', 'Filter paint not ok (S)', 'Shift', 'Leak through filter seam (S)', 'Part No'}


##### 🔷  converting the common_column set back into a list and preserving the column order from jan_df

In [8]:
# Turn the shared labels into a list that follows jan_df's column order, so
# every frame is later sliced with the same, stable column sequence.
shared_mask = jan_df.columns.isin(common_column)
common_column = jan_df.columns[shared_mask].tolist()

In [9]:
# Preview January restricted to the shared columns (display only).
jan_df[common_column]

Unnamed: 0,Date,Shift,Part No,Production Quantity,Filter dent & damage (S),Filter paint not ok (S),Filter screen not ok (S),Leak through filter seam (S),Filter crimp nut leak (S),Head casting leak (R)
0,02-01-2024,I,8710161,60,,,,,,
1,02-01-2024,I,5038945,300,1.0,131.0,,,,
2,02-01-2024,I,5037085,90,6.0,10.0,,,,
3,02-01-2024,I,5007058,90,,,,,,
4,02-01-2024,I,5005557,90,,56.0,,,9.0,
...,...,...,...,...,...,...,...,...,...,...
539,31-01-2024,II,5001370,12,,,,,,
540,31-01-2024,II,8710281,12,,,,,,
541,31-01-2024,II,5038956,204,,,,,,2.0
542,31-01-2024,II,8710100,24,,,,,,


##### 🔷 creates a new list of DataFrames, each containing only the common columns in the same order, which is perfect before concatenation.

In [10]:
# One trimmed frame per month, each holding only the shared columns in the
# order given by common_column.
dfs_common = [frame.loc[:, common_column] for frame in df_list]

In [11]:
# Spot-check the first trimmed frame (January); display only.
dfs_common[0]

Unnamed: 0,Date,Shift,Part No,Production Quantity,Filter dent & damage (S),Filter paint not ok (S),Filter screen not ok (S),Leak through filter seam (S),Filter crimp nut leak (S),Head casting leak (R)
0,02-01-2024,I,8710161,60,,,,,,
1,02-01-2024,I,5038945,300,1.0,131.0,,,,
2,02-01-2024,I,5037085,90,6.0,10.0,,,,
3,02-01-2024,I,5007058,90,,,,,,
4,02-01-2024,I,5005557,90,,56.0,,,9.0,
...,...,...,...,...,...,...,...,...,...,...
539,31-01-2024,II,5001370,12,,,,,,
540,31-01-2024,II,8710281,12,,,,,,
541,31-01-2024,II,5038956,204,,,,,,2.0
542,31-01-2024,II,8710100,24,,,,,,


##### 🔷 unpacking the list dfs_common back into the original monthly DataFrame variables

In [12]:
# Rebind the monthly names to their trimmed versions; dfs_common holds the
# frames in the same January-to-December order as df_list.
(
    jan_df, feb_df, mar_df, apr_df,
    may_df, jun_df, jul_df, aug_df,
    sep_df, oct_df, nov_df, dec_df,
) = dfs_common

##### 🔷 This loop will print the column names of each cleaned DataFrame with a label like df1, df2, …, so you can easily verify that all of them have the same columns:

In [13]:
# Print each trimmed frame's column list, labelled df1, df2, ... so the
# twelve frames can be compared by eye.
for position, frame in enumerate(dfs_common, start=1):
    label = f'df{position}'
    print(label, list(frame.columns))

df1 ['Date', 'Shift', 'Part No', 'Production Quantity', 'Filter dent & damage (S)', 'Filter paint not ok (S)', 'Filter screen not ok (S)', 'Leak through filter seam (S)', 'Filter crimp nut leak (S)', 'Head casting leak (R)']
df2 ['Date', 'Shift', 'Part No', 'Production Quantity', 'Filter dent & damage (S)', 'Filter paint not ok (S)', 'Filter screen not ok (S)', 'Leak through filter seam (S)', 'Filter crimp nut leak (S)', 'Head casting leak (R)']
df3 ['Date', 'Shift', 'Part No', 'Production Quantity', 'Filter dent & damage (S)', 'Filter paint not ok (S)', 'Filter screen not ok (S)', 'Leak through filter seam (S)', 'Filter crimp nut leak (S)', 'Head casting leak (R)']
df4 ['Date', 'Shift', 'Part No', 'Production Quantity', 'Filter dent & damage (S)', 'Filter paint not ok (S)', 'Filter screen not ok (S)', 'Leak through filter seam (S)', 'Filter crimp nut leak (S)', 'Head casting leak (R)']
df5 ['Date', 'Shift', 'Part No', 'Production Quantity', 'Filter dent & damage (S)', 'Filter paint no

In [14]:
# Display only: concatenating without ignore_index keeps each monthly frame's
# own row labels, so the labels repeat across months in the combined index.
pd.concat(dfs_common).index

Index([  0,   1,   2,   3,   4,   5,   6,   7,   8,   9,
       ...
       595, 596, 597, 598, 599, 600, 601, 602, 603, 604],
      dtype='int64', length=6394)

##### 🔷 combines all your monthly DataFrames into one big DataFrame, stacking rows while resetting the index.

In [15]:
# Stack the twelve trimmed frames row-wise and give the result a fresh
# 0..n-1 index instead of the repeated per-month labels.
concat_df = pd.concat(objs=dfs_common, axis=0, ignore_index=True)

In [16]:
# Confirm the combined frame has one continuous index.
concat_df.index

RangeIndex(start=0, stop=6394, step=1)

##### 🔷 renaming the columns of your combined DataFrame concat_df with the new list updated_column_name

In [17]:
# Show the current column labels and their order before they are renamed.
concat_df.columns

Index(['Date', 'Shift', 'Part No', 'Production Quantity',
       'Filter dent & damage (S)', 'Filter paint not ok (S)',
       'Filter screen not ok (S)', 'Leak through filter seam (S)',
       'Filter crimp nut leak (S)', 'Head casting leak (R)'],
      dtype='object')

In [18]:
# New labels, listed in the same order as concat_df's current columns because
# they are applied by position.
# NOTE: 'Screen Defect' contains a space while the other names use
# underscores; later cells select the column by exactly this spelling.
updated_column_name = [
    'Date', 'Shift', 'Part_No', 'Production',      # identifying columns
    'Dent_Defect', 'Paint_Defect', 'Screen Defect',  # defect columns
    'Seam_leak', 'Crimp_leak', 'Casting_leak',     # leak columns
]

In [19]:
# Labels are assigned by position, so updated_column_name must list the new
# names in exactly the current column order.
concat_df.columns  = updated_column_name

##### 🔷 Replacing the real part numbers with randomly generated 4-digit IDs

In [20]:
# Distinct part numbers in order of first appearance, as a plain Python list.
# NOTE(review): if some monthly files parse 'Part No' as integers and others
# as strings, the same part would be listed twice here - confirm the dtype is
# uniform across months.
uunique_parts = pd.unique(concat_df['Part_No']).tolist()

In [21]:
# Number of distinct part numbers, i.e. how many replacement IDs are needed.
len(uunique_parts)

393

In [22]:
# Draw one distinct 4-digit ID (1000-9999) per unique part number. The sample
# size comes from the data rather than a hard-coded 393: if the input files
# change, a fixed count would leave zip() pairing only some of the parts.
# random.sample raises ValueError if more than 9000 IDs are requested.
# NOTE: no seed is set in the visible cells, so the IDs differ on every run.
new_ids = random.sample(range(1000, 10000), len(uunique_parts))


In [23]:
# Inspect the generated replacement IDs.
print(new_ids)

[2406, 6446, 4831, 5043, 5216, 1521, 9351, 2257, 1758, 8491, 2125, 1254, 8918, 5754, 5807, 2210, 7718, 8643, 8817, 8159, 3314, 2645, 1585, 1049, 3969, 5342, 2734, 4189, 1401, 5139, 4271, 8092, 9951, 8060, 8093, 7385, 1479, 3987, 1353, 9070, 7861, 9164, 2998, 5741, 4156, 4225, 8281, 4475, 3287, 2677, 3229, 7421, 6395, 7361, 6138, 2577, 5162, 3395, 3416, 3766, 7447, 6736, 6141, 8052, 4432, 4678, 8682, 9651, 4406, 6881, 2079, 6504, 4948, 5831, 7052, 3503, 9232, 7008, 4295, 6724, 6160, 9370, 3709, 7224, 4076, 9703, 9684, 9394, 7653, 1687, 1774, 9712, 7157, 7539, 6811, 7727, 6558, 3045, 2422, 1841, 9294, 9769, 9564, 9454, 3331, 2715, 1749, 4610, 7722, 8036, 8374, 1432, 6929, 2548, 1998, 9334, 6715, 7391, 9825, 2627, 1277, 6634, 6962, 2716, 8312, 7460, 6731, 3294, 7230, 4857, 7312, 9376, 5009, 2791, 5261, 5389, 5160, 9075, 3087, 7305, 9470, 6659, 3948, 6623, 6158, 4482, 8623, 8672, 6291, 8308, 6899, 8365, 7679, 8387, 1346, 6370, 8842, 1324, 6414, 2132, 4802, 6688, 9706, 5382, 9500, 4103, 161

In [24]:
# Store the replacement IDs as text rather than integers.
new_ids = list(map(str, new_ids))

In [25]:
# Pair every original part number with its replacement ID.
# zip() stops at the shorter input without complaint, which would leave some
# part numbers without a replacement, so verify the lengths match first.
if len(uunique_parts) != len(new_ids):
    raise ValueError(
        f"{len(uunique_parts)} unique part numbers but "
        f"{len(new_ids)} replacement IDs"
    )
mapping_dict = dict(zip(uunique_parts, new_ids))

In [26]:
# Inspect the original -> replacement mapping.
# NOTE(review): this output shows the original part numbers next to their
# replacements; if the replacement is meant to hide the real numbers, clear
# this output before sharing the notebook - confirm intent.
print(mapping_dict)

{'8710161': '2406', '5038945': '6446', '5037085': '4831', '5007058': '5043', '5005557': '5216', '8710124': '1521', '8710165': '9351', '8321030': '2257', '5038956': '1758', '5006352': '8491', '8710216': '2125', '5010300': '1254', '5000825': '8918', '5025011': '5754', '5032195': '5807', '8710102': '2210', '8710334': '7718', '5022425': '8643', '8710188': '8817', '8710160': '8159', '5023493': '3314', '5002383': '2645', '8710010': '1585', '5005267': '1049', '5001687': '3969', '5014545': '5342', '8710055': '2734', '5018840': '4189', '5010297': '1401', '5015095': '5139', '5039741': '4271', '5010284': '8092', '8710201': '9951', '5015075': '8060', '8710147': '8093', '8710281': '7385', '8710345': '1479', '5010519': '3987', '8710197': '1353', '5011164': '9070', '8710029': '7861', '5028752': '9164', '8710234': '2998', '8710115': '5741', '5023377': '4156', '8710305': '4225', '5010515': '8281', '5031771': '4475', '5021135': '3287', '5021673': '2677', '5034370': '3229', '5023429': '7421', '5032259': 

In [27]:
# Swap each part number for its replacement ID.
# Series.map yields NaN for any value that is not a key of mapping_dict. That
# happens when the mapping is incomplete or when this cell is run a second
# time (the column then already holds replacement IDs), so validate before
# overwriting the column instead of silently destroying it.
masked_part_no = concat_df['Part_No'].map(mapping_dict)
if masked_part_no.isna().any():
    raise ValueError(
        "Part_No contains values with no entry in mapping_dict; the mapping "
        "is incomplete or this cell has already been run."
    )
concat_df['Part_No'] = masked_part_no

In [28]:
# Count the distinct replacement IDs now present in the column.
print(concat_df['Part_No'].nunique()) 

393


##### 🔷 Changing data types 

In [29]:
# Review the column dtypes before converting them.
concat_df.dtypes

Date              object
Shift             object
Part_No           object
Production         int64
Dent_Defect      float64
Paint_Defect     float64
Screen Defect     object
Seam_leak        float64
Crimp_leak       float64
Casting_leak     float64
dtype: object

In [30]:
# List the distinct raw values of 'Screen Defect', the one count column that
# came through as object dtype, to see what needs cleaning.
concat_df['Screen Defect'].unique()

array([nan, 1.0, 2.0, 4.0, 6.0, 3.0, 11.0, 5.0, 10.0, 21.0, 8.0, 20.0,
       22.0, 12.0, 7.0, 15.0, 13.0, 9.0, '1', '2', '  ', '3', '4', '5',
       '6', '7', ' ', '13', '9', '8'], dtype=object)

In [31]:
# The first four columns are Date, Shift, Part_No and Production; every
# column from position 4 onward is selected for conversion.
first_count_position = 4
dtype_change_clm = concat_df.columns[first_count_position:]
print(dtype_change_clm)

Index(['Dent_Defect', 'Paint_Defect', 'Screen Defect', 'Seam_leak',
       'Crimp_leak', 'Casting_leak'],
      dtype='object')


In [32]:
# Normalise the selected columns to integers.
counts_as_int = (
    concat_df[dtype_change_clm]
    .replace(r'^\s*$', 0, regex=True)        # empty / whitespace-only strings -> 0
    .fillna(0)                               # missing values -> 0
    .apply(pd.to_numeric, errors='coerce')   # numeric text -> numbers, anything else -> NaN
    .fillna(0)                               # values that could not be parsed -> 0
    .astype(int)
)
concat_df[dtype_change_clm] = counts_as_int


In [33]:
# Store the two label-like columns with pandas' dedicated string dtype.
concat_df = concat_df.astype({'Shift': 'string', 'Part_No': 'string'})

In [57]:
# Summarise dtypes and non-null counts of the cleaned frame.
# NOTE: the saved output reports Date as datetime64[ns], which is only true
# once the pd.to_datetime cell further down has been executed; on a fresh
# top-to-bottom run Date has not been converted yet at this point.
concat_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6394 entries, 0 to 6393
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           6394 non-null   datetime64[ns]
 1   Shift          6394 non-null   string        
 2   Part_No        6394 non-null   string        
 3   Production     6394 non-null   int64         
 4   Dent_Defect    6394 non-null   int64         
 5   Paint_Defect   6394 non-null   int64         
 6   Screen Defect  6394 non-null   int64         
 7   Seam_leak      6394 non-null   int64         
 8   Crimp_leak     6394 non-null   int64         
 9   Casting_leak   6394 non-null   int64         
dtypes: datetime64[ns](1), int64(7), string(2)
memory usage: 499.7 KB


In [None]:
# format='mixed' lets pandas infer the format of each value individually;
# dayfirst=True makes ambiguous values such as 02-01-2024 parse as day-month-year.
concat_df['Date']= pd.to_datetime(concat_df['Date'], format='mixed', dayfirst=True)

In [None]:
# Export of the combined frame, currently disabled. The target is built from
# `path` with a backslash, i.e. a Windows-style path separator.
# concat_df.to_csv(rf'{path}\concatall.csv',index=False)