#### 🔷 Loads settings from a .env file and saves them into variables for use in the program.

In [None]:
from dotenv import load_dotenv
import os
# Read the .env file into the process environment so the os.getenv() lookups
# below can see its keys. override=True asks python-dotenv to let values from
# .env replace variables that are already set in the environment.
load_dotenv(override=True) 
# Look up the twelve monthly settings in calendar order. Each name ends up as
# the environment value for its key, or None when the key is not set.
# NOTE(review): `oct` shadows the built-in oct() for the rest of the session.
(
    jan, feb, mar, apr, may, jun,
    jul, aug, sep, oct, nov, dec,
) = map(
    os.getenv,
    ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
     "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"),
)

# Folder setting; only referenced by the commented-out CSV export.
# NOTE(review): on Windows, environment variable names are case-insensitive,
# so 'Path' may resolve to the system PATH - confirm the .env key is intended.
path = os.getenv('Path')



In [None]:
import pandas as pd
import numpy as np
import random

##### 🔷 Loading the CSV file for each month into its own DataFrame

In [None]:
# Build one DataFrame per month from the sources read out of the environment.
# January-April are read with pandas' default encoding.
jan_df, feb_df, mar_df, apr_df = (
    pd.read_csv(source) for source in (jan, feb, mar, apr)
)

# May-December are decoded as ISO-8859-1.
# NOTE(review): the encoding differs between the two groups - confirm that
# this matches how the files were actually saved.
(
    may_df, jun_df, jul_df, aug_df,
    sep_df, oct_df, nov_df, dec_df,
) = (
    pd.read_csv(source, encoding='ISO-8859-1')
    for source in (may, jun, jul, aug, sep, oct, nov, dec)
)

##### 🔷 Putting all the monthly DataFrames into a list

In [None]:
# Monthly frames gathered in calendar order: January first, December last.
df_list = [
    jan_df, feb_df, mar_df, apr_df,
    may_df, jun_df, jul_df, aug_df,
    sep_df, oct_df, nov_df, dec_df,
]

##### 🔷 finding the common columns across all your monthly DataFrames by intersecting their column sets.

In [None]:
# Seed the running intersection with the column labels of the first frame.
common_column = {*df_list[0].columns}
print(common_column)

In [None]:
# Shrink the set in place so it keeps only the labels that also occur in each
# of the remaining frames.
for monthly_df in df_list[1:]:
    common_column.intersection_update(monthly_df.columns)


In [None]:
# Report how many labels survived the intersection, then the labels themselves
# (one value per line).
print(len(common_column), common_column, sep='\n')

##### 🔷  converting the common_column set back into a list and preserving the column order from jan_df

In [None]:
# Turn the set back into a list that follows jan_df's left-to-right column
# order, so every later selection yields the columns in one consistent order.
common_column = list(filter(common_column.__contains__, jan_df.columns))

In [None]:
# Preview: jan_df restricted to the shared columns, in the order fixed above.
jan_df[common_column]

##### 🔷 Creating a new list of DataFrames, each containing only the common columns in the same order, ready for concatenation.

In [None]:
# One frame per month, each cut down to the shared columns in the same order.
dfs_common = [
    monthly_df[common_column]
    for monthly_df in df_list
]

In [None]:
# Preview the first trimmed frame (the one built from df_list[0]).
dfs_common[0]

##### 🔷 unpacking the list dfs_common back into the original monthly DataFrame variables

In [None]:
# Rebind every monthly name to its trimmed counterpart. dfs_common must hold
# exactly twelve frames, otherwise the unpacking raises ValueError.
(
    jan_df, feb_df, mar_df, apr_df,
    may_df, jun_df, jul_df, aug_df,
    sep_df, oct_df, nov_df, dec_df,
) = dfs_common

##### 🔷 This loop will print the column names of each cleaned DataFrame with a label like df1, df2, …, so you can easily verify that all of them have the same columns:

In [None]:
# Print each trimmed frame's column labels behind a 1-based tag (df1, df2, ...)
# so the twelve lines can be compared by eye.
for position, frame in enumerate(dfs_common, start=1):
    labels = frame.columns.tolist()
    print(f'df{position}', labels)

In [None]:
# Inspect the index produced by a plain concat (no ignore_index): each frame
# keeps its own row labels. The result is only displayed, not stored.
pd.concat(dfs_common).index

##### 🔷 Combining all the monthly DataFrames into one big DataFrame, stacking the rows and resetting the index.

In [None]:
# Stack the monthly frames row-wise into one DataFrame; ignore_index=True
# discards the per-file row labels in favour of a fresh 0..n-1 index.
concat_df = pd.concat(
    objs=dfs_common,
    axis=0,
    ignore_index=True,
)

In [None]:
# Display the index of the combined frame (built with ignore_index=True).
concat_df.index

##### 🔷 Renaming the columns of the combined DataFrame `concat_df` using the new list `updated_column_name`

In [None]:
# Display the current column labels before they are replaced.
concat_df.columns

In [None]:
# Replacement column labels, applied by position to concat_df.
# NOTE(review): 'Screen Defect' uses a space while the other labels use
# underscores; it is kept as-is because a later cell reads that exact label.
updated_column_name = [
    'Date', 'Shift', 'Part_No', 'Production',
    'Dent_Defect', 'Paint_Defect', 'Screen Defect',
    'Seam_leak', 'Crimp_leak', 'Casting_leak',
]

In [None]:
# Relabel the columns by position: the i-th existing column receives the i-th
# entry of updated_column_name.
# NOTE(review): this relies on the shared columns arriving in exactly this
# order - confirm against the labels displayed in the previous cell.
concat_df.columns  = updated_column_name

##### 🔷 Replacing each part number with a random, unique 4-digit ID

In [None]:
# Distinct Part_No values as a plain Python list; each one gets a replacement
# ID further down.
uunique_parts = concat_df['Part_No'].unique().tolist()

In [None]:
# Number of distinct part numbers, i.e. how many replacement IDs are needed.
len(uunique_parts)

In [None]:
# Draw one distinct 4-digit number (1000-9999) per distinct part number.
# random.sample picks without replacement, so no ID is handed out twice.
# The sample size is taken from uunique_parts rather than hard-coded: with a
# fixed count, any change in the data would make zip() drop the surplus parts
# and leave them unmapped (NaN) after .map().
# Raises ValueError if there are more than 9000 distinct parts.
new_ids = random.sample(range(1000, 10000), len(uunique_parts))


In [None]:
# Show the generated replacement IDs.
print(new_ids)

In [None]:
# Part numbers are handled as text, so turn every generated ID into a string.
new_ids = list(map(str, new_ids))

In [None]:
# Pair each original part number with the replacement ID at the same position.
mapping_dict = {
    original_part: replacement_id
    for original_part, replacement_id in zip(uunique_parts, new_ids)
}

In [None]:
# Show the original-part -> replacement-ID lookup table.
print(mapping_dict)

In [None]:
# Swap every part number for its replacement ID. With a dict argument,
# Series.map turns any value that is missing from the dict into NaN.
concat_df['Part_No'] = concat_df['Part_No'].map(mapping_dict)

In [None]:
# Sanity check: count the distinct replacement IDs now present (nunique skips
# NaN by default); this should equal the number of original part numbers.
print(concat_df['Part_No'].nunique()) 

##### 🔷 Changing data types 

In [None]:
# Display the dtype of every column before any conversion.
concat_df.dtypes

In [None]:
# List the distinct raw values in 'Screen Defect' to see what has to be
# cleaned before the numeric conversion.
concat_df['Screen Defect'].unique()

In [None]:
# Columns from position 4 onward are the ones to convert to integers.
# NOTE(review): presumably these are the defect/leak counters ('Dent_Defect'
# through 'Casting_leak') - this depends on the column order; confirm with
# the printed labels.
dtype_change_clm = concat_df.columns[4:]
print(dtype_change_clm)

In [None]:
# Force the selected columns to integers, treating anything unusable as 0.
# NOTE(review): non-numeric text is silently turned into 0 and fractional
# values are truncated by astype(int) - confirm that this is acceptable.
concat_df[dtype_change_clm] = (
    concat_df[dtype_change_clm]
    # Cells that are empty or whitespace-only count as zero.
    .replace(r'^\s*$', 0, regex=True)
    # Missing values count as zero too.
    .fillna(0)
    # Parse column by column; unparseable entries become NaN ...
    .apply(pd.to_numeric, errors='coerce')
    # ... which are then zeroed so the integer cast cannot fail on NaN.
    .fillna(0)
    .astype(int)
)


In [None]:
# Store the two identifier-like columns with pandas' dedicated string dtype.
concat_df[['Shift', 'Part_No']] = concat_df[['Shift', 'Part_No']].astype(
    {'Shift': 'string', 'Part_No': 'string'}
)

In [59]:
# Print a summary of the combined frame: row count, per-column non-null counts
# and dtypes, and memory usage.
concat_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6394 entries, 0 to 6393
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   Date           6394 non-null   datetime64[ns]
 1   Shift          6394 non-null   string        
 2   Part_No        6394 non-null   string        
 3   Production     6394 non-null   int64         
 4   Dent_Defect    6394 non-null   int64         
 5   Paint_Defect   6394 non-null   int64         
 6   Screen Defect  6394 non-null   int64         
 7   Seam_leak      6394 non-null   int64         
 8   Crimp_leak     6394 non-null   int64         
 9   Casting_leak   6394 non-null   int64         
dtypes: datetime64[ns](1), int64(7), string(2)
memory usage: 499.7 KB


In [None]:
# Parse the Date column into datetimes. format='mixed' makes pandas infer the
# format of each value individually (needed when the formats are
# inconsistent); dayfirst=True makes ambiguous values such as 01/02/2024 parse
# as day/month/year.
# NOTE(review): per-value inference can misread dates silently - spot-check
# the parsed result.
concat_df['Date']= pd.to_datetime(concat_df['Date'], format='mixed', dayfirst=True)

In [None]:
# concat_df.to_csv(rf'{path}\concatall.csv',index=False)