In [1]:
import pandas as pd
import os
import glob


In [3]:
def create_dict_from_csv(file_path):
    df = pd.read_csv(file_path)
    df['file_name'] = df['file_name'].str[:2]
    result_dict = pd.Series(df['file_name'].values, index=df['state']).to_dict()
    return result_dict

# Example usage
file_path = r'C:\Users\himan\Downloads\dataSource\stations_info.csv'
state_dict = create_dict_from_csv(file_path)
print(state_dict)


{'Andhra Pradesh': 'AP', 'Arunachal Pradesh': 'AR', 'Assam': 'AS', 'Bihar': 'BR', 'Chhattisgarh': 'CG', 'Chandigarh': 'CH', 'Delhi': 'DL', 'Gujarat': 'GJ', 'Himachal Pradesh': 'HP', 'Haryana': 'HR', 'Jharkhand': 'JH', 'Jammu and Kashmir': 'JK', 'Karnataka': 'KA', 'Kerala': 'KL', 'Maharashtra': 'MH', 'Meghalaya': 'ML', 'Manipur': 'MN', 'Madhya Pradesh': 'MP', 'Mizoram': 'MZ', 'Nagaland': 'NL', 'Odisha': 'OR', 'Punjab': 'PB', 'Puducherry': 'PY', 'Rajasthan': 'RJ', 'Sikkim': 'SK', 'Telangana': 'TG', 'Tamil Nadu': 'TN', 'Tripura': 'TR', 'Uttarakhand': 'UK', 'Uttar Pradesh': 'UP', 'West Bengal': 'WB'}


In [4]:
print(state_dict.values())

dict_values(['AP', 'AR', 'AS', 'BR', 'CG', 'CH', 'DL', 'GJ', 'HP', 'HR', 'JH', 'JK', 'KA', 'KL', 'MH', 'ML', 'MN', 'MP', 'MZ', 'NL', 'OR', 'PB', 'PY', 'RJ', 'SK', 'TG', 'TN', 'TR', 'UK', 'UP', 'WB'])


In [7]:
state_code=list(state_dict.values())


directory_path = r'C:\Users\himan\Downloads\dataSource\archive'  # Update this path as needed
savepath = r'C:\Users\himan\Downloads\dataSource\output'




# Define the prefixes you want to handle
prefixes = state_code  # Add other prefixes as needed

# Dictionary to hold dataframes for each prefix
dataframes_dict = {prefix: [] for prefix in prefixes}

# Get all CSV files for each prefix
for prefix in prefixes:
    csv_files = glob.glob(os.path.join(directory_path, f'{prefix}*.csv'))
    
    # Read each CSV file and update the list of all columns
    for file in csv_files:
        try:
            df = pd.read_csv(file)
            dataframes_dict[prefix].append(df)
        except Exception as e:
            print(f"Error reading {file}: {e}")

# Function to ensure all dataframes have the same columns
def align_columns(dataframes):
    all_columns = set()
    for df in dataframes:
        all_columns.update(df.columns)
    all_columns = sorted(all_columns)  # Sort columns for consistency
    for i, df in enumerate(dataframes):
        for column in all_columns:
            if column not in df.columns:
                df[column] = None
        dataframes[i] = df[all_columns]
    return dataframes

# Process each prefix
for prefix, dataframes in dataframes_dict.items():
    # Ensure all dataframes for the current prefix have the same columns
    dataframes = align_columns(dataframes)
    
    # Extract state abbreviation from file names and add a state column
    for file, df in zip(glob.glob(os.path.join(directory_path, f'{prefix}*.csv')), dataframes):
        state_abbr = os.path.basename(file)[:2]  # Extract the state abbreviation from the file name
        state_name = next((name for name, abbr in state_dict.items() if abbr == state_abbr), None)
        df['state'] = state_name
    
    # Concatenate all dataframes for the current prefix
    if dataframes:
        merged_df = pd.concat(dataframes, ignore_index=True)
        print(f"Merged DataFrame for prefix '{prefix}':")
        print(merged_df)
        
        # Save the merged dataframe to a new CSV file
        merged_df.to_csv(os.path.join(savepath, f'merged_output_{prefix}.csv'), index=False)
        print(f"Merged CSV files for prefix '{prefix}' saved as 'merged_output_{prefix}.csv'")
    else:
        print(f"No valid files found for prefix '{prefix}'.")


Merged DataFrame for prefix 'AP':
        AT (degree C)  BP (mmHg)  Benzene (ug/m3)  CO (mg/m3)  \
0               23.05        NaN             1.00        0.48   
1                 NaN        NaN             0.70        0.49   
2                 NaN        NaN              NaN         NaN   
3                 NaN        NaN              NaN         NaN   
4                 NaN        NaN             0.60        0.47   
...               ...        ...              ...         ...   
272212          30.00     732.17             0.81        1.08   
272213          29.44     731.53             0.78        1.09   
272214          29.61     732.39             0.80        0.66   
272215          29.08     731.31             0.81        0.39   
272216          28.44     722.96             0.80        0.26   

       Eth-Benzene (ug/m3)            From Date MP-Xylene (ug/m3)  \
0                     None     01-07-2016 10:00              None   
1                     None     01-07-2016 11:00