In [18]:
import os
import pandas as pd

# Folder that is scanned (non-recursively) for the input ".csv" files.
# NOTE(review): absolute, machine-specific path -- adjust before running on another machine.
folder_path = "/Users/kunalpathak9826/Desktop/ISRO/Data/INSAT"

# Function to process each CSV file
def process_csv(file_path):
    """Load one CSV file and prefix its data columns with the file's timestamp.

    The stamp is the part of the base file name between the first "_" and the
    next ".". Its first 8 characters are used as the date and the remainder
    as the time (presumably YYYYMMDD followed by HHMM -- confirm against the
    real file names).

    Every column from the fourth one onward is renamed to
    "<date> <time> <original name>"; the first three columns keep their names.
    A 'date/time' column, if present, is dropped regardless of its position.

    Args:
        file_path: Path to the CSV file, named like "<prefix>_<stamp>.csv".

    Returns:
        A new DataFrame with the renamed columns and without 'date/time'.

    Raises:
        IndexError: If the base file name contains no "_".
    """
    df = pd.read_csv(file_path)

    # Parse the stamp once, then split it into its date and time parts
    file_name = os.path.basename(file_path)
    stamp = file_name.split("_")[1].split(".")[0]
    date_stamp, time_stamp = stamp[:8], stamp[8:]

    # 'date/time' is excluded from the renaming: if it were renamed first
    # (which happens whenever it sits at index 3 or later), the drop below
    # would no longer find it and the column would leak into the output.
    new_columns = {
        col: f"{date_stamp} {time_stamp} {col}"
        for col in df.columns[3:]
        if col != 'date/time'
    }

    # errors='ignore' keeps files without a 'date/time' column working
    return df.drop(columns=['date/time'], errors='ignore').rename(columns=new_columns)

# List to hold the processed dataframe of every CSV file
dfs = []

# os.listdir returns names in arbitrary order; sorting makes the run
# reproducible and makes "the first file" (used for the output name below)
# well-defined.
csv_file_names = sorted(
    name for name in os.listdir(folder_path) if name.endswith(".csv")
)
if not csv_file_names:
    # Fail with a clear message instead of an IndexError on dfs[0] further down
    raise FileNotFoundError(f"No CSV files found in {folder_path}")

for file_name in csv_file_names:
    file_path = os.path.join(folder_path, file_name)
    dfs.append(process_csv(file_path))

# Merge dataframes on latitude and longitude; the outer join keeps
# coordinate pairs that are missing from some of the files
final_df = dfs[0]  # Start with the first dataframe
for df in dfs[1:]:
    final_df = pd.merge(final_df, df, on=['latitude', 'longitude'], how='outer')

# Key columns first, everything else sorted by name. The renamed data
# columns start with "<date> <time> ", so a name sort orders them by
# timestamp (assuming fixed-width stamps -- TODO confirm).
key_columns = ['latitude', 'longitude']
final_df_columns = key_columns + sorted(
    col for col in final_df.columns if col not in key_columns
)
final_df = final_df[final_df_columns]

# Date for the output filename: the first 8 characters of the first file's
# stamp, sliced the same way process_csv does. Reading it from the file name
# (rather than from a column at a fixed position) also works when a file has
# a single data column.
output_date = csv_file_names[0].split("_")[1].split(".")[0][:8]

# Specify the output file path and name
output_folder_path = "/Users/kunalpathak9826/Desktop/ISRO/Data/INSAT_CSV"  # Replace this with your desired output folder path
os.makedirs(output_folder_path, exist_ok=True)  # to_csv does not create missing folders
output_file_path = os.path.join(output_folder_path, f"{output_date}_data.csv")

# Write the merged dataframe to the output CSV file
final_df.to_csv(output_file_path, index=False)


In [1]:
import os
import pandas as pd

# Folder that is scanned (non-recursively) for the input ".csv" files.
# NOTE(review): absolute, machine-specific path -- adjust before running on another machine.
folder_path = "/Users/kunalpathak9826/Desktop/ISRO/Data/INSAT"

# Function to process each CSV file
def process_csv(file_path):
    """Load one CSV file and prefix its data columns with the file's timestamp.

    The stamp is the part of the base file name between the first "_" and the
    next ".". Its first 8 characters are used as the date and the remainder
    as the time (presumably YYYYMMDD followed by HHMM -- confirm against the
    real file names).

    Every column from the fourth one onward is renamed to
    "<date> <time> <original name>"; the first three columns keep their names.
    A 'date/time' column, if present, is dropped regardless of its position.

    Args:
        file_path: Path to the CSV file, named like "<prefix>_<stamp>.csv".

    Returns:
        A new DataFrame with the renamed columns and without 'date/time'.

    Raises:
        IndexError: If the base file name contains no "_".
    """
    df = pd.read_csv(file_path)

    # Parse the stamp once, then split it into its date and time parts
    file_name = os.path.basename(file_path)
    stamp = file_name.split("_")[1].split(".")[0]
    date_stamp, time_stamp = stamp[:8], stamp[8:]

    # 'date/time' is excluded from the renaming: if it were renamed first
    # (which happens whenever it sits at index 3 or later), the drop below
    # would no longer find it and the column would leak into the output.
    new_columns = {
        col: f"{date_stamp} {time_stamp} {col}"
        for col in df.columns[3:]
        if col != 'date/time'
    }

    # errors='ignore' keeps files without a 'date/time' column working
    return df.drop(columns=['date/time'], errors='ignore').rename(columns=new_columns)

# Maps each date stamp (first 8 characters of the file-name stamp) to the
# list of CSV file paths for that day. Despite the name, it holds paths,
# not dataframes; the files are only read in the merge loop below.
dfs_per_day = {}

# Group the CSV files in the folder by date. os.listdir returns names in
# arbitrary order, so the listing is sorted to process files reproducibly.
for file_name in sorted(os.listdir(folder_path)):
    if file_name.endswith(".csv"):
        file_path = os.path.join(folder_path, file_name)

        # Same slicing as process_csv uses for the date part of the stamp
        date_stamp = file_name.split("_")[1].split(".")[0][:8]

        # setdefault creates the day's list on first sight of the date
        dfs_per_day.setdefault(date_stamp, []).append(file_path)

# The output folder does not change per day, so it is set up once here
output_folder_path = "/Users/kunalpathak9826/Desktop/ISRO/Data/INSAT_CSV"  # Replace this with your desired output folder path
os.makedirs(output_folder_path, exist_ok=True)  # to_csv does not create missing folders

# Columns every file is merged on; they are also placed first in the output
key_columns = ['latitude', 'longitude']

# Merge dataframes for each day and write one CSV per day
for date_stamp, file_paths in dfs_per_day.items():
    # Load and relabel every file of this day
    dfs_same_day = [process_csv(file_path) for file_path in file_paths]

    # Outer join keeps coordinate pairs that are missing from some files
    final_df_same_day = dfs_same_day[0]  # Start with the first dataframe
    for df_same_day in dfs_same_day[1:]:
        final_df_same_day = pd.merge(final_df_same_day, df_same_day, on=key_columns, how='outer')

    # Key columns first, everything else sorted by name. The renamed data
    # columns start with "<date> <time> ", so a name sort orders them by
    # timestamp (assuming fixed-width stamps -- TODO confirm).
    final_df_same_day_columns = key_columns + sorted(
        col for col in final_df_same_day.columns if col not in key_columns
    )
    final_df_same_day = final_df_same_day[final_df_same_day_columns]

    # Write the merged dataframe to "<date>_data.csv" in the output folder
    output_file_path = os.path.join(output_folder_path, f"{date_stamp}_data.csv")
    final_df_same_day.to_csv(output_file_path, index=False)
