In [3]:
import pandas as pd
import os

# Batch-clean raw one-minute forex CSV exports and resample each one to
# several coarser OHLCV timeframes. Every output file is written next to
# its source file, in the same folder.

# 1. Folder path containing your raw forex CSV files
folder_path = "C:/Users/yaman/OneDrive/سطح المكتب/python/project1/data cleaning/alldata"

# 2. Timeframes to produce: output-file label -> pandas offset alias.
#    'min' and 'h' are used rather than 'T' and 'H': pandas 2.2 deprecated
#    the latter, while the lowercase spellings work on older releases too.
timeframes = {
    '5min': '5min',
    '15min': '15min',
    '30min': '30min',
    '1h': '1h',
    '4h': '4h',
    '1d': '1D',
}

# 3. List the raw CSV files in the folder.
#    Results are saved back into this same folder, so files that already
#    carry one of this script's own suffixes are skipped. Without this, a
#    second run would try to re-process its own output (which has a header
#    row and a different column count) and fail.
#    NOTE: a raw file whose name happens to end in e.g. "_1h.csv" is
#    skipped as well.
generated_suffixes = tuple(f'_{label}.csv' for label in ('clean', *timeframes))
files = [
    f for f in os.listdir(folder_path)
    if f.endswith('.csv') and not f.endswith(generated_suffixes)
]

# 4. Loop through each file
for file_name in files:
    file_path = os.path.join(folder_path, file_name)
    # Strip only the final extension; str.replace would rewrite every
    # '.csv' occurring anywhere in the name.
    base_name = os.path.splitext(file_name)[0]

    # --- Read the headerless file and remove the last column ---
    # The last column is dropped unconditionally, so the raw file must have
    # seven columns for the six names below to fit; otherwise the column
    # assignment raises ValueError.
    df = pd.read_csv(file_path, header=None, encoding='utf-16')  # adjust encoding if needed
    df = df.iloc[:, :-1]

    # --- Add the six column names ---
    df.columns = ['DateTime', 'Open', 'High', 'Low', 'Close', 'Volume']

    # --- Convert DateTime column to datetime type ---
    df['DateTime'] = pd.to_datetime(df['DateTime'], format='%Y.%m.%d %H:%M')

    # --- Remove exact duplicate rows ---
    # Rows that share a timestamp but differ in any value are kept.
    df = df.drop_duplicates()

    # --- Sort by DateTime ---
    df = df.sort_values('DateTime').reset_index(drop=True)

    # --- Save a clean base version of the file ---
    clean_name = f'{base_name}_clean.csv'
    df.to_csv(os.path.join(folder_path, clean_name), index=False, encoding='utf-16')
    print(f"✅ Clean version saved: {clean_name}")

    # --- Resample data to different timeframes and save each ---
    for tf_name, rule in timeframes.items():
        # Standard OHLCV aggregation per time bin.
        df_resampled = df.resample(rule, on='DateTime').agg({
            'Open': 'first',
            'High': 'max',
            'Low': 'min',
            'Close': 'last',
            'Volume': 'sum',
        })

        # Bins with no source rows get NaN prices; drop them so the output
        # holds only periods that actually contain data.
        df_resampled = df_resampled.dropna()

        # Turn the DateTime index back into a regular column
        df_resampled = df_resampled.reset_index()

        # Save resampled file
        tf_file_name = f'{base_name}_{tf_name}.csv'
        df_resampled.to_csv(os.path.join(folder_path, tf_file_name), index=False, encoding='utf-16')
        print(f"   ✅ {tf_name} timeframe saved: {tf_file_name}")

print("🎯 All files processed and converted to requested timeframes successfully!")


✅ Clean version saved: GBPUSDM1_clean.csv
   ✅ 5min timeframe saved: GBPUSDM1_5min.csv
   ✅ 15min timeframe saved: GBPUSDM1_15min.csv
   ✅ 30min timeframe saved: GBPUSDM1_30min.csv
   ✅ 1h timeframe saved: GBPUSDM1_1h.csv
   ✅ 4h timeframe saved: GBPUSDM1_4h.csv
   ✅ 1d timeframe saved: GBPUSDM1_1d.csv
✅ Clean version saved: USDJPYM1_clean.csv
   ✅ 5min timeframe saved: USDJPYM1_5min.csv
   ✅ 15min timeframe saved: USDJPYM1_15min.csv
   ✅ 30min timeframe saved: USDJPYM1_30min.csv
   ✅ 1h timeframe saved: USDJPYM1_1h.csv
   ✅ 4h timeframe saved: USDJPYM1_4h.csv
   ✅ 1d timeframe saved: USDJPYM1_1d.csv
✅ Clean version saved: XAUUSDM1_clean.csv
   ✅ 5min timeframe saved: XAUUSDM1_5min.csv
   ✅ 15min timeframe saved: XAUUSDM1_15min.csv
   ✅ 30min timeframe saved: XAUUSDM1_30min.csv
   ✅ 1h timeframe saved: XAUUSDM1_1h.csv
   ✅ 4h timeframe saved: XAUUSDM1_4h.csv
   ✅ 1d timeframe saved: XAUUSDM1_1d.csv
🎯 All files processed and converted to requ