In [9]:
import pandas as pd
import glob
import os

def check_columns(data_dir='data', column_file='all_columns.txt'):
    """Report the column names and data types of every CSV file in a directory.

    Each report line is printed to stdout and also written to ``column_file``.
    Only the first 10 data rows of each CSV are read, so the reported dtypes
    are what pandas infers from that sample, not from the whole file.

    Args:
        data_dir: Directory searched (non-recursively) for ``*.csv`` files.
        column_file: Path of the text report; created or overwritten.

    Returns:
        None. If no CSV file is found, a short notice is written and the
        function returns early.
    """
    # An explicit encoding keeps non-ASCII column names from failing on
    # platforms whose default text encoding is not UTF-8.
    with open(column_file, 'w', encoding='utf-8') as f:

        def emit(text):
            """Send one report line to both stdout and the report file."""
            print(text)
            f.write(f"{text}\n")

        # glob returns matches in arbitrary order; sort for a stable report.
        all_files = sorted(glob.glob(os.path.join(data_dir, "*.csv")))

        if not all_files:
            # The two messages differ on purpose (kept from the original).
            f.write("No CSV files found in the data directory")
            print("No CSV files found")
            return

        emit(f'Found {len(all_files)} CSV files:')

        for filename in all_files:
            try:
                emit(f"\nProcessing file: {filename}")

                # A single small sample provides both the header and the
                # inferred dtypes, so the file is parsed only once.
                df = pd.read_csv(filename, nrows=10)

                columns = df.columns.tolist()
                emit("Columns:")
                for col in columns:
                    emit(f"- {col}")
                emit(f"Total columns: {len(columns)}")

                emit("\nData Types:")
                for col, dtype in df.dtypes.items():
                    emit(f"- {col}: {dtype}")

            except Exception as e:
                # Deliberately broad: one unreadable file is recorded in the
                # report and must not stop the remaining files.
                emit(f"Error reading {filename}: {e}")

    print(f"\nColumn information saved to {column_file}")

# Run the column report only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    check_columns()


Found 8 CSV files:

Processing file: data\cyberattack.csv
Columns:
-  Destination Port
-  Flow Duration
-  Total Fwd Packets
-  Total Backward Packets
- Total Length of Fwd Packets
-  Total Length of Bwd Packets
-  Fwd Packet Length Max
-  Fwd Packet Length Min
-  Fwd Packet Length Mean
-  Fwd Packet Length Std
- Bwd Packet Length Max
-  Bwd Packet Length Min
-  Bwd Packet Length Mean
-  Bwd Packet Length Std
- Flow Bytes/s
-  Flow Packets/s
-  Flow IAT Mean
-  Flow IAT Std
-  Flow IAT Max
-  Flow IAT Min
- Fwd IAT Total
-  Fwd IAT Mean
-  Fwd IAT Std
-  Fwd IAT Max
-  Fwd IAT Min
- Bwd IAT Total
-  Bwd IAT Mean
-  Bwd IAT Std
-  Bwd IAT Max
-  Bwd IAT Min
- Fwd PSH Flags
-  Bwd PSH Flags
-  Fwd URG Flags
-  Bwd URG Flags
-  Fwd Header Length
-  Bwd Header Length
- Fwd Packets/s
-  Bwd Packets/s
-  Min Packet Length
-  Max Packet Length
-  Packet Length Mean
-  Packet Length Std
-  Packet Length Variance
- FIN Flag Count
-  SYN Flag Count
-  RST Flag Count
-  PSH Flag Count
-  ACK Flag

Note: the code above checks the column names and data types of every CSV file in the data directory.