# Convert Excel Files to CSV
## Run this FIRST before training

This notebook converts your SWaT Excel files to CSV format.

---

## Step 1: Mount Google Drive

In [None]:
# Attach Google Drive to the Colab filesystem so the dataset folder under
# /content/drive is reachable from the cells below.
from google.colab import drive

drive.mount(mountpoint='/content/drive')
print("Google Drive mounted!")

## Step 2: Install Required Library

In [None]:
!pip install openpyxl
print("\n‚úÖ openpyxl installed!")

## Step 3: Configuration

Update the folder path and the Excel file names below to match what is actually in your Google Drive!

In [None]:
import pandas as pd
import os

# =============================================================
# CONFIGURATION - UPDATE THESE!
# =============================================================

# Google Drive directory that holds the Excel workbooks.
# The converted CSV files are written into this same directory.
FOLDER_PATH = '/content/drive/MyDrive/4th7thsemproject'

# Source workbooks - these must match the names in the Drive folder exactly.
# (Step 4 lists the folder contents so the names can be double-checked.)
NORMAL_EXCEL = 'SWaT_Dataset_Normal_v1.xlsx'    # Training data
ATTACK_EXCEL = 'SWaT_Dataset_Attack_v0.xlsx'    # Test data

# File names given to the generated CSVs.
NORMAL_CSV = 'SWaT_Normal.csv'
ATTACK_CSV = 'SWaT_Attack.csv'

# Echo the folder so a wrong path is noticed before any conversion starts.
print("Configuration set!", f"Folder: {FOLDER_PATH}", sep="\n")

## Step 4: Check What Files Exist

In [None]:
# List everything in the data folder so the configured file names can be
# checked against what actually exists before converting anything.
print(f"Files in {FOLDER_PATH}:\n")

# isdir (not exists): a path that exists but is not a directory would
# otherwise reach os.listdir and raise NotADirectoryError.
if os.path.isdir(FOLDER_PATH):
    # Sorted so the listing is stable and lines up with the post-conversion listing.
    files = sorted(os.listdir(FOLDER_PATH))
    for f in files:
        file_path = os.path.join(FOLDER_PATH, f)
        size_mb = os.path.getsize(file_path) / (1024 * 1024)
        print(f"  📄 {f} ({size_mb:.2f} MB)")
else:
    print(f"❌ Folder not found: {FOLDER_PATH}")
    print("\nPlease check the folder path!")

## Step 5: Convert Excel to CSV

In [None]:
def convert_excel_to_csv(excel_name, csv_name, folder_path):
    """
    Convert an Excel workbook to a CSV file stored in the same folder.

    The workbook is read with ``pd.read_excel`` using its defaults, and the
    resulting DataFrame is written with ``to_csv(index=False)``. Progress and
    a short summary (row/column counts, first five column names, CSV size)
    are printed along the way.

    Parameters
    ----------
    excel_name : file name of the workbook inside ``folder_path``.
    csv_name : file name of the CSV to write inside ``folder_path``.
    folder_path : directory containing the workbook and receiving the CSV.

    Returns
    -------
    bool
        True once the CSV has been written. False when the workbook path is
        not an existing file; nothing is read or written in that case.

    Errors raised by ``pd.read_excel`` or ``DataFrame.to_csv`` are not caught
    and propagate to the caller.
    """
    excel_path = os.path.join(folder_path, excel_name)
    csv_path = os.path.join(folder_path, csv_name)

    # isfile (not exists): a directory with the same name would pass an
    # existence check and then fail inside read_excel.
    if not os.path.isfile(excel_path):
        print(f"❌ File not found: {excel_path}")
        return False

    print(f"📖 Reading: {excel_name}")
    print("   This may take a few minutes for large files...")

    # Read Excel file
    df = pd.read_excel(excel_path)

    print(f"   ✅ Loaded {df.shape[0]:,} rows × {df.shape[1]} columns")

    # Only the first five names are shown to keep the output short.
    print(f"   Columns: {df.columns.tolist()[:5]}...")

    # index=False: the DataFrame index is not written as an extra column.
    print(f"💾 Saving: {csv_name}")
    df.to_csv(csv_path, index=False)

    # Report the size of the file that was just written as a quick sanity check.
    csv_size = os.path.getsize(csv_path) / (1024 * 1024)
    print(f"   ✅ Saved! Size: {csv_size:.2f} MB")

    return True

In [None]:
# Convert Normal (Training) Data
# Print the three-line section banner, then run the conversion. The call stays
# the final expression of the cell so its True/False result is shown as output.
print("=" * 60, "CONVERTING NORMAL DATA (Training)", "=" * 60, sep="\n")

convert_excel_to_csv(
    excel_name=NORMAL_EXCEL,
    csv_name=NORMAL_CSV,
    folder_path=FOLDER_PATH,
)

In [None]:
# Convert Attack (Test) Data
# Print the three-line section banner, then run the conversion. The call stays
# the final expression of the cell so its True/False result is shown as output.
print("=" * 60, "CONVERTING ATTACK DATA (Testing)", "=" * 60, sep="\n")

convert_excel_to_csv(
    excel_name=ATTACK_EXCEL,
    csv_name=ATTACK_CSV,
    folder_path=FOLDER_PATH,
)

## Step 6: Verify Converted Files

In [None]:
# List files again to confirm CSVs were created
print("Files in folder after conversion:\n")

# Without this guard os.listdir raises when the folder path is wrong or
# Drive is not mounted, instead of telling the user what to fix.
if os.path.isdir(FOLDER_PATH):
    files = os.listdir(FOLDER_PATH)
    for f in sorted(files):
        file_path = os.path.join(FOLDER_PATH, f)
        size_mb = os.path.getsize(file_path) / (1024 * 1024)
        # CSV files get a check mark so they stand out from the other files.
        icon = "✅" if f.endswith('.csv') else "📄"
        print(f"  {icon} {f} ({size_mb:.2f} MB)")
else:
    print(f"❌ Folder not found: {FOLDER_PATH}")
    print("\nPlease check the folder path!")

## Step 7: Preview the Data

In [None]:
# Preview Normal data
normal_csv_path = os.path.join(FOLDER_PATH, NORMAL_CSV)
if os.path.exists(normal_csv_path):
    print("NORMAL DATA PREVIEW:")
    # nrows=5 keeps the preview cheap: only the first five data rows are loaded.
    df_normal = pd.read_csv(normal_csv_path, nrows=5)
    display(df_normal)
    print(f"\nColumn names ({len(df_normal.columns)} total):")
    print(df_normal.columns.tolist())
else:
    # Say so explicitly; a silent cell looks like a successful (empty) preview.
    print(f"CSV not found: {normal_csv_path}")
    print("Run the conversion cells in Step 5 first.")

In [None]:
# Preview Attack data
attack_csv_path = os.path.join(FOLDER_PATH, ATTACK_CSV)
if os.path.exists(attack_csv_path):
    print("ATTACK DATA PREVIEW:")
    # nrows=5 keeps the preview cheap: only the first five data rows are loaded.
    df_attack = pd.read_csv(attack_csv_path, nrows=5)
    display(df_attack)

    # Check label distribution
    print("\nChecking for label column...")
    # The column names are already known from the preview, so candidate label
    # columns are picked from it instead of loading the whole file first.
    label_keywords = ('attack', 'normal', 'label')
    label_cols = [
        col for col in df_attack.columns
        if any(keyword in col.lower() for keyword in label_keywords)
    ]

    if label_cols:
        # usecols loads only the label columns rather than every column of
        # the full file, which is all value_counts needs.
        df_attack_labels = pd.read_csv(attack_csv_path, usecols=label_cols)
        for col in label_cols:
            print(f"\nFound label column: '{col}'")
            print(df_attack_labels[col].value_counts())
    else:
        print("No column name contains 'attack', 'normal' or 'label'.")
else:
    # Say so explicitly; a silent cell looks like a successful (empty) preview.
    print(f"CSV not found: {attack_csv_path}")
    print("Run the conversion cells in Step 5 first.")

## Done!

Your CSV files are ready. Now you can run:
1. `01_Train_IsolationForest_SWaT.ipynb` - to train the model
2. `02_Inference_IsolationForest_SWaT.ipynb` - to make predictions

Use these paths in the training/inference notebooks (adjust them if you changed `FOLDER_PATH` or the CSV names in Step 3):
```python
TRAIN_DATA_PATH = '/content/drive/MyDrive/4th7thsemproject/SWaT_Normal.csv'
TEST_DATA_PATH = '/content/drive/MyDrive/4th7thsemproject/SWaT_Attack.csv'
```