### Step 1. Import Libraries - This section loads essential libraries for data parsing and visualization.

In [35]:
import pandas as pd
from tabulate import tabulate

### Step 2. Load Data

In [36]:
import os
import pandas as pd

# Location of the sample rule set, relative to the notebook's working directory.
data_path = '../data/sample_firewall_rules.csv'
# Resolve once so both messages report the same absolute location.
resolved_path = os.path.abspath(data_path)

if os.path.exists(data_path):
    print(f"✅ File found: {resolved_path}")
    # Parse the CSV into the DataFrame used by the validation cells below.
    df = pd.read_csv(data_path)
    print("✅ CSV loaded successfully!\n")
    print("📋 Preview of your data:")
    print(df.head())
else:
    # Nothing is loaded in this branch; `df` stays undefined until the path is fixed.
    print(f"❌ File not found at: {resolved_path}")
    print("\n🔍 Tip: Check your folder structure. You may need to adjust the path.")


✅ File found: /Users/lindarodriguez/Documents/firewall_validator_project/data/sample_firewall_rules.csv
✅ CSV loaded successfully!

📋 Preview of your data:
   RuleID     SourceIP DestinationIP  Port Protocol Action
0       1    0.0.0.0/0  192.168.1.10    22      TCP  ALLOW
1       2  192.168.1.5  192.168.1.10   443      TCP  ALLOW
2       3    0.0.0.0/0  192.168.1.10  3389      TCP  ALLOW
3       4   10.0.0.0/8  192.168.1.20    80      TCP   DENY


### Step 3. Define Validation Rules - Check for "allow all" sources and unrestricted sensitive ports (e.g., 22 and 3389 open to all). Note: a check for missing deny statements is not yet implemented in the function below.

In [37]:
def validate_firewall_rules(df):
    """Flag risky firewall rules in a rule table.

    Two checks are applied to every row:

    * "Overly permissive rule" - the source is 0.0.0.0/0 and the action is
      ALLOW (case-insensitive).
    * "Sensitive port open to all" - the source is 0.0.0.0/0, the port is 22
      or 3389, and the action is not an explicit DENY.

    Parameters
    ----------
    df : pandas.DataFrame
        Rule table with the columns 'RuleID', 'SourceIP', 'Port' and 'Action'.

    Returns
    -------
    pandas.DataFrame
        One row per finding with the columns 'RuleID', 'Issue' and
        'Description'. The columns are present even when nothing is flagged.

    Raises
    ------
    KeyError
        If a non-empty frame lacks one of the required columns.
    """
    any_source = '0.0.0.0/0'
    sensitive_ports = (22, 3389)
    result_columns = ['RuleID', 'Issue', 'Description']
    issues = []

    for _, row in df.iterrows():
        # A missing Action arrives as NaN/None, which has no .upper();
        # normalise it to an empty string instead of crashing.
        raw_action = row['Action']
        action = '' if pd.isna(raw_action) else str(raw_action).strip().upper()

        # Ports may be strings (e.g. "22") depending on the input format;
        # compare on the integer value and ignore anything unparseable.
        try:
            port = int(row['Port'])
        except (TypeError, ValueError, OverflowError):
            port = None

        from_anywhere = str(row['SourceIP']).strip() == any_source

        if from_anywhere and action == 'ALLOW':
            issues.append({
                'RuleID': row['RuleID'],
                'Issue': 'Overly permissive rule',
                'Description': f"Rule {row['RuleID']} allows traffic from any source."
            })

        # An explicit DENY blocks the port, so it is not an exposure. Rules with
        # a missing or unrecognised action are still reported, to stay on the safe side.
        if from_anywhere and port in sensitive_ports and action != 'DENY':
            issues.append({
                'RuleID': row['RuleID'],
                'Issue': 'Sensitive port open to all',
                'Description': f"Port {row['Port']} is exposed globally."
            })

    # Fixing the columns keeps the report schema stable when there are no findings.
    return pd.DataFrame(issues, columns=result_columns)


### Step 4. Run Validation - This prints a clean, readable table of all flagged issues.

In [38]:
# Run the baseline checks on the CSV rules and render the findings as a text table.
results = validate_firewall_rules(df)
findings_table = tabulate(results, headers='keys', tablefmt='pretty')
print(findings_table)

+---+--------+----------------------------+----------------------------------------+
|   | RuleID |           Issue            |              Description               |
+---+--------+----------------------------+----------------------------------------+
| 0 |   1    |   Overly permissive rule   | Rule 1 allows traffic from any source. |
| 1 |   1    | Sensitive port open to all |      Port 22 is exposed globally.      |
| 2 |   3    |   Overly permissive rule   | Rule 3 allows traffic from any source. |
| 3 |   3    | Sensitive port open to all |     Port 3389 is exposed globally.     |
+---+--------+----------------------------+----------------------------------------+


### Step 5. Save Results

In [39]:
# Write the findings next to the project data. DataFrame.to_csv does not create
# missing parent folders, so make sure the reports directory exists first.
report_path = '../reports/firewall_validation_report.csv'
os.makedirs(os.path.dirname(report_path), exist_ok=True)
results.to_csv(report_path, index=False)

### Step 6. Add Markdown Cells in Jupyter

#### Document each step between the code cells using Markdown headers to keep track of the project's progress.

### Step 7. Extending the Firewall Rule Validator - Turning the prototype into a professional extensible tool. In this section, support is added for:
- JSON input format
- Severity levels for each finding
- Compliance reference tags (CIS/OWASP)

In [40]:
import json
import pandas as pd
from tabulate import tabulate

# Try to load JSON file as alternate input
json_path = '../data/firewall_rules_mock.json'  # 👈 Change if your filename differs

try:
    # Read with an explicit encoding so the result does not depend on the platform default.
    with open(json_path, 'r', encoding='utf-8') as f:
        rules_json = json.load(f)
except FileNotFoundError:
    print("⚠️ No JSON file found. Using the CSV data instead.")
    # Copy rather than alias: later cells assign into df_json, and an alias
    # would silently modify the original CSV DataFrame as well.
    df_json = df.copy()
else:
    # Only reached when the file was read; keeps the try body limited to the I/O.
    df_json = pd.DataFrame(rules_json)
    print("✅ JSON file loaded successfully!")
    print(df_json.head())


✅ JSON file loaded successfully!
   Rule_ID     Source_IP Destination_IP  Port Protocol Action
0        1     0.0.0.0/0       10.0.0.5    22      TCP  Allow
1        2  192.168.1.10       10.0.0.5   443      TCP  Allow
2        3     0.0.0.0/0      10.0.0.10  3389    Allow    NaN
3        4   10.0.0.0/24       10.0.0.5    80      TCP  Allow


### Step 8. Adding Severity Levels
Each finding will include a 'Severity' field ranked as:
- High: Critical exposure (e.g., SSH/RDP open to all)
- Medium: General over-permissive rule
- Low: Minor deviation

In [47]:
def validate_firewall_rules_extended(df):
    """Flag risky firewall rules and attach a severity to each finding.

    Two checks are applied to every row:

    * "Overly permissive rule" (Medium) - the source is 0.0.0.0/0 and the
      action is ALLOW (case-insensitive).
    * "Sensitive port open to all" (High) - the source is 0.0.0.0/0, the port
      is 22 (SSH) or 3389 (RDP), and the action is not an explicit DENY.

    Parameters
    ----------
    df : pandas.DataFrame
        Rule table with the columns 'RuleID', 'SourceIP', 'Port' and 'Action'.

    Returns
    -------
    pandas.DataFrame
        One row per finding with the columns 'RuleID', 'Issue', 'Severity'
        and 'Description'. The columns are present even when nothing is flagged.

    Raises
    ------
    KeyError
        If a non-empty frame lacks one of the required columns.
    """
    any_source = '0.0.0.0/0'
    sensitive_ports = (22, 3389)
    result_columns = ['RuleID', 'Issue', 'Severity', 'Description']
    issues = []

    for _, row in df.iterrows():
        # A missing Action arrives as NaN/None, which has no .upper();
        # normalise it to an empty string instead of crashing.
        raw_action = row['Action']
        action = '' if pd.isna(raw_action) else str(raw_action).strip().upper()

        # Ports may be strings (e.g. "22") depending on the input format;
        # compare on the integer value and ignore anything unparseable.
        try:
            port = int(row['Port'])
        except (TypeError, ValueError, OverflowError):
            port = None

        from_anywhere = str(row['SourceIP']).strip() == any_source

        # Overly permissive rule
        if from_anywhere and action == 'ALLOW':
            issues.append({
                'RuleID': row['RuleID'],
                'Issue': 'Overly permissive rule',
                'Severity': 'Medium',
                'Description': f"Rule {row['RuleID']} allows traffic from any source."
            })

        # Sensitive ports (SSH, RDP). An explicit DENY blocks the port, so it is
        # not an exposure. Rules with a missing or unrecognised action are still
        # reported, to stay on the safe side.
        if from_anywhere and port in sensitive_ports and action != 'DENY':
            issues.append({
                'RuleID': row['RuleID'],
                'Issue': 'Sensitive port open to all',
                'Severity': 'High',
                'Description': f"Port {row['Port']} is exposed globally."
            })

    # Fixing the columns keeps the report schema stable when there are no findings.
    return pd.DataFrame(issues, columns=result_columns)

In [48]:
# Inspect the JSON frame's schema before validating: the validators index
# columns such as 'RuleID' and 'SourceIP' by exact name.
json_columns = df_json.columns.tolist()
print("📋 Columns in df_json:")
print(json_columns)

json_preview = df_json.head()
print("\n🔍 Preview of the data:")
print(json_preview)

📋 Columns in df_json:
['Rule_ID', 'Source_IP', 'Destination_IP', 'Port', 'Protocol', 'Action']

🔍 Preview of the data:
   Rule_ID     Source_IP Destination_IP  Port Protocol Action
0        1     0.0.0.0/0       10.0.0.5    22      TCP  Allow
1        2  192.168.1.10       10.0.0.5   443      TCP  Allow
2        3     0.0.0.0/0      10.0.0.10  3389    Allow    NaN
3        4   10.0.0.0/24       10.0.0.5    80      TCP  Allow


In [58]:
# Map the JSON export's column spellings onto the names the validators use.
# Keys that are not present in the frame are ignored by rename.
column_aliases = {
    'Rule_ID': 'RuleID',
    'Source_IP': 'SourceIP',
    'Destination_IP': 'DestinationIP',
    'port': 'Port',
    'protocol': 'Protocol',
    'action': 'Action',
}
df_json = df_json.rename(columns=column_aliases)

renamed_columns = df_json.columns.tolist()
print("✅ Renamed columns:")
print(renamed_columns)


✅ Renamed columns:
['RuleID', 'SourceIP', 'DestinationIP', 'Port', 'Protocol', 'Action']


In [67]:
# Replace missing Action values with an explicit placeholder so the column
# has no gaps when the validator reads it.
action_filled = df_json['Action'].fillna('UNKNOWN')
df_json['Action'] = action_filled

# Confirm that no column still contains missing values.
missing_per_column = df_json.isna().sum()
print("📊 Missing values per column:")
print(missing_per_column)

📊 Missing values per column:
RuleID           0
SourceIP         0
DestinationIP    0
Port             0
Protocol         0
Action           0
dtype: int64


In [68]:
# Run the severity-aware checks on the JSON rules and render the findings as a text table.
results_extended = validate_firewall_rules_extended(df_json)
extended_table = tabulate(results_extended, headers='keys', tablefmt='pretty')
print(extended_table)

+---+--------+----------------------------+----------+----------------------------------------+
|   | RuleID |           Issue            | Severity |              Description               |
+---+--------+----------------------------+----------+----------------------------------------+
| 0 |   1    |   Overly permissive rule   |  Medium  | Rule 1 allows traffic from any source. |
| 1 |   1    | Sensitive port open to all |   High   |      Port 22 is exposed globally.      |
| 2 |   3    | Sensitive port open to all |   High   |     Port 3389 is exposed globally.     |
+---+--------+----------------------------+----------+----------------------------------------+


### Step 9. Documentation
- Two rules were overly permissive
- Recommendation to restrict SSH and RDP to internal ranges only.
- Notebook to be exported for submission