In [None]:
# Cell 1: Setup
import sys
sys.path.append('../src')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from src.web_analyzer.analyzer import WebLogAnalyzer
from src.web_analyzer.patterns import AttackPatternDetector

%matplotlib inline
print("‚úÖ Setup complete!")


In [None]:
# Cell 2: Create Sample Web Logs
# Five hand-written access-log lines used as the fixture for every later cell:
#   * three POSTs to /admin/login from 203.0.113.45, one second apart, each
#     answered with 401 and sent by a scripted client (Python-urllib)
#   * one ordinary GET / answered with 200
#   * one GET whose query string carries a SQL "UNION SELECT" payload, sent
#     with a sqlmap user agent and answered with 500
sample_logs = [
    '203.0.113.45 - - [20/Sep/2025:14:01:01 +0200] "POST /admin/login HTTP/1.1" 401 1234 "-" "Python-urllib/3.6"',
    '203.0.113.45 - - [20/Sep/2025:14:01:02 +0200] "POST /admin/login HTTP/1.1" 401 1234 "-" "Python-urllib/3.6"',
    '203.0.113.45 - - [20/Sep/2025:14:01:03 +0200] "POST /admin/login HTTP/1.1" 401 1234 "-" "Python-urllib/3.6"',
    '192.168.1.100 - - [20/Sep/2025:13:55:36 +0200] "GET / HTTP/1.1" 200 2326 "-" "Mozilla/5.0"',
    '198.51.100.22 - - [20/Sep/2025:14:05:10 +0200] "GET /search?q=\' UNION SELECT * FROM users-- HTTP/1.1" 500 0 "-" "sqlmap/1.6.2"',
]

print(f"📊 Created {len(sample_logs)} sample log entries")


In [None]:
# Cell 3: Parse Logs
# Runs every sample line through WebLogAnalyzer.parse_log_line and collects the
# truthy results into `logs_df`. Lines for which the parser returns a falsy
# value are kept in `unparsed_lines` and reported, so a sample that silently
# fails to parse is visible instead of just missing from later cells.
analyzer = WebLogAnalyzer()
parsed_logs = []
unparsed_lines = []

for log in sample_logs:
    parsed = analyzer.parse_log_line(log)
    if parsed:
        parsed_logs.append(parsed)
    else:
        unparsed_lines.append(log)

logs_df = pd.DataFrame(parsed_logs)
print(f"\n✅ Parsed {len(logs_df)} log entries")

if unparsed_lines:
    print(f"\n⚠️ Skipped {len(unparsed_lines)} line(s) the parser rejected:")
    for rejected_line in unparsed_lines:
        print(f"  • {rejected_line}")

print("\n🔍 First few logs:")
# Bare last expression so the notebook renders the frame as a rich table.
logs_df.head()

In [None]:
# Cell 4: Basic Log Statistics
# Prints headline counts for the parsed logs and draws one bar chart per
# categorical column ('method' and 'status').
print("📈 Log Statistics:")
print(f"\nUnique IPs: {logs_df['ip'].nunique()}")
print(f"Total Requests: {len(logs_df)}")

# Count once; the same Series feeds both the text summary and the chart.
method_counts = logs_df['method'].value_counts()
status_counts = logs_df['status'].value_counts()

print("\nRequests by Method:")
print(method_counts)

print("\nStatus Code Distribution:")
print(status_counts)

# Visualize
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

# One (counts, title, colour) spec per panel, paired with its Axes by position.
panel_specs = (
    (method_counts, 'Requests by Method', 'skyblue'),
    (status_counts, 'Status Code Distribution', 'coral'),
)
for panel_ax, (panel_counts, panel_title, panel_color) in zip(axes, panel_specs):
    panel_counts.plot(kind='bar', ax=panel_ax, color=panel_color)
    panel_ax.set_title(panel_title)
    panel_ax.set_ylabel('Count')

plt.tight_layout()
plt.show()


In [None]:
# Cell 5: Attack Pattern Detection
# Passes each parsed request path to AttackPatternDetector.detect_all_patterns
# and prints the requests for which it returns a non-empty result.
print("🔍 Analyzing Attack Patterns:\n")

pattern_detector = AttackPatternDetector()
flagged_requests = 0

# Only the 'ip' and 'path' columns are needed, so iterate them directly
# instead of materialising a Series per row with iterrows().
for ip, path in zip(logs_df['ip'], logs_df['path']):
    patterns = pattern_detector.detect_all_patterns(path)

    if patterns:
        flagged_requests += 1
        print(f"IP: {ip}")
        print(f"Path: {path}")
        # presumably an iterable of pattern-name strings -- join() requires str items
        print(f"Detected: {', '.join(patterns)}")
        print()

# Say so explicitly when nothing matched, rather than ending on a bare header.
if flagged_requests == 0:
    print("No attack patterns detected in the parsed requests.")


In [None]:
# Cell 6: Train Anomaly Detector
# Fits the analyzer's anomaly detector on the parsed logs. The call returns a
# pair that is unpacked into `features_df` and `ip_list`; afterwards the
# analyzer exposes `feature_names`, whose length is reported below.
print("🚀 Training anomaly detector...")

features_df, ip_list = analyzer.train_anomaly_detector(logs_df)

print("✅ Training complete!")
print(f"📊 Features extracted: {len(analyzer.feature_names)}")

In [None]:
# Cell 7: Analyze Suspicious IP
# Selects the parsed rows for one client, hands them to
# WebLogAnalyzer.analyze_ip_with_explanation as a list of row dicts, and prints
# the fields of the returned result.
suspicious_ip = '203.0.113.45'
ip_logs = logs_df[logs_df['ip'] == suspicious_ip].to_dict('records')

if ip_logs:
    print(f"\n🎯 Analyzing IP: {suspicious_ip}")

    result = analyzer.analyze_ip_with_explanation(ip_logs, suspicious_ip)

    print(f"\n🚨 Risk Level: {result['risk_level']}")
    print(f"📊 Anomaly Score: {result['anomaly_score']:.3f}")
    print(f"🔍 Is Anomaly: {result['is_anomaly']}")

    # The three list-valued fields share one layout: heading, then bullets.
    report_sections = (
        ("⚡ Attack Patterns:", 'attack_patterns'),
        ("💡 Behavioral Insights:", 'behavioral_insights'),
        ("🛠️ Recommendations:", 'recommendations'),
    )
    for section_heading, result_key in report_sections:
        print(f"\n{section_heading}")
        for entry in result[result_key]:
            print(f"  • {entry}")
else:
    # Without this branch the cell produces no output at all when the IP has
    # no parsed rows, which is indistinguishable from the cell not having run.
    print(f"\nNo parsed log entries found for IP: {suspicious_ip}")


In [None]:
# Cell 8: Summary
# Closing banner. The findings below are fixed text -- they are not computed
# from the results of the earlier cells.
print("\n✅ Web Log Analysis Complete!")
print("\n📊 Key Findings:")
print("1. Successfully parsed and analyzed web logs")
print("2. Detected multiple attack patterns")
print("3. Identified suspicious IP addresses")
print("\n➡️ Next: Unified analysis in 04_unified_analysis.ipynb")