In [1]:
import re
import ipaddress
import pandas as pd
import requests
import time
import json

In [3]:
class ThreatIntelEnricher:
    def __init__(self, api_key):
        """
        Constructor: create the class to store the API key
        """
        self.api_key = api_key
        #  AbuseIPDB link
        self.base_url = "https://api.abuseipdb.com/api/v2/check"
        self.headers = {
            'Key': self.api_key,
            'Accept': 'application/json'
        }

    def extract_ips(self, log_content):
        """
        Extract the IP From Any Text Using Regular Expressions
        """
        # Regex Pattern for IPv4
        pattern = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
        raw_ips = re.findall(pattern, log_content)
        #Convert to set to remove duplicates, then back to list
        return list(set(raw_ips))

    def filter_ips(self, ip_list):
        """
       Address Filtering: Separate Public IPs (to scan) from Private/Local IPs (to ignore).
        """
        public_ips = []
        ignored_ips = []

        for ip in ip_list:
            try:
                ip_obj = ipaddress.ip_address(ip)
                #Condition: If it is not Private, not Loopback (127.0.0.1), and not Multicast
                if not ip_obj.is_private and not ip_obj.is_loopback and not ip_obj.is_multicast:
                    public_ips.append(ip)
                else:
                    ignored_ips.append(ip)
            except ValueError:
                #   for invalid ip format (for example : 999.999.999.999)
                continue

        return public_ips, ignored_ips

print("structure built successfully!")

structure built successfully!


In [5]:
#TEST FILTER AND EXTRACT WITHOUT INTERNET
# 1. fake log
dummy_log = """
User login failed from 192.168.1.5 (Internal) at 10:00 AM.
Suspicious activity detected from 118.25.6.39 (Malicious known).
Server ping from 127.0.0.1 (Loopback).
Brute force attempt from 45.227.255.255 (Potential threat).
Router connect 10.0.0.5 (Private).
"""

# 2. create copy from the tools
enricher = ThreatIntelEnricher(api_key="YOUR-KEY")

# 3. extract
extracted = enricher.extract_ips(dummy_log)
print(f" extract ip (raw): {extracted}")

# 4. filter
public_ips, private_ips = enricher.filter_ips(extracted)

print(f"Public IP : {public_ips}")
print(f"Private IP : {private_ips}")

 extract ip (raw): ['192.168.1.5', '45.227.255.255', '118.25.6.39', '10.0.0.5', '127.0.0.1']
Public IP : ['45.227.255.255', '118.25.6.39']
Private IP : ['192.168.1.5', '10.0.0.5', '127.0.0.1']


In [6]:
# --- Updated Cell: Class with real API Connect ---
import re
import ipaddress
import pandas as pd
import requests
import time
import json
from datetime import datetime

class ThreatIntelEnricher:
    def __init__(self, api_key):
        """
        Constructor: Initialize the class and store the API key.
        """
        self.api_key = api_key
        # Base URL for AbuseIPDB API v2
        self.base_url = "https://api.abuseipdb.com/api/v2/check"
        self.headers = {
            'Key': self.api_key,
            'Accept': 'application/json'
        }

    def extract_ips(self, log_content):
        """
        Extracts all IPv4 addresses from a given text string using Regular Expressions.
        """
        # Regex Pattern for IPv4
        pattern = r'\b(?:\d{1,3}\.){3}\d{1,3}\b'
        raw_ips = re.findall(pattern, log_content)
        # Use set to remove duplicates, then convert back to list
        return list(set(raw_ips))

    def filter_ips(self, ip_list):
        """
        Filters IP addresses: Separates Public IPs (to be scanned)
        from Private/Local IPs (to be ignored).
        """
        public_ips = []
        ignored_ips = []

        for ip in ip_list:
            try:
                ip_obj = ipaddress.ip_address(ip)
                #Condition: If it is not Private, not Loopback (127.0.0.1), and not Multicast
                if not ip_obj.is_private and not ip_obj.is_loopback and not ip_obj.is_multicast:
                    public_ips.append(ip)
                else:
                    ignored_ips.append(ip)
            except ValueError:
                #   for invalid ip format (for example : 999.999.999.999) found by regex
                continue

        return public_ips, ignored_ips

    def get_ip_reputation(self, ip_address):
        """
        Queries AbuseIPDB for a single IP address and returns specific threat data.
        """
        # Query parameters: Check last 90 days of history
        querystring = {
            'ipAddress': ip_address,
            'maxAgeInDays': '90',
            'verbose': 'true'
        }

        try:
            # Send GET request to API
            response = requests.request(method='GET', url=self.base_url, headers=self.headers, params=querystring)

            if response.status_code == 200:
                # Parse JSON response
                decoded_response = json.loads(response.text)
                data = decoded_response['data']

                # Return a dictionary with selected fields
                return {
                    'IP': data.get('ipAddress'),
                    'Abuse_Score': data.get('abuseConfidenceScore'),
                    'Country': data.get('countryCode'),
                    'ISP': data.get('isp'),
                    'Domain': data.get('domain'),
                    'Total_Reports': data.get('totalReports'),
                    'Last_Reported': data.get('lastReportedAt')
                }

            elif response.status_code == 429:
                print(f" Rate Limit Exceeded for IP: {ip_address}")
                return None
            elif response.status_code == 401:
                print(" Error: Invalid or inactive API Key.")
                return None
            else:
                print(f" Unexpected Error ({response.status_code}) for IP: {ip_address}")
                return None

        except Exception as e:
            print(f" Connection Error: {e}")
            return None



In [8]:
# --- Main Execution & Report Generation ---
from tqdm import tqdm  # Library for progress bars
import pandas as pd
import time

# 1. Setup API Key (Replace if not already defined)
MY_KEY = "YOUR_API_KEY_HERE"

# 2. Simulate a Log File (Input Data)
# In a real scenario, we would open a file: with open('access.log', 'r') as f: ...
mock_log_data = """
[2023-10-27 08:00:01] User admin login failed from 192.168.1.15 (Internal).
[2023-10-27 08:05:23] Suspicious port scan detected from 45.155.205.233 (Known Scanner).
[2023-10-27 08:10:00] Connection attempt from 185.191.171.35 (Malicious).
[2023-10-27 08:15:45] Regular traffic from 8.8.8.8 (Google DNS - Clean).
[2023-10-27 08:20:00] Local router access 10.0.0.1 (Private).
[2023-10-27 08:22:11] Another threat source 103.29.70.0 (Malicious).
"""

print(" Starting Automated Threat Intelligence Enricher...")

# 3. Initialize the Tool
enricher = ThreatIntelEnricher(api_key=MY_KEY)

# 4. Extract and Filter IPs
print(" Extracting and Filtering IPs...")
extracted_ips = enricher.extract_ips(mock_log_data)
public_ips, private_ips = enricher.filter_ips(extracted_ips)

print(f"   - Total IPs found: {len(extracted_ips)}")
print(f"   - Private IPs (Ignored): {len(private_ips)}")
print(f"   - Public IPs (To Scan): {len(public_ips)}")

# 5. Enrichment Loop (The Core Logic)
results = []

print("\n Querying AbuseIPDB for Reputation...")
# tqdm creates a progress bar for the loop
for ip in tqdm(public_ips):
    report = enricher.get_ip_reputation(ip)
    if report:
        results.append(report)
    # Important: Sleep to respect API rate limits (avoid getting banned)
    time.sleep(0.5)

# 6. Generate Excel Report
print("\n Generating Excel Report...")
if results:
    df = pd.read_json(json.dumps(results)) # Convert list of dicts to DataFrame

    # Reorder columns for better readability
    cols = ['IP', 'Abuse_Score', 'Country', 'ISP', 'Domain', 'Total_Reports', 'Last_Reported']
    # Ensure columns exist before reordering (in case of empty data)
    df = df[cols] if set(cols).issubset(df.columns) else df

    # Save to Excel
    file_name = "Threat_Intel_Report.xlsx"
    df.to_excel(file_name, index=False)
    print(f" Success! Report saved as: {file_name}")

    # Display the first few rows here
    print("\n--- Preview of Results ---")
    print(df.head())
else:
    print(" No results to save.")

 Starting Automated Threat Intelligence Enricher...
 Extracting and Filtering IPs...
   - Total IPs found: 6
   - Private IPs (Ignored): 2
   - Public IPs (To Scan): 4

 Querying AbuseIPDB for Reputation...


100%|██████████| 4/4 [00:02<00:00,  1.40it/s]
  df = pd.read_json(json.dumps(results)) # Convert list of dicts to DataFrame



 Generating Excel Report...
 Success! Report saved as: Threat_Intel_Report.xlsx

--- Preview of Results ---
               IP  Abuse_Score Country  \
0     103.29.70.0            0      JP   
1  185.191.171.35            0      US   
2  45.155.205.233            0      RU   
3         8.8.8.8            0      US   

                                          ISP       Domain  Total_Reports  \
0             Akamai Connected Cloud / Linode   akamai.com              0   
1                              SEMrush CY LTD  semrush.com              0   
2  Cloud Technologies LLC trading as Cloud.ru     cloud.ru              1   
3                                  Google LLC   google.com             18   

               Last_Reported  
0                       None  
1  2023-08-07T12:17:05+00:00  
2  2025-12-29T22:50:26+00:00  
3  2026-01-27T15:07:17+00:00  


In [10]:
# --- clean the code ---
from tqdm import tqdm
import pandas as pd
import time
import json
import io  # <--- Added this library to fix the warning

# 1. Setup API Key (Ensure your key is here)
MY_KEY = "YOUR_API_KEY_HERE"

# 2. Simulate Log Data
mock_log_data = """
[2023-10-27 08:00:01] User admin login failed from 192.168.1.15 (Internal).
[2023-10-27 08:05:23] Suspicious port scan detected from 45.155.205.233 (Known Scanner).
[2023-10-27 08:10:00] Connection attempt from 185.191.171.35 (Malicious).
[2023-10-27 08:15:45] Regular traffic from 8.8.8.8 (Google DNS - Clean).
[2023-10-27 08:20:00] Local router access 10.0.0.1 (Private).
[2023-10-27 08:22:11] Another threat source 103.29.70.0 (Malicious).
"""

print(" Starting Automated Threat Intelligence Enricher...")

# 3. Initialize Tool
enricher = ThreatIntelEnricher(api_key=MY_KEY)

# 4. Extraction & Filtration
print("Extracting and Filtering IPs...")
extracted_ips = enricher.extract_ips(mock_log_data)
public_ips, private_ips = enricher.filter_ips(extracted_ips)

print(f"   - Total IPs found: {len(extracted_ips)}")
print(f"   - Private IPs (Ignored): {len(private_ips)}")
print(f"   - Public IPs (To Scan): {len(public_ips)}")

# 5. Enrichment Loop
results = []
print("\nQuerying AbuseIPDB for Reputation...")

for ip in tqdm(public_ips):
    report = enricher.get_ip_reputation(ip)
    if report:
        results.append(report)
    time.sleep(0.5)

# 6. Generate Report
print("\n Generating Excel Report...")
if results:
    # FIX: Using io.StringIO to avoid FutureWarning
    json_data = json.dumps(results)
    df = pd.read_json(io.StringIO(json_data))

    cols = ['IP', 'Abuse_Score', 'Country', 'ISP', 'Domain', 'Total_Reports', 'Last_Reported']
    # Select columns if they exist
    existing_cols = [c for c in cols if c in df.columns]
    df = df[existing_cols]

    file_name = "Threat_Intel_Report.xlsx"
    df.to_excel(file_name, index=False)
    print(f" Success! Report saved as: {file_name}")
    print("   (Check the Files folder in the left sidebar to download)")

    print("\n--- Preview ---")
    print(df.head())
else:
    print(" No results to save.")

 Starting Automated Threat Intelligence Enricher...
Extracting and Filtering IPs...
   - Total IPs found: 6
   - Private IPs (Ignored): 2
   - Public IPs (To Scan): 4

Querying AbuseIPDB for Reputation...


100%|██████████| 4/4 [00:02<00:00,  1.52it/s]


 Generating Excel Report...
 Success! Report saved as: Threat_Intel_Report.xlsx
   (Check the Files folder in the left sidebar to download)

--- Preview ---
               IP  Abuse_Score Country  \
0     103.29.70.0            0      JP   
1  185.191.171.35            0      US   
2  45.155.205.233            0      RU   
3         8.8.8.8            0      US   

                                          ISP       Domain  Total_Reports  \
0             Akamai Connected Cloud / Linode   akamai.com              0   
1                              SEMrush CY LTD  semrush.com              0   
2  Cloud Technologies LLC trading as Cloud.ru     cloud.ru              1   
3                                  Google LLC   google.com             18   

               Last_Reported  
0                       None  
1  2023-08-07T12:17:05+00:00  
2  2025-12-29T22:50:26+00:00  
3  2026-01-27T15:07:17+00:00  



