In [None]:
import requests
import json
from datetime import datetime
import pandas as pd

def make_api_call(api_url, timeout=60):
    """Send a GET request through the local SOCKS5 proxy and return the JSON body.

    Args:
        api_url: URL to request.
        timeout: Seconds to wait for the connection / for data before giving
            up. Without a timeout ``requests`` waits forever, so a stalled
            proxy or endpoint would hang the whole notebook.

    Returns:
        The decoded JSON payload when the server answers with status 200.
        ``None`` when the status is anything else or when any error occurs
        (connection/proxy failure, timeout, body that is not valid JSON);
        a short message is printed in those cases.
    """
    # "socks5h" (as opposed to "socks5") asks the proxy to resolve hostnames.
    proxies = {
        'http': 'socks5h://localhost:9150',
        'https': 'socks5h://localhost:9150'
    }

    try:
        # The context manager closes the session's pooled connections even
        # when the request raises, so repeated calls do not leak sockets.
        with requests.Session() as session:
            session.proxies = proxies
            response = session.get(api_url, timeout=timeout)
            if response.status_code == 200:
                return response.json()
            print(f"API request failed with status code {response.status_code}")
            return None
    except Exception as e:
        # Deliberately best-effort: callers rely on getting None instead of
        # an exception for every kind of failure.
        print(f"An error occurred: {str(e)}")
        return None


In [None]:
# Endpoint that returns the brief blog listing
# (the trailing "0/1000" is presumably offset/limit -- TODO confirm)
alphv_api_url = (
    'http://alphvmmm27o3abo3r2mlmjrpdmzle3rykajqc5xsj7j7ejksbpsa36ad.onion'
    '/api/blog/brief/0/1000'
)

# Fetch the listing; the result is None when the request did not succeed
response_data_brief = make_api_call(alphv_api_url)

# Pretty-print the payload, but only when something truthy came back
if response_data_brief:
    print(json.dumps(response_data_brief, indent=2))

In [None]:
# One independent, empty accumulator list per extracted field
(ids, titles, created_dates, updated_dates,
 website_links, countries, descriptions) = ([] for _ in range(7))

In [None]:
# Posts created on or before this moment are ignored
cutoff_dt = datetime(2023, 1, 1)

for item in response_data_brief['items']:

    print(item['title'])

    # Convert epoch milliseconds to datetime objects (None when the field is empty)
    created_dt = datetime.fromtimestamp(item['createdDt'] / 1000.0) if item['createdDt'] else None
    updated_dt = datetime.fromtimestamp(item['updatedDt'] / 1000.0) if item['updatedDt'] else None

    # Skip posts without a creation timestamp (comparing None with a datetime
    # raises TypeError) as well as posts from before 2023
    if created_dt is None or created_dt <= cutoff_dt:
        continue

    # "post_id" rather than "id" so the builtin id() is not shadowed
    post_id = item['id']
    title = item['title']

    # Format the datetime objects as "dd/mm/yyyy"
    created_date_str = created_dt.strftime("%d/%m/%Y")
    updated_date_str = updated_dt.strftime("%d/%m/%Y") if updated_dt else None

    # Reset the detail fields for every post. Otherwise a failed detail
    # request would either hit unbound names (first post) or silently reuse
    # the previous post's values for this row.
    website_link = None
    country = None
    description = None

    # Get additional information for this post:
    url = f'http://alphvmmm27o3abo3r2mlmjrpdmzle3rykajqc5xsj7j7ejksbpsa36ad.onion/api/blog/{post_id}'
    data_response_detailed = make_api_call(url)
    if data_response_detailed:
        # KeyError: field (or 'publication') is absent.
        # TypeError: 'publication' (or the payload) is not subscriptable by key.
        try:
            website_link = data_response_detailed['publication']['url']
        except (KeyError, TypeError):
            print("website_link returns NoneType")

        try:
            country = data_response_detailed['publication']['country']
        except (KeyError, TypeError):
            print("country returns NoneType")

        try:
            description = data_response_detailed['publication']['description']
        except (KeyError, TypeError):
            print("description returns NoneType")

    # Append one value per column so all lists stay the same length
    ids.append(post_id)
    titles.append(title)
    created_dates.append(created_date_str)
    updated_dates.append(updated_date_str)
    website_links.append(website_link)
    countries.append(country)
    descriptions.append(description)

    

    
        

        

In [None]:
# Dump every accumulator list, one "<label> <list>" line per field
print(
    *(
        f"{label} {values}"
        for label, values in (
            ("IDs:", ids),
            ("Titles:", titles),
            ("Created Dates:", created_dates),
            ("Updated Dates:", updated_dates),
            ("Website Links:", website_links),
            ("Countries:", countries),
            ("Descriptions:", descriptions),
        )
    ),
    sep="\n",
)

In [None]:
# Map each output column name to the list that holds its values
data_dict = dict(zip(
    ('id', 'title', 'created_date', 'updated_date',
     'website_link', 'country', 'description'),
    (ids, titles, created_dates, updated_dates,
     website_links, countries, descriptions),
))

In [None]:
# Build the table: one column per key of data_dict
df = pd.DataFrame(data=data_dict)

# Bare last expression so the notebook renders the frame
df

In [None]:
# Write the table to disk (the row index is included as the first column)
df.to_csv(path_or_buf="alphv_data.csv")