# Monthly Story Stats

In [46]:
import requests
import json
import pandas as pd
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
from datetime import datetime, timedelta
import calendar

import os

# GraphQL endpoint that every query in this cell is posted to.
url = 'https://medium.com/_/graphql'

# The session cookie is a credential and must not live in source control.
# Export it before running, e.g.  MEDIUM_COOKIE='nonce=...; uid=...; sid=...'
# When the variable is unset an empty cookie is sent and the requests below
# are expected to fail their status-code checks rather than crash here.
headers = {
    'cookie': os.environ.get('MEDIUM_COOKIE', ''),
}

def get_created_at():
    """Fetch the viewer's ``viewerEdge.createdAt`` value.

    Posts the ``ViewerQuery`` GraphQL operation (as a one-element batch) to
    the module-level ``url`` using the module-level ``headers``.

    Returns:
        The ``createdAt`` value from the response, or ``None`` when the
        request fails, the status code is not 200, or the response does not
        contain the expected field. ``generate_monthly_timestamps`` treats
        the value as epoch milliseconds.
    """
    payload = [{"operationName":"ViewerQuery","variables":{},"query":"query ViewerQuery {\n  viewer {\n    __typename\n    id\n    allowEmailAddressSharingEditorWriter\n    dismissableFlags\n    emailObfuscated\n    geolocation {\n      country\n      __typename\n    }\n    hasGroupGiftingEnabled\n    hasPastMemberships\n    hasSubdomain\n    imageId\n    isEligibleToImportEmails\n    isEligibleToViewNewResponses\n    isMembershipTrialEligible\n    isSuspended\n    membership {\n      id\n      tier\n      memberSince\n      friendSince\n      __typename\n    }\n    name\n    partnerProgramEnrollment {\n      id\n      status\n      __typename\n    }\n    postSubscribeMembershipUpsellShownAt\n    styleEditorOnboardingVersionSeen\n    twitterScreenName\n    unverifiedEmail\n    username\n    viewerEdge {\n      id\n      createdAt\n      __typename\n    }\n  }\n}\n"}]
    try:
        # Without a timeout a stalled connection would hang the notebook forever.
        response = requests.post(url, headers=headers, json=payload, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch 'createdAt' data: {exc}")
        return None

    if response.status_code != 200:
        print("Failed to fetch 'createdAt' data.")
        return None

    try:
        # The payload is a batch (list), so the answer is the first element.
        return response.json()[0]['data']['viewer']['viewerEdge']['createdAt']
    except (ValueError, LookupError, TypeError):
        # Non-JSON body, empty batch, or a null/missing level in the result
        # (e.g. an unauthenticated viewer): treat as the same failure.
        print("Failed to fetch 'createdAt' data.")
        return None

def generate_monthly_timestamps(created_at):
    """Split the span from ``created_at`` until now into month-sized windows.

    Args:
        created_at: Start of the span, in epoch milliseconds.

    Returns:
        A list of ``(start_ms, end_ms)`` integer tuples. The first window
        starts at ``created_at``; each later window starts on the first day
        of the following month (keeping the time of day of ``created_at``),
        and the last window is clipped to the current local time. The list
        is empty when ``created_at`` lies in the future.
    """
    cursor = datetime.fromtimestamp(created_at / 1000)
    now = datetime.now()
    windows = []

    while cursor <= now:
        days_in_month = calendar.monthrange(cursor.year, cursor.month)[1]
        # Day 1 of the current month plus its length lands on day 1 of the
        # next month.
        following = cursor.replace(day=1) + timedelta(days=days_in_month)
        window_end = following if following <= now else now
        start_ms = int(cursor.timestamp() * 1000)
        end_ms = int(window_end.timestamp() * 1000)
        windows.append((start_ms, end_ms))
        cursor = following

    return windows

# Step 3: Fetch data for each month
def fetch_data(time_range, username="kalyan45"):
    """Fetch the story-stats time series for one time window.

    Posts the ``UserMonthlyStoryStatsTimeseriesQuery`` GraphQL operation to
    the module-level ``url`` using the module-level ``headers``.

    Args:
        time_range: ``(start_time, end_time)`` pair, passed through as the
            query's ``startTime`` / ``endTime`` input.
        username: Username whose stats are requested. Defaults to the
            account this notebook was written for.

    Returns:
        The decoded JSON response, or ``None`` when the request fails, the
        status code is not 200, or the body is not valid JSON.
    """
    start_time, end_time = time_range
    payload = {"operationName":"UserMonthlyStoryStatsTimeseriesQuery","variables":{"username":username,"input":{"startTime":start_time,"endTime":end_time}},"query":"query UserMonthlyStoryStatsTimeseriesQuery($username: ID!, $input: UserPostsAggregateStatsInput!) {\n  user(username: $username) {\n    id\n    postsAggregateTimeseriesStats(input: $input) {\n      __typename\n      ... on AggregatePostTimeseriesStats {\n        ...MonthlyStoryStats_aggregatePostTimeseriesStats\n        __typename\n      }\n    }\n    __typename\n  }\n}\n\nfragment MonthlyStoryStatsTotals_postStats on PostStats {\n  viewers\n  readers\n  __typename\n}\n\nfragment MonthlyStoryStatsChart_postStatsPoint on PostStatsPoint {\n  timestamp\n  stats {\n    total {\n      viewers\n      readers\n      __typename\n    }\n    __typename\n  }\n  __typename\n}\n\nfragment MonthlyStoryStats_aggregatePostTimeseriesStats on AggregatePostTimeseriesStats {\n  totalStats {\n    ...MonthlyStoryStatsTotals_postStats\n    __typename\n  }\n  points {\n    ...MonthlyStoryStatsChart_postStatsPoint\n    __typename\n  }\n  __typename\n}\n"}
    try:
        # This runs in worker threads: a timeout keeps one stalled request
        # from blocking the pool, and catching network errors keeps a single
        # bad month from aborting the whole collection.
        response = requests.post(url, headers=headers, json=payload, timeout=30)
    except requests.RequestException:
        return None

    if response.status_code != 200:
        return None

    try:
        return response.json()
    except ValueError:
        # A 200 with a non-JSON body (e.g. an HTML challenge page).
        return None

# Step 4: Parallel execution with ThreadPoolExecutor and tqdm
def _extract_rows(data):
    """Flatten one ``fetch_data`` response into a list of CSV row dicts."""
    try:
        points = data['data']['user']['postsAggregateTimeseriesStats']['points']
    except (KeyError, TypeError):
        # GraphQL error payloads have null/missing levels here.
        return []

    rows = []
    for point in points or []:
        totals = point['stats']['total']
        rows.append({
            'timestamp': datetime.fromtimestamp(point['timestamp'] / 1000).strftime('%Y-%m-%d %H:%M:%S'),
            'viewers': totals['viewers'],
            'readers': totals['readers'],
        })
    return rows


def collect_data():
    """Download all monthly story stats and write them to a CSV file.

    Looks up the account creation time, splits the span since then into
    monthly windows, fetches each window concurrently, and writes every
    returned data point to ``medium_full_data.csv`` with the columns
    ``timestamp``, ``viewers`` and ``readers``, sorted by timestamp.

    Returns:
        ``None``. Returns early, without writing a file, when the creation
        time cannot be fetched.
    """
    created_at = get_created_at()
    if not created_at:
        return

    all_data = []
    failed_ranges = 0
    time_ranges = generate_monthly_timestamps(created_at)

    with ThreadPoolExecutor(max_workers=10) as executor:
        futures = {executor.submit(fetch_data, time_range): time_range for time_range in time_ranges}
        for future in tqdm(as_completed(futures), total=len(futures), desc="Fetching Data"):
            data = future.result()
            if not data:
                failed_ranges += 1
                continue
            all_data.extend(_extract_rows(data))

    if failed_ranges:
        print(f"Warning: {failed_ranges} of {len(time_ranges)} monthly requests failed and were skipped.")

    # as_completed yields in finish order, not submission order; sort so the
    # CSV is chronological and reproducible. The timestamp format sorts
    # lexicographically in chronological order.
    all_data.sort(key=lambda row: row['timestamp'])

    # Convert to DataFrame and save; explicit columns keep the header row
    # even when no data points were returned.
    df = pd.DataFrame(all_data, columns=['timestamp', 'viewers', 'readers'])
    df.to_csv('medium_full_data.csv', index=False)
    print("CSV file 'medium_full_data.csv' saved successfully.")

if __name__ == "__main__":
    collect_data()

Fetching Data: 100%|███████████████████████████████████████████████████████████████████| 35/35 [00:02<00:00, 16.90it/s]

CSV file 'medium_full_data.csv' saved successfully.





# Audience Stats

In [54]:
import pandas as pd
import requests
from io import StringIO

import os

# CSV export endpoint for the account's audience stats.
url = 'https://medium.com/@kalyan45/audience/stats/export?source=--------------------------------------------'

# The session cookie is a credential and must not live in source control.
# Export it before running, e.g.  MEDIUM_COOKIE='nonce=...; uid=...; sid=...'
headers = {
    'cookie': os.environ.get('MEDIUM_COOKIE', ''),
}

# Send the request; the timeout keeps a stalled connection from hanging the cell.
response = requests.get(url, headers=headers, timeout=30)

if response.status_code == 200:
    # Parse the body as CSV and re-save it locally through pandas.
    csv_data = StringIO(response.content.decode('utf-8'))
    df = pd.read_csv(csv_data)
    df.to_csv("Audience Stats.csv", index=False)
else:
    print("Failed to fetch data:", response.status_code)