# Fractional Counts Diagnostics
Notebook for inspecting how the PCT numbers exported from BigQuery match the RegPat-filtered table, and for eyeballing the resulting inventor-country fractional counts.

In [None]:
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

# Locations of the pipeline artefacts, relative to the working directory.
DATA_DIR = Path('data')
processed = DATA_DIR / 'processed'
output = DATA_DIR / 'output'

pct_path = processed / 'pct_from_bq.csv'
regpat_path = processed / 'regpat_filtered.parquet'
counts_path = output / 'inventor_country_yearly_fractional_counts.csv'

# Load the three tables inspected in this notebook.
pct = pd.read_csv(pct_path)
reg = pd.read_parquet(regpat_path)
counts = pd.read_csv(counts_path)

# Filing year is taken as the first four characters of the filing date's
# string form, converted to an integer.
filing_date_text = pct['filing_date'].astype(str)
pct['filing_year'] = filing_date_text.str.slice(stop=4).astype(int)

# Flag every PCT row whose number also occurs in the RegPat table.
regpat_numbers = set(reg['pct_nbr'])
matched_pct = pct['pct_nbr'].isin(regpat_numbers)
pct['matched'] = matched_pct

# Last expression of the cell: show the first rows as a quick sanity check.
pct.head()

## Matched vs unmatched PCT numbers

In [None]:
# Count rows per (filing_year, matched) pair, then spread the matched flag
# into columns; combinations that never occur are filled with 0.
rows_per_year_and_flag = pct.groupby(['filing_year', 'matched']).size()
summary = rows_per_year_and_flag.unstack(fill_value=0)

# Show the last ten rows of the table.
summary.tail(10)

In [None]:
# Stacked bars: one bar per filing year, split by the matched flag.
# The labels are set on the Axes returned by the plot call.
ax = summary.plot(kind='bar', stacked=True, figsize=(12,5))
ax.set_ylabel('Number of PCTs')
ax.set_title('Matched vs unmatched PCT numbers by filing year')
ax.figure.tight_layout()

## Inventor-country totals (selected countries)

In [None]:
# Countries to chart, as ISO 3166-1 alpha-2 codes. The United Kingdom's ISO
# code is 'GB'; 'UK' is not an ISO code, so filtering on it would silently
# drop the UK series from the chart.
# NOTE(review): assumes `inventor_country` holds ISO alpha-2 codes — confirm
# against the distinct values in the counts file.
selected_countries = ['CN', 'US', 'JP', 'GB']
selected = counts[counts['inventor_country'].isin(selected_countries)]

# Reshape to one row per filing year and one column per country; year/country
# pairs missing from the data are shown as 0.
pivot = selected.pivot(index='filing_year', columns='inventor_country', values='fractional_patents').fillna(0)

pivot.plot(figsize=(12,5))
plt.title('Fractional patents by filing year (selected countries)')
plt.ylabel('Fractional patents')
plt.tight_layout()