In [2]:
import os
import pandas as pd

# Combine every workbook in `folder_path` into one sheet and save it as
# combined_output.xlsx inside `output_path`.
folder_path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\modified"
output_path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed"

# Get all Excel files in the folder.
#  - sorted(): os.listdir returns names in arbitrary order; sorting makes the
#    row order of the combined output the same on every run.
#  - "~$" prefix: lock files Excel writes next to an open workbook; they end
#    in .xlsx but cannot be parsed by read_excel.
files = sorted(
    f for f in os.listdir(folder_path)
    if f.endswith('.xlsx') and not f.startswith('~$')
)
if not files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate".
    raise FileNotFoundError(f"No .xlsx files found in {folder_path}")

# Read and append all Excel files (first sheet of each workbook).
df_list = [pd.read_excel(os.path.join(folder_path, file)) for file in files]

# Concatenate all dataframes, renumbering the rows 0..n-1.
combined_df = pd.concat(df_list, ignore_index=True)

# Drop the 'max_date' column if it exists
combined_df = combined_df.drop(columns=["max_date"], errors="ignore")

# Save the result
combined_df.to_excel(os.path.join(output_path, 'combined_output.xlsx'), index=False)


In [2]:
import pandas as pd

# Input / output locations for the Part D spending tables.
partd_23_path = r"D:\PhD\RA\Schafer\IRA\data\unzipped\merge\main\partd.csv"
partd_16_path = r"D:\PhD\RA\Schafer\IRA\data\unzipped\merge\main\partd16.xlsx"
partd_19_path = r"D:\PhD\RA\Schafer\IRA\data\unzipped\merge\main\partd19.xlsx"
combined_path = r"D:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\combined_output.xlsx"
output_path = r"D:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\partd_combined.xlsx"


def _name_and_spending_columns(frame):
    """Return Brnd_Name, Gnrc_Name and every column whose name starts with Tot_Spndng_."""
    spending = [label for label in frame.columns if label.startswith("Tot_Spndng_")]
    return ["Brnd_Name", "Gnrc_Name"] + spending


# Read the three source tables; the two workbooks take their header from the
# fourth row (header=3) of the named sheet.
part23 = pd.read_csv(partd_23_path)
part16 = pd.read_excel(partd_16_path, sheet_name="2016", header=3)
part19 = pd.read_excel(partd_19_path, sheet_name="2019", header=3)

# Only the CSV is restricted to the "Overall" manufacturer rows.
# NOTE(review): part16/part19 are not filtered on Mftr_Name — confirm those
# sheets hold one row per drug.
part23 = part23.loc[part23["Mftr_Name"] == "Overall"].copy()

# Trim every table down to the two name columns plus its spending columns.
part23 = part23[_name_and_spending_columns(part23)].copy()
part16 = part16[_name_and_spending_columns(part16)].copy()
part19 = part19[_name_and_spending_columns(part19)].copy()

# Normalise the name columns in place so the tables can be joined on them:
# cast to str, trim, turn non-breaking spaces into plain spaces, lower-case.
# astype(str) also turns missing names into the literal text "nan".
for frame in (part23, part16, part19):
    for name_col in ("Brnd_Name", "Gnrc_Name"):
        frame[name_col] = (
            frame[name_col]
            .astype(str)
            .str.strip()
            .str.replace("\u00A0", " ", regex=False)
            .str.lower()
        )

In [3]:
# Outer-join the three Part D tables on the cleaned name pair so a drug that
# appears in any one of them is kept.
name_keys = ["Brnd_Name", "Gnrc_Name"]

# part23 + part16 first ...
part = part23.merge(part16, on=name_keys, how="outer")

# ... then bring in part19.
partd = part.merge(part19, on=name_keys, how="outer")

# Write the merged table to `output_path`.
partd.to_excel(output_path, index=False)

In [19]:
# Derive `year`, `appyear` and the `bio` flag for the combined dataset and
# write the result to combined_check.xlsx.

# Load combined dataset
df = pd.read_excel(combined_path)

# Step 1: smallest ProductNo of each ApplNo, broadcast to every row.
min_product = df.groupby("ApplNo")["ProductNo"].transform("min")

# Step 2: Keep only rows where ProductNo is the smallest for that ApplNo.
# .copy() so the column assignments below act on an independent frame rather
# than on a slice of the loaded one (avoids SettingWithCopyWarning).
df = df[df["ProductNo"] == min_product].copy()

# Extract years (both columns must already be datetime for the .dt accessor).
year_app = df["ApplicationDocsDate"].dt.year
year_sub = df["SubmissionStatusDate"].dt.year

# Step 3: `year` defaults to the ApplicationDocsDate year; when both years are
# present and at most 5 apart, the SubmissionStatusDate year is used instead.
# NOTE(review): rows with a missing ApplicationDocsDate keep a missing `year`
# even when a submission year exists — confirm this is intended.
df["year"] = year_app
use_submission_year = (
    year_app.notna() & year_sub.notna() & ((year_sub - year_app).abs() <= 5)
)
df.loc[use_submission_year, "year"] = year_sub

# Step 4: Generate new `appyear` as min of `year` within (ApplNo, ProductNo) group
df["appyear"] = df.groupby(["ApplNo", "ProductNo"])["year"].transform("min")

# Remove a stray "appear" column when the input carries one. errors="ignore"
# keeps the cell from raising KeyError when that column is absent.
df = df.drop(columns=["appear"], errors="ignore")

# Step 5: Create bio flag (1 when ApplType is "BLA", otherwise 0)
df["bio"] = (df["ApplType"] == "BLA").astype(int)

# Step 6: Drop rows where appyear < 2000 (rows with a missing appyear are dropped too).
# .copy() because later cells add columns to `df`.
df = df[df["appyear"] >= 2000].copy()
df_output_path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\combined_check.xlsx"
df.to_excel(df_output_path, index=False)

In [24]:
from fuzzywuzzy import fuzz, process
import pandas as pd

# Clean text function
def normalize(text):
    """Return a canonical, comparison-friendly form of a drug name.

    Missing values (None / NaN / NA) become the empty string. Anything else is
    converted to ``str``, lower-cased, has ``-`` and ``/`` replaced by spaces,
    and then has every run of whitespace collapsed to a single space with the
    ends trimmed. The collapse matters because swapping a separator for a
    space can leave double, leading or trailing spaces ("a / b", "-abc"),
    which would make otherwise identical names compare unequal.
    """
    if pd.isna(text):
        return ""
    lowered = str(text).lower().replace("-", " ").replace("/", " ")
    # split() with no argument splits on any whitespace run and drops empties.
    return " ".join(lowered.split())

# Match each row of `df` to a Part D row: first by exact brand name, then by a
# fuzzy comparison against generic names for whatever is left.

# Step 1: Clean both datasets
df["DrugName_clean"] = df["DrugName"].apply(normalize)
partd["Brnd_Name_clean"] = partd["Brnd_Name"].apply(normalize)
partd["Gnrc_Name_clean"] = partd["Gnrc_Name"].apply(normalize)

# Step 2: Exact match on Brnd_Name_clean (indicator adds the `_merge` column)
merged_exact = df.merge(
    partd,
    left_on="DrugName_clean",
    right_on="Brnd_Name_clean",
    how="left",
    indicator=True
)

# Step 3: Split matched and unmatched; unmatched rows go back to df's own columns
matched_exact = merged_exact[merged_exact["_merge"] == "both"].copy()
unmatched = merged_exact[merged_exact["_merge"] == "left_only"][df.columns].copy()

# Step 4: Exclude already matched brand names from fuzzy pool
matched_brands = matched_exact["Brnd_Name_clean"].dropna().unique()
partd_df_fuzzy = partd[~partd["Brnd_Name_clean"].isin(matched_brands)].copy()

# Step 5: Build fuzzy match mapping (DrugName_clean → matched Gnrc_Name_clean row)
fuzzy_match_map = {}
fuzzy_choices = partd_df_fuzzy["Gnrc_Name_clean"]  # hoisted: same for every name

for name in unmatched["DrugName_clean"].dropna().unique():
    if not name:
        # normalize() maps missing drug names to ""; an empty query cannot
        # reach the score threshold, so skip the scan.
        continue
    match_result = process.extractOne(name, fuzzy_choices, scorer=fuzz.token_sort_ratio)
    if match_result:
        # A Series is treated as a mapping, so the result is (choice, score, key).
        match, score, _ = match_result
        if score > 90:
            # First pool row carrying the matched generic name.
            matched_row = partd_df_fuzzy[partd_df_fuzzy["Gnrc_Name_clean"] == match].iloc[0].to_dict()
            matched_row["match_score"] = score
            fuzzy_match_map[name] = matched_row

# Step 6: Convert fuzzy match map to DataFrame
if fuzzy_match_map:
    fuzzy_lookup_df = (
        pd.DataFrame.from_dict(fuzzy_match_map, orient="index")
        .reset_index()
        .rename(columns={"index": "DrugName_clean"})
    )
else:
    # No fuzzy hit at all: keep the expected columns so the merge and the
    # match_score lookup below still work.
    fuzzy_lookup_df = pd.DataFrame(
        columns=["DrugName_clean", *partd_df_fuzzy.columns, "match_score"]
    )

# Step 7: Merge fuzzy matches back into unmatched rows
matched_fuzzy = unmatched.merge(fuzzy_lookup_df, on="DrugName_clean", how="left")

# Step 8: Add match type labels. Only rows that actually received a fuzzy
# match are labelled "fuzzy"; rows with no match at all keep a missing
# match_type instead of being reported as fuzzy matches.
matched_exact["match_type"] = "exact"
matched_fuzzy["match_type"] = pd.Series("fuzzy", index=matched_fuzzy.index).where(
    matched_fuzzy["match_score"].notna()
)

# Step 9: Combine exact and fuzzy matches
final_merged = pd.concat([matched_exact, matched_fuzzy], ignore_index=True)


In [25]:
# Part D columns that are propagated to every row of the same product.
columns_to_fill = [
    "Brnd_Name",
    "Gnrc_Name",
    "Tot_Spndng_2019",
    "Tot_Spndng_2020",
    "Tot_Spndng_2021",
    "Tot_Spndng_2022",
    "Tot_Spndng_2023",
    "Tot_Spndng_2012",
    "Tot_Spndng_2013",
    "Tot_Spndng_2014",
    "Tot_Spndng_2015",
    "Tot_Spndng_2016",
    "Tot_Spndng_2017",
    "Tot_Spndng_2018",
    "Brnd_Name_clean",
    "Gnrc_Name_clean",
    "_merge",
    "match_type",
    "match_score",
]


def _fill_both_directions(values):
    """Forward-fill, then backward-fill, the values handed over by transform()."""
    return values.ffill().bfill()


# Within each (ApplNo, ProductNo) group, copy known values onto rows where
# they are missing. The frame is ordered by the group keys before grouping;
# the assignment aligns on the index, so final_merged keeps its row order.
ordered_rows = final_merged.sort_values(["ApplNo", "ProductNo"])
per_product = ordered_rows.groupby(["ApplNo", "ProductNo"])[columns_to_fill]
final_merged[columns_to_fill] = per_product.transform(_fill_both_directions)

# Step 10: write the filled table to Excel.
final_output = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\combined_output_exp.xlsx"
final_merged.to_excel(final_output, index=False)

  .transform(lambda group: group.ffill().bfill())


In [26]:
import pandas as pd
# Reload the matched table, carry indication_count forward in time within each
# product, then keep one row per (ApplNo, ProductNo, year).
exp_path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\combined_output_exp.xlsx"
exp_df = pd.read_excel(exp_path)

product_keys = ["ApplNo", "ProductNo"]

# Chronological order inside each product, so the forward fill below moves
# the last known indication_count onto later rows.
exp_df = exp_df.sort_values(product_keys + ["SubmissionStatusDate"])
exp_df["indication_count"] = exp_df.groupby(product_keys)["indication_count"].ffill()

# Largest indication_count first within each product-year (missing counts sort
# last), then keep that first row and discard the other rows of the same year.
exp_df = (
    exp_df
    .sort_values(
        by=product_keys + ["year", "indication_count"],
        ascending=[True, True, True, False],
    )
    .drop_duplicates(subset=product_keys + ["year"], keep="first")
)

In [None]:
# Expand every (ApplNo, ProductNo) group into one row per calendar year, from
# the first year recorded for that product through 2025, and save the panel.

# Step 2: Define which columns to carry forward during expansion
fill_cols = [
    "Form", "Strength", "DrugName", "ActiveIngredient", "ApplType", "SponsorName",
    "first_orig", "indication_count", "bio", "DrugName_clean", "Brnd_Name", "Gnrc_Name",
    "Tot_Spndng_2019", "Tot_Spndng_2020", "Tot_Spndng_2021", "Tot_Spndng_2022", "Tot_Spndng_2023",
    "Tot_Spndng_2012", "Tot_Spndng_2013", "Tot_Spndng_2014", "Tot_Spndng_2015",
    "Tot_Spndng_2016", "Tot_Spndng_2017", "Tot_Spndng_2018", "Brnd_Name_clean", "Gnrc_Name_clean",
    "_merge", "match_type", "match_score", "appyear"
]

LAST_PANEL_YEAR = 2025  # last calendar year generated for every product

# Step 3: Group and expand from the first recorded `year` to LAST_PANEL_YEAR
expanded_rows = []

for (appl, prod), group in exp_df.groupby(["ApplNo", "ProductNo"]):
    group = group.sort_values("year")

    # Smallest recorded year of the product. `year` may come back from Excel
    # as float (e.g. 2005.0, or NaN for rows without a date); range() only
    # accepts integers, so drop missing values and cast explicitly.
    known_years = group["year"].dropna()
    if known_years.empty:
        # No usable start year for this product: nothing to expand.
        continue
    first_year = int(known_years.iloc[0])

    # Create full year range from first_year to LAST_PANEL_YEAR inclusive
    full_year_df = pd.DataFrame({"year": list(range(first_year, LAST_PANEL_YEAR + 1))})

    # Attach the existing rows; years without a record come out empty.
    merged = full_year_df.merge(group, on="year", how="left")

    # The group keys are constant, so set them on every row (including new ones).
    merged["ApplNo"] = appl
    merged["ProductNo"] = prod

    # Carry the last known value of each descriptive column into later years.
    merged[fill_cols] = merged[fill_cols].ffill()

    expanded_rows.append(merged)

# Step 4: Combine all into one DataFrame
final_df = pd.concat(expanded_rows, ignore_index=True)

# Step 5: Reorder columns (identifying columns first, the rest in existing order)
first_cols = ["ApplNo", "ProductNo", "DrugName", "Form", "Strength", "year", "appyear", "indication_count", "bio"]
remaining_cols = [col for col in final_df.columns if col not in first_cols]
final_df = final_df[first_cols + remaining_cols]

# Step 6: Save to Excel
output_path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\expanded_output.xlsx"
final_df.to_excel(output_path, index=False)

In [5]:
import pandas as pd
import numpy as np
# Load the expanded panel and make every yearly spending column numeric.
path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\expanded_output.xlsx"
output_path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\complete_1.xlsx"

df = pd.read_excel(path)

# 1) Yearly spending columns, 2012 through 2023.
spending_cols = [
    "Tot_Spndng_2012",
    "Tot_Spndng_2013",
    "Tot_Spndng_2014",
    "Tot_Spndng_2015",
    "Tot_Spndng_2016",
    "Tot_Spndng_2017",
    "Tot_Spndng_2018",
    "Tot_Spndng_2019",
    "Tot_Spndng_2020",
    "Tot_Spndng_2021",
    "Tot_Spndng_2022",
    "Tot_Spndng_2023",
]

# Values that cannot be parsed as numbers become NaN (errors="coerce") and
# every NaN is then replaced by 0.
df[spending_cols] = df[spending_cols].apply(pd.to_numeric, errors="coerce").fillna(0)

# 3) Decide which spending year to rank on
def _spend_year(row):
    if row["appyear"] < 2007:
        return 2012
    elif row["appyear"] > 2017:
        return 2023
    else:  # must be between 2007 and 2017 inclusive
        return int(row["appyear"] + 5)

# Spending year chosen for each row, stored as nullable integers.
year_per_row = df.apply(_spend_year, axis=1)
df["spend_year_for_rank"] = year_per_row.astype("Int64")


# 4) Look up the spending figure of the chosen year.
def _spend_value(row):
    """Return the row's Tot_Spndng_<year> value for its spend_year_for_rank.

    Gives NaN when the chosen year is missing or when the row has no column
    for that year.
    """
    chosen_year = row["spend_year_for_rank"]
    if pd.isna(chosen_year):
        return np.nan
    column_name = f"Tot_Spndng_{int(chosen_year)}"
    if column_name in row:
        return row[column_name]
    return np.nan


df["spend_for_rank"] = df.apply(_spend_value, axis=1)

# 5) Rank rows that share a spending year, highest spending = 1; tied rows all
# receive the lowest rank of their tie (method="min").
# NOTE(review): the rank is taken over every row of df. If df holds several
# rows per product (the input is the expanded panel), the repeated rows of
# higher-spending products push the ranks of later products up — confirm
# this is the intended definition of rank.
spend_ranks = df.groupby("spend_year_for_rank")["spend_for_rank"].rank(
    method="min", ascending=False
)
df["rank"] = spend_ranks.astype("Int64")

# 1 when the row carries a Part D brand name, 0 otherwise.
df["partd"] = df["Brnd_Name"].notna().astype(int)
df.to_excel(output_path, index=False)

In [1]:
import pandas as pd

# Add product-level rank, year offsets and their 0/1 indicator columns.
path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\complete_1.xlsx"
output_path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\complete_rank.xlsx"

df = pd.read_excel(path)

# Give every row of a product the largest rank found among that product's rows.
product_keys = ["ApplNo", "ProductNo"]
df["rank"] = (
    df.sort_values(product_keys)
      .groupby(product_keys)["rank"]
      .transform("max")
)

# ytp: 2022 minus the row's year.
df["ytp"] = 2022 - df["year"]
# ydp: appyear minus 2011 for rows with bio == 1, appyear minus 2015 otherwise.
# NOTE(review): the reference years 2022 / 2011 / 2015 are hard-coded —
# confirm their meaning with the study design.
df.loc[df["bio"] == 1, "ydp"] = df["appyear"] - 2011
df.loc[df["bio"] != 1, "ydp"] = df["appyear"] - 2015

# top50 / top100 / top150 / top250: 1 when rank is within the cutoff, else 0
# (a missing rank counts as 0).
for cutoff in (50, 100, 150, 250):
    df[f"top{cutoff}"] = (df["rank"] <= cutoff).fillna(False).astype(int)

# ytp_0..ytp_5, then ydp_0..ydp_5: 1 when the offset is <= k, else 0.
for offset_col in ("ytp", "ydp"):
    for k in range(6):
        df[f"{offset_col}_{k}"] = (df[offset_col] <= k).astype(int)

df.to_excel(output_path, index=False)

In [2]:
import pandas as pd
# Flag rows whose 2021 spending reaches fixed dollar cutoffs and save the result.
path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\complete_rank.xlsx"
output_path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\complete_rank1.xlsx"
df = pd.read_excel(path)

# The appl_product_first_years.xlsx workbook was loaded here but never used in
# this cell; the unused read is dropped.

# Flag column -> minimum Tot_Spndng_2021 needed for the flag to be 1.
# NOTE(review): presumably the 2021 spending of the 50th / 100th / 150th /
# 250th ranked drug — confirm where these figures come from.
R21_SPENDING_CUTOFFS = {
    "r21_50": 788_463_467,
    "r21_100": 450_220_327,
    "r21_150": 285_251_576,
    "r21_250": 147_656_079,
}

for flag_name, cutoff in R21_SPENDING_CUTOFFS.items():
    df[flag_name] = (df['Tot_Spndng_2021'] >= cutoff).astype('Int64')

df.to_excel(output_path, index=False)

In [3]:
import pandas as pd

# --- Locations of the ranked panel, the per-product lookup and the output ---
path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\complete_rank1.xlsx"
app_path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\appl_product_first_years.xlsx"
output_path = r"F:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\complete_rank2.xlsx"

# --- Read both workbooks with the join keys as text so they compare equal ---
key_dtypes = {"ApplNo": str, "ProductNo": str}
df = pd.read_excel(path, dtype=key_dtypes)
df1 = pd.read_excel(app_path, dtype=key_dtypes)

# --- Left join: every row of df is kept, df1 columns are added where the keys match ---
merged = df.merge(df1, on=["ApplNo", "ProductNo"], how="left")

# Keep rows whose FirstSubmissionStatusYear is 2000 or later. Rows where that
# value is missing fail the comparison and are dropped as well.
# NOTE(review): FirstSubmissionStatusYear presumably comes from df1, in which
# case products absent from df1 are removed here — confirm.
merged = merged.loc[merged["FirstSubmissionStatusYear"] >= 2000].copy()

# --- Write the result ---
merged.to_excel(output_path, index=False)


## manual

In [29]:
import pandas as pd

# --- Paths ---
path = r"D:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\complete_rank2.xlsx"
output_path = r"D:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\complete_rank2_fixed.xlsx"

# --- Load (keys as text so they compare equal to the string ids below) ---
df = pd.read_excel(path, dtype={"ApplNo": str, "ProductNo": str})


def _backfill_product_years(frame, appl_no, product_no, new_appyear, add_years, indication_count=1):
    """Correct ``appyear`` for one product and append rows for its missing years.

    Parameters
    ----------
    frame : DataFrame with at least ApplNo, ProductNo, year and appyear columns.
        The appyear of the product's existing rows is updated in place.
    appl_no, product_no : key values identifying the product.
    new_appyear : value written to ``appyear`` on every row of the product.
    add_years : iterable of years that must exist for the product; years the
        product already has are skipped.
    indication_count : value written to ``indication_count`` on added rows
        (only when the frame has that column).

    Returns
    -------
    ``frame`` itself when no year was missing, otherwise a new DataFrame with
    the added rows appended and the index renumbered.

    Raises
    ------
    ValueError when the frame has no row for the product.

    NOTE(review): every added row is a copy of the product's first row with
    only year, appyear and indication_count replaced, so any other column that
    depends on year or on the previous appyear keeps the template's value —
    confirm those columns are recomputed afterwards.
    """
    mask = (frame["ApplNo"] == appl_no) & (frame["ProductNo"] == product_no)
    if not mask.any():
        raise ValueError("No rows found for the specified ApplNo/ProductNo.")

    frame.loc[mask, "appyear"] = new_appyear

    # Template = first row of the product, taken after the appyear update.
    template = frame.loc[mask].iloc[0].copy()
    existing_years = set(frame.loc[mask, "year"].dropna().astype(int).tolist())

    new_rows = []
    for year in add_years:
        if year in existing_years:
            continue
        row = template.copy()
        row["year"] = year
        row["appyear"] = new_appyear
        if "indication_count" in row.index:
            row["indication_count"] = indication_count
        new_rows.append(row)

    if not new_rows:
        return frame
    return pd.concat([frame, pd.DataFrame(new_rows)], ignore_index=True)


# (ApplNo, ProductNo, corrected appyear, years that must exist, indication_count of added rows)
# range(a, b) covers a..b-1 inclusive.
MANUAL_FIXES_PART1 = [
    ("20725", "1", 2009, range(2009, 2020), 1),  # 2009..2019
    ("20986", "1", 2000, range(2000, 2015), 1),  # 2000..2014
    ("21077", "1", 2000, range(2000, 2003), 1),  # 2000..2002
    ("21081", "1", 2000, range(2000, 2015), 1),  # 2000..2014
]

# Work on a copy so the loaded frame stays untouched.
out = df.copy()
for appl_no, product_no, new_appyear, add_years, indication_count in MANUAL_FIXES_PART1:
    out = _backfill_product_years(out, appl_no, product_no, new_appyear, add_years, indication_count)

# 2

In [30]:
# Manual corrections, part 2: applied to `out` produced by the previous cell.


def _backfill_product_years(frame, appl_no, product_no, new_appyear, add_years, indication_count=1):
    """Correct ``appyear`` for one product and append rows for its missing years.

    Defined in this cell so the cell needs nothing but ``out`` and ``pd``.

    Parameters
    ----------
    frame : DataFrame with at least ApplNo, ProductNo, year and appyear columns.
        The appyear of the product's existing rows is updated in place.
    appl_no, product_no : key values identifying the product.
    new_appyear : value written to ``appyear`` on every row of the product.
    add_years : iterable of years that must exist for the product; years the
        product already has are skipped.
    indication_count : value written to ``indication_count`` on added rows
        (only when the frame has that column).

    Returns
    -------
    ``frame`` itself when no year was missing, otherwise a new DataFrame with
    the added rows appended and the index renumbered.

    Raises
    ------
    ValueError when the frame has no row for the product.

    NOTE(review): every added row is a copy of the product's first row with
    only year, appyear and indication_count replaced, so any other column that
    depends on year or on the previous appyear keeps the template's value —
    confirm those columns are recomputed afterwards.
    """
    mask = (frame["ApplNo"] == appl_no) & (frame["ProductNo"] == product_no)
    if not mask.any():
        raise ValueError("No rows found for the specified ApplNo/ProductNo.")

    frame.loc[mask, "appyear"] = new_appyear

    # Template = first row of the product, taken after the appyear update.
    template = frame.loc[mask].iloc[0].copy()
    existing_years = set(frame.loc[mask, "year"].dropna().astype(int).tolist())

    new_rows = []
    for year in add_years:
        if year in existing_years:
            continue
        row = template.copy()
        row["year"] = year
        row["appyear"] = new_appyear
        if "indication_count" in row.index:
            row["indication_count"] = indication_count
        new_rows.append(row)

    if not new_rows:
        return frame
    return pd.concat([frame, pd.DataFrame(new_rows)], ignore_index=True)


# (ApplNo, ProductNo, corrected appyear, years that must exist, indication_count of added rows)
# range(a, b) covers a..b-1 inclusive.
MANUAL_FIXES_PART2 = [
    ("21106", "1", 2003, range(2003, 2020), 1),  # 2003..2019
    ("21152", "1", 2005, range(2005, 2015), 1),  # 2005..2014
    ("21172", "1", 2001, range(2001, 2020), 1),  # 2001..2019
    ("21271", "1", 2003, range(2003, 2020), 1),  # 2003..2019
]

for appl_no, product_no, new_appyear, add_years, indication_count in MANUAL_FIXES_PART2:
    out = _backfill_product_years(out, appl_no, product_no, new_appyear, add_years, indication_count)

# 3

In [31]:
# --- Target drug keys 21346 ---
TARGET_APPLNO = "21346"
TARGET_PRODUCTNO = "1"
NEW_APPYEAR = 2003
ADD_YEARS = list(range(2003, 2007))  # 2009..2019 inclusive

# --- Filter subset for the target drug ---
mask = (out["ApplNo"] == TARGET_APPLNO) & (out["ProductNo"] == TARGET_PRODUCTNO)
sub = out.loc[mask].copy()

if sub.empty:
    raise ValueError("No rows found for the specified ApplNo/ProductNo.")

# --- Update appyear to 2009 for this drug (all its rows) ---
out.loc[mask, "appyear"] = NEW_APPYEAR

# --- Build template row (use the first row for this drug after appyear update) ---
template = out.loc[mask].iloc[0].copy()

# --- Determine which years to add (avoid duplicates) ---
existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
to_add_years = [y for y in ADD_YEARS if y not in existing_years]

# --- Create new rows for missing years ---
new_rows = []
for y in to_add_years:
    r = template.copy()
    r["year"] = y
    r["appyear"] = NEW_APPYEAR
    # Set indication_count = 1 for added rows (create if not exists)
    if "indication_count" in r.index:
        r["indication_count"] = 1
    new_rows.append(r)

# --- Append and drop dupes on the (ApplNo, ProductNo, year) trio ---
if new_rows:
    add_df = pd.DataFrame(new_rows)
    out = pd.concat([out, add_df], ignore_index=True)



# --- Target drug keys 21426 ---
TARGET_APPLNO = "21426"
TARGET_PRODUCTNO = "1"
NEW_APPYEAR = 2006
ADD_YEARS = list(range(2006, 2020))  # 2009..2019 inclusive

# --- Filter subset for the target drug ---
mask = (out["ApplNo"] == TARGET_APPLNO) & (out["ProductNo"] == TARGET_PRODUCTNO)
sub = out.loc[mask].copy()

if sub.empty:
    raise ValueError("No rows found for the specified ApplNo/ProductNo.")

# --- Update appyear to 2009 for this drug (all its rows) ---
out.loc[mask, "appyear"] = NEW_APPYEAR

# --- Build template row (use the first row for this drug after appyear update) ---
template = out.loc[mask].iloc[0].copy()

# --- Determine which years to add (avoid duplicates) ---
existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
to_add_years = [y for y in ADD_YEARS if y not in existing_years]

# --- Create new rows for missing years ---
new_rows = []
for y in to_add_years:
    r = template.copy()
    r["year"] = y
    r["appyear"] = NEW_APPYEAR
    # Set indication_count = 1 for added rows (create if not exists)
    if "indication_count" in r.index:
        r["indication_count"] = 2
    new_rows.append(r)

# --- Append and drop dupes on the (ApplNo, ProductNo, year) trio ---
if new_rows:
    add_df = pd.DataFrame(new_rows)
    out = pd.concat([out, add_df], ignore_index=True)



# --- Target drug keys 21426 ---
TARGET_APPLNO = "21446"
TARGET_PRODUCTNO = "1"
NEW_APPYEAR = 2004
ADD_YEARS = list(range(2004, 2007))  # 2009..2019 inclusive

# --- Filter subset for the target drug ---
mask = (out["ApplNo"] == TARGET_APPLNO) & (out["ProductNo"] == TARGET_PRODUCTNO)
sub = out.loc[mask].copy()

if sub.empty:
    raise ValueError("No rows found for the specified ApplNo/ProductNo.")

# --- Update appyear to 2009 for this drug (all its rows) ---
out.loc[mask, "appyear"] = NEW_APPYEAR

# --- Build template row (use the first row for this drug after appyear update) ---
template = out.loc[mask].iloc[0].copy()

# --- Determine which years to add (avoid duplicates) ---
existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
to_add_years = [y for y in ADD_YEARS if y not in existing_years]

# --- Create new rows for missing years ---
new_rows = []
for y in to_add_years:
    r = template.copy()
    r["year"] = y
    r["appyear"] = NEW_APPYEAR
    # Set indication_count = 1 for added rows (create if not exists)
    if "indication_count" in r.index:
        r["indication_count"] = 3
    new_rows.append(r)

# --- Append and drop dupes on the (ApplNo, ProductNo, year) trio ---
if new_rows:
    add_df = pd.DataFrame(new_rows)
    out = pd.concat([out, add_df], ignore_index=True)



# --- Target drug keys 21426 ---
TARGET_APPLNO = "21472"
TARGET_PRODUCTNO = "1"
NEW_APPYEAR = 2002
ADD_YEARS = list(range(2002, 2006))  # 2009..2019 inclusive

# --- Filter subset for the target drug ---
mask = (out["ApplNo"] == TARGET_APPLNO) & (out["ProductNo"] == TARGET_PRODUCTNO)
sub = out.loc[mask].copy()

if sub.empty:
    raise ValueError("No rows found for the specified ApplNo/ProductNo.")

# --- Update appyear to 2009 for this drug (all its rows) ---
out.loc[mask, "appyear"] = NEW_APPYEAR

# --- Build template row (use the first row for this drug after appyear update) ---
template = out.loc[mask].iloc[0].copy()

# --- Determine which years to add (avoid duplicates) ---
existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
to_add_years = [y for y in ADD_YEARS if y not in existing_years]

# --- Create new rows for missing years ---
new_rows = []
for y in to_add_years:
    r = template.copy()
    r["year"] = y
    r["appyear"] = NEW_APPYEAR
    # Set indication_count = 1 for added rows (create if not exists)
    if "indication_count" in r.index:
        r["indication_count"] = 1
    new_rows.append(r)

# --- Append and drop dupes on the (ApplNo, ProductNo, year) trio ---
if new_rows:
    add_df = pd.DataFrame(new_rows)
    out = pd.concat([out, add_df], ignore_index=True)



# --- Back-fill drug-year rows for ApplNo 21505, 21514, 21536, 21538 ---
# For each drug listed below:
#   1. overwrite `appyear` on all of its existing rows in `out`;
#   2. clone its first row once for every year in [appyear, last_year] that the
#      drug does not already have, setting `year`, `appyear` and
#      `indication_count` on the clone.
# Tuple layout:
#   (ApplNo, ProductNo, appyear, last year to fill (inclusive),
#    indication_count written to the ADDED rows only)
backfill_specs = [
    ("21505", "1", 2003, 2004, 1),
    ("21514", "1", 2006, 2009, 1),
    ("21536", "1", 2005, 2019, 1),
    ("21538", "1", 2008, 2020, 2),
]

for appl_no, product_no, app_year, last_year, n_indications in backfill_specs:
    # Rows currently belonging to this drug (keys are compared as strings).
    mask = (out["ApplNo"] == appl_no) & (out["ProductNo"] == product_no)
    if not mask.any():
        raise ValueError(
            f"No rows found for ApplNo={appl_no!r}, ProductNo={product_no!r}."
        )

    # Step 1: set the approval year on every existing row of this drug.
    out.loc[mask, "appyear"] = app_year

    # Template = first existing row, taken after the appyear update. Every
    # column other than year/appyear/indication_count is copied unchanged into
    # the added rows. NOTE(review): that includes any year-specific values on
    # the template row -- confirm this is intended.
    template = out.loc[mask].iloc[0].copy()

    # Step 2: only years the drug does not already have are added, so
    # re-running the cell does not create duplicates.
    existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
    new_rows = []
    for year in range(app_year, last_year + 1):
        if year in existing_years:
            continue
        row = template.copy()
        row["year"] = year
        row["appyear"] = app_year
        # Overridden only when the column already exists; it is not created here.
        if "indication_count" in row.index:
            row["indication_count"] = n_indications
        new_rows.append(row)

    # Append the clones; `out` is rebound so the next drug sees them.
    if new_rows:
        out = pd.concat([out, pd.DataFrame(new_rows)], ignore_index=True)

# 4

In [32]:
# --- Back-fill drug-year rows for ApplNo 21597 ... 21716 ---
# For each drug listed below:
#   1. overwrite `appyear` on all of its existing rows in `out`;
#   2. clone its first row once for every year in [appyear, last_year] that the
#      drug does not already have, setting `year`, `appyear` and
#      `indication_count` on the clone.
# Tuple layout:
#   (ApplNo, ProductNo, appyear, last year to fill (inclusive),
#    indication_count written to the ADDED rows only)
backfill_specs = [
    ("21597", "1", 2003, 2020, 1),
    ("21629", "1", 2004, 2019, 1),
    ("21640", "1", 2004, 2019, 1),
    ("21663", "1", 2004, 2019, 1),
    ("21665", "1", 2004, 2019, 1),
    ("21684", "1", 2004, 2019, 2),
    ("21716", "1", 2005, 2019, 1),
]

for appl_no, product_no, app_year, last_year, n_indications in backfill_specs:
    # Rows currently belonging to this drug (keys are compared as strings).
    mask = (out["ApplNo"] == appl_no) & (out["ProductNo"] == product_no)
    if not mask.any():
        raise ValueError(
            f"No rows found for ApplNo={appl_no!r}, ProductNo={product_no!r}."
        )

    # Step 1: set the approval year on every existing row of this drug.
    out.loc[mask, "appyear"] = app_year

    # Template = first existing row, taken after the appyear update. Every
    # column other than year/appyear/indication_count is copied unchanged into
    # the added rows. NOTE(review): that includes any year-specific values on
    # the template row -- confirm this is intended.
    template = out.loc[mask].iloc[0].copy()

    # Step 2: only years the drug does not already have are added, so
    # re-running the cell does not create duplicates.
    existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
    new_rows = []
    for year in range(app_year, last_year + 1):
        if year in existing_years:
            continue
        row = template.copy()
        row["year"] = year
        row["appyear"] = app_year
        # Overridden only when the column already exists; it is not created here.
        if "indication_count" in row.index:
            row["indication_count"] = n_indications
        new_rows.append(row)

    # Append the clones; `out` is rebound so the next drug sees them.
    if new_rows:
        out = pd.concat([out, pd.DataFrame(new_rows)], ignore_index=True)

# 4.5

In [33]:
# --- Back-fill drug-year rows for ApplNo 21731 / ProductNo 1 ---
TARGET_APPLNO = "21731"
TARGET_PRODUCTNO = "1"
NEW_APPYEAR = 2004                   # written to `appyear` on existing and added rows
ADD_YEARS = list(range(2004, 2023))  # 2004..2022 inclusive
ADDED_INDICATION_COUNT = 1           # `indication_count` for the ADDED rows only

# --- Locate the drug's existing rows; fail loudly if the key is absent ---
mask = (out["ApplNo"] == TARGET_APPLNO) & (out["ProductNo"] == TARGET_PRODUCTNO)
if not mask.any():
    raise ValueError(
        f"No rows found for ApplNo={TARGET_APPLNO!r}, ProductNo={TARGET_PRODUCTNO!r}."
    )

# --- Set appyear to NEW_APPYEAR on every existing row of this drug ---
out.loc[mask, "appyear"] = NEW_APPYEAR

# --- Template = first existing row (taken after the appyear update) ---
# Every column other than year/appyear/indication_count is copied unchanged
# into the added rows. NOTE(review): that includes any year-specific values on
# the template row -- confirm this is intended.
template = out.loc[mask].iloc[0].copy()

# --- Years in ADD_YEARS that the drug does not already have ---
existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
to_add_years = [y for y in ADD_YEARS if y not in existing_years]

# --- Build one cloned row per missing year ---
new_rows = []
for y in to_add_years:
    r = template.copy()
    r["year"] = y
    r["appyear"] = NEW_APPYEAR
    # Overridden only when the column already exists; it is not created here.
    if "indication_count" in r.index:
        r["indication_count"] = ADDED_INDICATION_COUNT
    new_rows.append(r)

# --- Append the new rows (existing years were skipped above, so no de-dup step) ---
if new_rows:
    add_df = pd.DataFrame(new_rows)
    out = pd.concat([out, add_df], ignore_index=True)

In [34]:
# --- Back-fill drug-year rows for ApplNo 21742 / ProductNo 2 ---
TARGET_APPLNO = "21742"
TARGET_PRODUCTNO = "2"
NEW_APPYEAR = 2007                   # written to `appyear` on existing and added rows
ADD_YEARS = list(range(2007, 2010))  # 2007..2009 inclusive
ADDED_INDICATION_COUNT = 1           # `indication_count` for the ADDED rows only

# --- Locate the drug's existing rows; fail loudly if the key is absent ---
mask = (out["ApplNo"] == TARGET_APPLNO) & (out["ProductNo"] == TARGET_PRODUCTNO)
if not mask.any():
    raise ValueError(
        f"No rows found for ApplNo={TARGET_APPLNO!r}, ProductNo={TARGET_PRODUCTNO!r}."
    )

# --- Set appyear to NEW_APPYEAR on every existing row of this drug ---
out.loc[mask, "appyear"] = NEW_APPYEAR

# --- Template = first existing row (taken after the appyear update) ---
# Every column other than year/appyear/indication_count is copied unchanged
# into the added rows. NOTE(review): that includes any year-specific values on
# the template row -- confirm this is intended.
template = out.loc[mask].iloc[0].copy()

# --- Years in ADD_YEARS that the drug does not already have ---
existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
to_add_years = [y for y in ADD_YEARS if y not in existing_years]

# --- Build one cloned row per missing year ---
new_rows = []
for y in to_add_years:
    r = template.copy()
    r["year"] = y
    r["appyear"] = NEW_APPYEAR
    # Overridden only when the column already exists; it is not created here.
    if "indication_count" in r.index:
        r["indication_count"] = ADDED_INDICATION_COUNT
    new_rows.append(r)

# --- Append the new rows (existing years were skipped above, so no de-dup step) ---
if new_rows:
    add_df = pd.DataFrame(new_rows)
    out = pd.concat([out, add_df], ignore_index=True)

In [35]:
# --- Back-fill drug-year rows for ApplNo 21765, 21810, 21839 ---
# For each drug listed below:
#   1. overwrite `appyear` on all of its existing rows in `out`;
#   2. clone its first row once for every year in [appyear, last_year] that the
#      drug does not already have, setting `year`, `appyear` and
#      `indication_count` on the clone.
# Tuple layout:
#   (ApplNo, ProductNo, appyear, last year to fill (inclusive),
#    indication_count written to the ADDED rows only)
backfill_specs = [
    ("21765", "1", 2004, 2019, 1),
    ("21810", "1", 2008, 2019, 1),
    ("21839", "1", 2005, 2019, 1),
]

for appl_no, product_no, app_year, last_year, n_indications in backfill_specs:
    # Rows currently belonging to this drug (keys are compared as strings).
    mask = (out["ApplNo"] == appl_no) & (out["ProductNo"] == product_no)
    if not mask.any():
        raise ValueError(
            f"No rows found for ApplNo={appl_no!r}, ProductNo={product_no!r}."
        )

    # Step 1: set the approval year on every existing row of this drug.
    out.loc[mask, "appyear"] = app_year

    # Template = first existing row, taken after the appyear update. Every
    # column other than year/appyear/indication_count is copied unchanged into
    # the added rows. NOTE(review): that includes any year-specific values on
    # the template row -- confirm this is intended.
    template = out.loc[mask].iloc[0].copy()

    # Step 2: only years the drug does not already have are added, so
    # re-running the cell does not create duplicates.
    existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
    new_rows = []
    for year in range(app_year, last_year + 1):
        if year in existing_years:
            continue
        row = template.copy()
        row["year"] = year
        row["appyear"] = app_year
        # Overridden only when the column already exists; it is not created here.
        if "indication_count" in row.index:
            row["indication_count"] = n_indications
        new_rows.append(row)

    # Append the clones; `out` is rebound so the next drug sees them.
    if new_rows:
        out = pd.concat([out, pd.DataFrame(new_rows)], ignore_index=True)

# 4.8

In [36]:
# --- Back-fill drug-year rows for ApplNo 21859 / ProductNo 1 ---
TARGET_APPLNO = "21859"
TARGET_PRODUCTNO = "1"
NEW_APPYEAR = 2005                   # written to `appyear` on existing and added rows
ADD_YEARS = list(range(2005, 2020))  # 2005..2019 inclusive
ADDED_INDICATION_COUNT = 1           # `indication_count` for the ADDED rows only

# --- Locate the drug's existing rows; fail loudly if the key is absent ---
mask = (out["ApplNo"] == TARGET_APPLNO) & (out["ProductNo"] == TARGET_PRODUCTNO)
if not mask.any():
    raise ValueError(
        f"No rows found for ApplNo={TARGET_APPLNO!r}, ProductNo={TARGET_PRODUCTNO!r}."
    )

# --- Set appyear to NEW_APPYEAR on every existing row of this drug ---
out.loc[mask, "appyear"] = NEW_APPYEAR

# --- Template = first existing row (taken after the appyear update) ---
# Every column other than year/appyear/indication_count is copied unchanged
# into the added rows. NOTE(review): that includes any year-specific values on
# the template row -- confirm this is intended.
template = out.loc[mask].iloc[0].copy()

# --- Years in ADD_YEARS that the drug does not already have ---
existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
to_add_years = [y for y in ADD_YEARS if y not in existing_years]

# --- Build one cloned row per missing year ---
new_rows = []
for y in to_add_years:
    r = template.copy()
    r["year"] = y
    r["appyear"] = NEW_APPYEAR
    # Overridden only when the column already exists; it is not created here.
    if "indication_count" in r.index:
        r["indication_count"] = ADDED_INDICATION_COUNT
    new_rows.append(r)

# --- Append the new rows (existing years were skipped above, so no de-dup step) ---
if new_rows:
    add_df = pd.DataFrame(new_rows)
    out = pd.concat([out, add_df], ignore_index=True)

In [37]:
# --- Back-fill drug-year rows for ApplNo 21878, 21884 ---
# For each drug listed below:
#   1. overwrite `appyear` on all of its existing rows in `out`;
#   2. clone its first row once for every year in [appyear, last_year] that the
#      drug does not already have, setting `year`, `appyear` and
#      `indication_count` on the clone.
# Tuple layout:
#   (ApplNo, ProductNo, appyear, last year to fill (inclusive),
#    indication_count written to the ADDED rows only)
backfill_specs = [
    ("21878", "1", 2005, 2019, 1),
    ("21884", "1", 2005, 2019, 1),
]

for appl_no, product_no, app_year, last_year, n_indications in backfill_specs:
    # Rows currently belonging to this drug (keys are compared as strings).
    mask = (out["ApplNo"] == appl_no) & (out["ProductNo"] == product_no)
    if not mask.any():
        raise ValueError(
            f"No rows found for ApplNo={appl_no!r}, ProductNo={product_no!r}."
        )

    # Step 1: set the approval year on every existing row of this drug.
    out.loc[mask, "appyear"] = app_year

    # Template = first existing row, taken after the appyear update. Every
    # column other than year/appyear/indication_count is copied unchanged into
    # the added rows. NOTE(review): that includes any year-specific values on
    # the template row -- confirm this is intended.
    template = out.loc[mask].iloc[0].copy()

    # Step 2: only years the drug does not already have are added, so
    # re-running the cell does not create duplicates.
    existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
    new_rows = []
    for year in range(app_year, last_year + 1):
        if year in existing_years:
            continue
        row = template.copy()
        row["year"] = year
        row["appyear"] = app_year
        # Overridden only when the column already exists; it is not created here.
        if "indication_count" in row.index:
            row["indication_count"] = n_indications
        new_rows.append(row)

    # Append the clones; `out` is rebound so the next drug sees them.
    if new_rows:
        out = pd.concat([out, pd.DataFrame(new_rows)], ignore_index=True)

# 5

In [38]:
# --- Back-fill drug-year rows for ApplNo 21926, 21964 ---
# For each drug listed below:
#   1. overwrite `appyear` on all of its existing rows in `out`;
#   2. clone its first row once for every year in [appyear, last_year] that the
#      drug does not already have, setting `year`, `appyear` and
#      `indication_count` on the clone.
# Tuple layout:
#   (ApplNo, ProductNo, appyear, last year to fill (inclusive),
#    indication_count written to the ADDED rows only)
backfill_specs = [
    ("21926", "1", 2008, 2014, 1),
    ("21964", "1", 2008, 2013, 1),
]

for appl_no, product_no, app_year, last_year, n_indications in backfill_specs:
    # Rows currently belonging to this drug (keys are compared as strings).
    mask = (out["ApplNo"] == appl_no) & (out["ProductNo"] == product_no)
    if not mask.any():
        raise ValueError(
            f"No rows found for ApplNo={appl_no!r}, ProductNo={product_no!r}."
        )

    # Step 1: set the approval year on every existing row of this drug.
    out.loc[mask, "appyear"] = app_year

    # Template = first existing row, taken after the appyear update. Every
    # column other than year/appyear/indication_count is copied unchanged into
    # the added rows. NOTE(review): that includes any year-specific values on
    # the template row -- confirm this is intended.
    template = out.loc[mask].iloc[0].copy()

    # Step 2: only years the drug does not already have are added, so
    # re-running the cell does not create duplicates.
    existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
    new_rows = []
    for year in range(app_year, last_year + 1):
        if year in existing_years:
            continue
        row = template.copy()
        row["year"] = year
        row["appyear"] = app_year
        # Overridden only when the column already exists; it is not created here.
        if "indication_count" in row.index:
            row["indication_count"] = n_indications
        new_rows.append(row)

    # Append the clones; `out` is rebound so the next drug sees them.
    if new_rows:
        out = pd.concat([out, pd.DataFrame(new_rows)], ignore_index=True)

# 5.1

In [39]:
# --- Back-fill drug-year rows for ApplNo 21983, 22006, 22029 ---
# For each drug listed below:
#   1. overwrite `appyear` on all of its existing rows in `out`;
#   2. clone its first row once for every year in [appyear, last_year] that the
#      drug does not already have, setting `year`, `appyear` and
#      `indication_count` on the clone.
# Tuple layout:
#   (ApplNo, ProductNo, appyear, last year to fill (inclusive),
#    indication_count written to the ADDED rows only)
backfill_specs = [
    ("21983", "1", 2006, 2016, 1),
    ("22006", "1", 2009, 2012, 2),
    ("22029", "1", 2008, 2011, 1),
]

for appl_no, product_no, app_year, last_year, n_indications in backfill_specs:
    # Rows currently belonging to this drug (keys are compared as strings).
    mask = (out["ApplNo"] == appl_no) & (out["ProductNo"] == product_no)
    if not mask.any():
        raise ValueError(
            f"No rows found for ApplNo={appl_no!r}, ProductNo={product_no!r}."
        )

    # Step 1: set the approval year on every existing row of this drug.
    out.loc[mask, "appyear"] = app_year

    # Template = first existing row, taken after the appyear update. Every
    # column other than year/appyear/indication_count is copied unchanged into
    # the added rows. NOTE(review): that includes any year-specific values on
    # the template row -- confirm this is intended.
    template = out.loc[mask].iloc[0].copy()

    # Step 2: only years the drug does not already have are added, so
    # re-running the cell does not create duplicates.
    existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
    new_rows = []
    for year in range(app_year, last_year + 1):
        if year in existing_years:
            continue
        row = template.copy()
        row["year"] = year
        row["appyear"] = app_year
        # Overridden only when the column already exists; it is not created here.
        if "indication_count" in row.index:
            row["indication_count"] = n_indications
        new_rows.append(row)

    # Append the clones; `out` is rebound so the next drug sees them.
    if new_rows:
        out = pd.concat([out, pd.DataFrame(new_rows)], ignore_index=True)

In [40]:
# --- Back-fill drug-year rows for ApplNo 22051 / ProductNo 2 ---
TARGET_APPLNO = "22051"
TARGET_PRODUCTNO = "2"
NEW_APPYEAR = 2007                   # written to `appyear` on existing and added rows
ADD_YEARS = list(range(2007, 2016))  # 2007..2015 inclusive
ADDED_INDICATION_COUNT = 1           # `indication_count` for the ADDED rows only

# --- Locate the drug's existing rows; fail loudly if the key is absent ---
mask = (out["ApplNo"] == TARGET_APPLNO) & (out["ProductNo"] == TARGET_PRODUCTNO)
if not mask.any():
    raise ValueError(
        f"No rows found for ApplNo={TARGET_APPLNO!r}, ProductNo={TARGET_PRODUCTNO!r}."
    )

# --- Set appyear to NEW_APPYEAR on every existing row of this drug ---
out.loc[mask, "appyear"] = NEW_APPYEAR

# --- Template = first existing row (taken after the appyear update) ---
# Every column other than year/appyear/indication_count is copied unchanged
# into the added rows. NOTE(review): that includes any year-specific values on
# the template row -- confirm this is intended.
template = out.loc[mask].iloc[0].copy()

# --- Years in ADD_YEARS that the drug does not already have ---
existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())
to_add_years = [y for y in ADD_YEARS if y not in existing_years]

# --- Build one cloned row per missing year ---
new_rows = []
for y in to_add_years:
    r = template.copy()
    r["year"] = y
    r["appyear"] = NEW_APPYEAR
    # Overridden only when the column already exists; it is not created here.
    if "indication_count" in r.index:
        r["indication_count"] = ADDED_INDICATION_COUNT
    new_rows.append(r)

# --- Append the new rows (existing years were skipped above, so no de-dup step) ---
if new_rows:
    add_df = pd.DataFrame(new_rows)
    out = pd.concat([out, add_df], ignore_index=True)

# 5.2

In [41]:
# --- Manual appyear / year-coverage corrections (one table entry per drug) ---
# Each tuple is:
#   (ApplNo, ProductNo, corrected appyear, last year to backfill (inclusive),
#    indication_count written on the rows that get added)
# Backfilled years always start at the corrected appyear.
APPYEAR_FIXES = [
    ("22113", "1", 2011, 2016, 3),
    ("22159", "1", 2008, 2015, 1),
    ("22175", "1", 2012, 2019, 1),
    ("22185", "1", 2008, 2011, 1),
    ("22203", "1", 2008, 2012, 1),
]

for applno, productno, appyear, last_year, indication_count in APPYEAR_FIXES:
    # Rows of `out` belonging to this drug
    mask = (out["ApplNo"] == applno) & (out["ProductNo"] == productno)
    if not mask.any():
        # Name the drug: inside a loop "the specified ApplNo" would be ambiguous
        raise ValueError(
            f"No rows found for ApplNo={applno!r}, ProductNo={productno!r}."
        )

    # Overwrite appyear on every existing row of this drug
    out.loc[mask, "appyear"] = appyear

    # Template = first row of the drug, taken after the appyear update
    template = out.loc[mask].iloc[0].copy()

    # Years already present are skipped, so re-running the cell adds nothing new
    existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())

    new_rows = []
    for year in range(appyear, last_year + 1):
        if year in existing_years:
            continue
        row = template.copy()
        row["year"] = year
        row["appyear"] = appyear
        # Only overwritten when `out` already has the column; never created here
        if "indication_count" in row.index:
            row["indication_count"] = indication_count
        new_rows.append(row)

    # Append this drug's new rows (no de-duplication is performed)
    if new_rows:
        out = pd.concat([out, pd.DataFrame(new_rows)], ignore_index=True)

In [42]:
# --- Manual appyear / year-coverage corrections (one table entry per drug) ---
# Each tuple is:
#   (ApplNo, ProductNo, corrected appyear, last year to backfill (inclusive),
#    indication_count written on the rows that get added)
# Backfilled years always start at the corrected appyear.
APPYEAR_FIXES = [
    ("22210", "1", 2009, 2019, 1),
    ("22287", "1", 2009, 2010, 1),
    ("22436", "1", 2009, 2013, 1),
    ("22458", "1", 2012, 2014, 1),
    ("22523", "1", 2010, 2019, 1),
    ("22542", "1", 2012, 2019, 1),
    ("22575", "1", 2010, 2019, 1),
    ("77394", "1", 2005, 2020, 1),
    ("77681", "1", 2006, 2013, 1),
    ("125151", "1", 2006, 2012, 1),
    ("201849", "1", 2015, 2018, 1),
    ("209500", "1", 2019, 2020, 1),
    ("213721", "1", 2020, 2022, 1),
    ("214032", "1", 2021, 2022, 2),
    ("214121", "1", 2020, 2021, 4),
]

for applno, productno, appyear, last_year, indication_count in APPYEAR_FIXES:
    # Rows of `out` belonging to this drug
    mask = (out["ApplNo"] == applno) & (out["ProductNo"] == productno)
    if not mask.any():
        # Name the drug: inside a loop "the specified ApplNo" would be ambiguous
        raise ValueError(
            f"No rows found for ApplNo={applno!r}, ProductNo={productno!r}."
        )

    # Overwrite appyear on every existing row of this drug
    out.loc[mask, "appyear"] = appyear

    # Template = first row of the drug, taken after the appyear update
    template = out.loc[mask].iloc[0].copy()

    # Years already present are skipped, so re-running the cell adds nothing new
    existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())

    new_rows = []
    for year in range(appyear, last_year + 1):
        if year in existing_years:
            continue
        row = template.copy()
        row["year"] = year
        row["appyear"] = appyear
        # Only overwritten when `out` already has the column; never created here
        if "indication_count" in row.index:
            row["indication_count"] = indication_count
        new_rows.append(row)

    # Append this drug's new rows (no de-duplication is performed)
    if new_rows:
        out = pd.concat([out, pd.DataFrame(new_rows)], ignore_index=True)

# 5.3

In [43]:
# --- Manual appyear / year-coverage / indication_count corrections ---
# One dict per drug:
#   applno, productno : keys matched against out["ApplNo"] / out["ProductNo"]
#   appyear           : corrected approval year; backfilled years start here
#   last_year         : last year to backfill (inclusive)
#   count             : year -> indication_count for the rows that get added
#   overwrite         : optional; when present, indication_count of ALL rows of
#                       the drug with year in 2000..2025 is set to this value
#                       after the backfill (existing rows included)
INDICATION_FIXES = [
    dict(applno="21148", productno="1", appyear=2000, last_year=2017,
         count=lambda y: 1 if y <= 2006 else 2),
    dict(applno="21149", productno="1", appyear=2000, last_year=2019,
         count=lambda y: 2, overwrite=2),
    dict(applno="21211", productno="1", appyear=2004, last_year=2019,
         count=lambda y: 2 if y <= 2010 else 3),
    dict(applno="21225", productno="1", appyear=2000, last_year=2008,
         count=lambda y: 1 if y <= 2007 else 2),
    dict(applno="21243", productno="1", appyear=2000, last_year=2021,
         count=lambda y: 2, overwrite=2),
]

for fix in INDICATION_FIXES:
    applno, productno, appyear = fix["applno"], fix["productno"], fix["appyear"]

    # Rows of `out` belonging to this drug
    mask = (out["ApplNo"] == applno) & (out["ProductNo"] == productno)
    if not mask.any():
        # Name the drug: inside a loop "the specified ApplNo" would be ambiguous
        raise ValueError(
            f"No rows found for ApplNo={applno!r}, ProductNo={productno!r}."
        )

    # Overwrite appyear on every existing row of this drug
    out.loc[mask, "appyear"] = appyear

    # Template = first row of the drug, taken after the appyear update
    template = out.loc[mask].iloc[0].copy()

    # Years already present are skipped, so re-running the cell adds nothing new
    existing_years = set(out.loc[mask, "year"].dropna().astype(int).tolist())

    new_rows = []
    for year in range(appyear, fix["last_year"] + 1):
        if year in existing_years:
            continue
        row = template.copy()
        row["year"] = year
        row["appyear"] = appyear
        # Set unconditionally in this cell (no column-existence check)
        row["indication_count"] = fix["count"](year)
        new_rows.append(row)

    # Append this drug's new rows (no de-duplication is performed)
    if new_rows:
        out = pd.concat([out, pd.DataFrame(new_rows)], ignore_index=True)

    # Optional blanket overwrite. The mask is rebuilt because `out` may have
    # grown in the concat above.
    if "overwrite" in fix:
        tmask = (out["ApplNo"] == applno) & (out["ProductNo"] == productno)
        in_window = out["year"].between(2000, 2025, inclusive="both")
        out.loc[tmask & in_window, "indication_count"] = fix["overwrite"]



# --- ApplNo 21266 / ProductNo 1: reset appyear and backfill missing years ---
TARGET_APPLNO = "21266"
TARGET_PRODUCTNO = "1"
NEW_APPYEAR = 2002
ADD_YEARS = list(range(2002, 2004))  # 2002..2003 (the range end is exclusive)

# Boolean selector for every row of this ApplNo/ProductNo pair; stop if there are none.
drug_rows = out["ApplNo"].eq(TARGET_APPLNO) & out["ProductNo"].eq(TARGET_PRODUCTNO)
if not drug_rows.any():
    raise ValueError("No rows found for the specified ApplNo/ProductNo.")

# Overwrite appyear on all of the drug's existing rows.
out.loc[drug_rows, "appyear"] = NEW_APPYEAR

# The drug's first row (read after the appyear update) seeds every new row,
# so all columns other than the three set below are copied from that row.
seed_row = out.loc[drug_rows].iloc[0].copy()

# Years in ADD_YEARS that the drug does not have yet (NaN years are dropped first).
years_present = set(out.loc[drug_rows, "year"].dropna().astype(int).tolist())
to_add_years = [yr for yr in ADD_YEARS if yr not in years_present]

# One new row per missing year. The 4 written here is provisional: the
# year-range assignments at the end of this section set the final value.
backfill_rows = []
for missing_year in to_add_years:
    filled = seed_row.copy()
    filled["year"] = missing_year
    filled["appyear"] = NEW_APPYEAR
    filled["indication_count"] = 4
    backfill_rows.append(filled)

# Append only when rows were generated; ignore_index renumbers the combined frame.
if backfill_rows:
    out = pd.concat([out, pd.DataFrame(backfill_rows)], ignore_index=True)

# Rebuild the selector (out may have grown) and assign indication_count by
# year range for all of the drug's rows: 3 for 2002..2003, 4 for 2004..2025.
drug_rows = out["ApplNo"].eq(TARGET_APPLNO) & out["ProductNo"].eq(TARGET_PRODUCTNO)
early_years = out["year"].ge(2002) & out["year"].le(2003)
later_years = out["year"].ge(2004) & out["year"].le(2025)
out.loc[drug_rows & early_years, "indication_count"] = 3
out.loc[drug_rows & later_years, "indication_count"] = 4

# --- ApplNo 21273 / ProductNo 1: reset appyear and backfill missing years ---
TARGET_APPLNO = "21273"
TARGET_PRODUCTNO = "1"
NEW_APPYEAR = 2005
ADD_YEARS = list(range(2005, 2020))  # 2005..2019 (the range end is exclusive)

# Boolean selector for every row of this ApplNo/ProductNo pair; stop if there are none.
drug_rows = out["ApplNo"].eq(TARGET_APPLNO) & out["ProductNo"].eq(TARGET_PRODUCTNO)
if not drug_rows.any():
    raise ValueError("No rows found for the specified ApplNo/ProductNo.")

# Overwrite appyear on all of the drug's existing rows.
out.loc[drug_rows, "appyear"] = NEW_APPYEAR

# The drug's first row (read after the appyear update) seeds every new row,
# so all columns other than the three set below are copied from that row.
seed_row = out.loc[drug_rows].iloc[0].copy()

# Years in ADD_YEARS that the drug does not have yet (NaN years are dropped first).
years_present = set(out.loc[drug_rows, "year"].dropna().astype(int).tolist())
to_add_years = [yr for yr in ADD_YEARS if yr not in years_present]

# One new row per missing year, each with indication_count = 2.
backfill_rows = []
for missing_year in to_add_years:
    filled = seed_row.copy()
    filled["year"] = missing_year
    filled["appyear"] = NEW_APPYEAR
    filled["indication_count"] = 2
    backfill_rows.append(filled)

# Append only when rows were generated; ignore_index renumbers the combined frame.
if backfill_rows:
    out = pd.concat([out, pd.DataFrame(backfill_rows)], ignore_index=True)

# Rebuild the selector (out may have grown) and force indication_count = 2
# on all of the drug's rows, old and new, whose year lies in 2005..2025.
drug_rows = out["ApplNo"].eq(TARGET_APPLNO) & out["ProductNo"].eq(TARGET_PRODUCTNO)
in_window = out["year"].ge(2005) & out["year"].le(2025)
out.loc[drug_rows & in_window, "indication_count"] = 2



# --- ApplNo 21289 / ProductNo 1: reset appyear and backfill missing years ---
TARGET_APPLNO = "21289"
TARGET_PRODUCTNO = "1"
NEW_APPYEAR = 2002
ADD_YEARS = list(range(2002, 2020))  # 2002..2019 (the range end is exclusive)

# Boolean selector for every row of this ApplNo/ProductNo pair; stop if there are none.
drug_rows = out["ApplNo"].eq(TARGET_APPLNO) & out["ProductNo"].eq(TARGET_PRODUCTNO)
if not drug_rows.any():
    raise ValueError("No rows found for the specified ApplNo/ProductNo.")

# Overwrite appyear on all of the drug's existing rows.
out.loc[drug_rows, "appyear"] = NEW_APPYEAR

# The drug's first row (read after the appyear update) seeds every new row,
# so all columns other than the three set below are copied from that row.
seed_row = out.loc[drug_rows].iloc[0].copy()

# Years in ADD_YEARS that the drug does not have yet (NaN years are dropped first).
years_present = set(out.loc[drug_rows, "year"].dropna().astype(int).tolist())
to_add_years = [yr for yr in ADD_YEARS if yr not in years_present]

# One new row per missing year: indication_count is 1 through 2013, 2 afterwards.
backfill_rows = []
for missing_year in to_add_years:
    filled = seed_row.copy()
    filled["year"] = missing_year
    filled["appyear"] = NEW_APPYEAR
    if missing_year <= 2013:
        filled["indication_count"] = 1
    else:
        filled["indication_count"] = 2
    backfill_rows.append(filled)

# Append only when rows were generated; ignore_index renumbers the combined frame.
if backfill_rows:
    out = pd.concat([out, pd.DataFrame(backfill_rows)], ignore_index=True)


# --- ApplNo 21484 / ProductNo 1: reset appyear and backfill missing years ---
TARGET_APPLNO = "21484"
TARGET_PRODUCTNO = "1"
NEW_APPYEAR = 2002
ADD_YEARS = list(range(2002, 2020))  # 2002..2019 (the range end is exclusive)

# Boolean selector for every row of this ApplNo/ProductNo pair; stop if there are none.
drug_rows = out["ApplNo"].eq(TARGET_APPLNO) & out["ProductNo"].eq(TARGET_PRODUCTNO)
if not drug_rows.any():
    raise ValueError("No rows found for the specified ApplNo/ProductNo.")

# Overwrite appyear on all of the drug's existing rows.
out.loc[drug_rows, "appyear"] = NEW_APPYEAR

# The drug's first row (read after the appyear update) seeds every new row,
# so all columns other than the three set below are copied from that row.
seed_row = out.loc[drug_rows].iloc[0].copy()

# Years in ADD_YEARS that the drug does not have yet (NaN years are dropped first).
years_present = set(out.loc[drug_rows, "year"].dropna().astype(int).tolist())
to_add_years = [yr for yr in ADD_YEARS if yr not in years_present]

# One new row per missing year, each with indication_count = 2.
backfill_rows = []
for missing_year in to_add_years:
    filled = seed_row.copy()
    filled["year"] = missing_year
    filled["appyear"] = NEW_APPYEAR
    filled["indication_count"] = 2
    backfill_rows.append(filled)

# Append only when rows were generated; ignore_index renumbers the combined frame.
if backfill_rows:
    out = pd.concat([out, pd.DataFrame(backfill_rows)], ignore_index=True)

# Rebuild the selector (out may have grown) and force indication_count = 2
# on all of the drug's rows, old and new, whose year lies in 2002..2025.
drug_rows = out["ApplNo"].eq(TARGET_APPLNO) & out["ProductNo"].eq(TARGET_PRODUCTNO)
in_window = out["year"].ge(2002) & out["year"].le(2025)
out.loc[drug_rows & in_window, "indication_count"] = 2



# --- ApplNo 22472 / ProductNo 1: reset appyear and backfill missing years ---
TARGET_APPLNO = "22472"
TARGET_PRODUCTNO = "1"
NEW_APPYEAR = 2014
ADD_YEARS = list(range(2014, 2017))  # 2014..2016 (the range end is exclusive)

# Boolean selector for every row of this ApplNo/ProductNo pair; stop if there are none.
drug_rows = out["ApplNo"].eq(TARGET_APPLNO) & out["ProductNo"].eq(TARGET_PRODUCTNO)
if not drug_rows.any():
    raise ValueError("No rows found for the specified ApplNo/ProductNo.")

# Overwrite appyear on all of the drug's existing rows.
out.loc[drug_rows, "appyear"] = NEW_APPYEAR

# The drug's first row (read after the appyear update) seeds every new row,
# so all columns other than the three set below are copied from that row.
seed_row = out.loc[drug_rows].iloc[0].copy()

# Years in ADD_YEARS that the drug does not have yet (NaN years are dropped first).
years_present = set(out.loc[drug_rows, "year"].dropna().astype(int).tolist())
to_add_years = [yr for yr in ADD_YEARS if yr not in years_present]

# One new row per missing year, each with indication_count = 1.
backfill_rows = []
for missing_year in to_add_years:
    filled = seed_row.copy()
    filled["year"] = missing_year
    filled["appyear"] = NEW_APPYEAR
    filled["indication_count"] = 1
    backfill_rows.append(filled)

# Append only when rows were generated; ignore_index renumbers the combined frame.
if backfill_rows:
    out = pd.concat([out, pd.DataFrame(backfill_rows)], ignore_index=True)

# Rebuild the selector (out may have grown) and force indication_count = 1
# on all of the drug's rows, old and new, whose year lies in 2014..2025.
drug_rows = out["ApplNo"].eq(TARGET_APPLNO) & out["ProductNo"].eq(TARGET_PRODUCTNO)
in_window = out["year"].ge(2014) & out["year"].le(2025)
out.loc[drug_rows & in_window, "indication_count"] = 1



# --- ApplNo 22505 / ProductNo 1: reset appyear and backfill missing years ---
# TARGET_APPLNO, TARGET_PRODUCTNO and to_add_years keep these exact names
# because the reporting cell that follows prints them.
TARGET_APPLNO = "22505"
TARGET_PRODUCTNO = "1"
NEW_APPYEAR = 2010
ADD_YEARS = list(range(2010, 2020))  # 2010..2019 (the range end is exclusive)

# Boolean selector for every row of this ApplNo/ProductNo pair; stop if there are none.
drug_rows = out["ApplNo"].eq(TARGET_APPLNO) & out["ProductNo"].eq(TARGET_PRODUCTNO)
if not drug_rows.any():
    raise ValueError("No rows found for the specified ApplNo/ProductNo.")

# Overwrite appyear on all of the drug's existing rows.
out.loc[drug_rows, "appyear"] = NEW_APPYEAR

# The drug's first row (read after the appyear update) seeds every new row,
# so all columns other than the three set below are copied from that row.
seed_row = out.loc[drug_rows].iloc[0].copy()

# Years in ADD_YEARS that the drug does not have yet (NaN years are dropped first).
years_present = set(out.loc[drug_rows, "year"].dropna().astype(int).tolist())
to_add_years = [yr for yr in ADD_YEARS if yr not in years_present]

# One new row per missing year, each with indication_count = 1.
backfill_rows = []
for missing_year in to_add_years:
    filled = seed_row.copy()
    filled["year"] = missing_year
    filled["appyear"] = NEW_APPYEAR
    filled["indication_count"] = 1
    backfill_rows.append(filled)

# Append only when rows were generated; ignore_index renumbers the combined frame.
if backfill_rows:
    out = pd.concat([out, pd.DataFrame(backfill_rows)], ignore_index=True)

# Rebuild the selector (out may have grown) and force indication_count = 1
# on all of the drug's rows, old and new, whose year lies in 2010..2025.
drug_rows = out["ApplNo"].eq(TARGET_APPLNO) & out["ProductNo"].eq(TARGET_PRODUCTNO)
in_window = out["year"].ge(2010) & out["year"].le(2025)
out.loc[drug_rows & in_window, "indication_count"] = 1





In [44]:
# Order rows by drug and year for readability, renumbering the index from 0.
sort_keys = ["ApplNo", "ProductNo", "year"]
out = out.sort_values(by=sort_keys, ignore_index=True)

# --- Write the result to Excel without the index column ---
out.to_excel(output_path, index=False)

# NOTE: to_add_years and TARGET_* hold whatever the most recently executed
# drug section assigned, so this count covers that one drug only, not the
# total number of rows added across all sections.
print(f"Added {len(to_add_years)} rows for ApplNo={TARGET_APPLNO}, ProductNo={TARGET_PRODUCTNO}.")
print(f"Saved to: {output_path}")

Added 10 rows for ApplNo=22505, ProductNo=1.
Saved to: D:\PhD\RA\Schafer\IRA\data\unzipped\merge\processed\complete_rank2_fixed.xlsx
