In [86]:
import numpy as np
import pandas as pd
import random

In [87]:
# --- California Data Generation ---
# Empty label table: one empty list per label attribute, listed in the
# column order the resulting DataFrame (and the exported CSV) will use.
data = {
    column: []
    for column in (
        "grape", "region", "vintage", "estate",
        "grape_law", "region_law", "vintage_law",
        "country", "designation",
    )
}

# Zero-row frame that the generation cell fills in.
df = pd.DataFrame(data)

California label laws (minimum share of the wine that must match the claim on the label):

varietal: 75%

state: 100%

county: 75%

AVA (American Viticultural Area): 85%

single vineyard: 95%

vintage (AVA): 95%

vintage (county/state): 85%

estate: 100%


In [88]:
# California Wine Categories
# Grape varieties a generated label can carry.
ca_grape = [
    "cabernet sauvignon",
    "zinfandel",
    "merlot",
    "chardonnay",
    "pinot noir",
    "sauvignon blanc",
]

# Region names a generated label can carry. Both "california" and "ca" are
# listed, so the state appears under two spellings.
ca_region = [
    "napa valley",
    "sonoma valley",
    "russian river valley",
    "mendocino",
    "santa barbara",
    "paso robles",
    "monterey county",
    "lodi",
    "central coast",
    "temecula valley",
    "california",
    "san luis obispo county",
    "ca",
]

# Candidate vintages: 2015 through 2024 (the stop value 2025 is exclusive).
ca_vintage = np.arange(2015, 2025)


In [90]:
# --- California Wine Generation Logic ---
# Builds `num` synthetic labels for every entry in `ca_region` and stores them
# in `df`.
#
# * A dedicated, seeded generator makes Restart & Run All reproduce the same
#   table without touching the global `random` state.
# * Rows are collected in a list and the frame is built once, so re-running
#   this cell replaces `df` instead of appending another batch of rows.
SEED = 42
rng = random.Random(SEED)

num = 10  # fixed number of labels generated per region

records = []
for region in ca_region:
    # Appellation level is derived purely from the region *name*: the two
    # state spellings, then anything containing "county"; every other name
    # (including e.g. "mendocino" and "santa barbara") is treated as an AVA.
    isState = region in ["california", "ca"]
    isCounty = "county" in region
    isAVA = not isCounty and not isState

    # Region/vintage thresholds for non-estate labels at this level.
    if isAVA:
        base_region_law = 0.85
        base_designation = "AVA"
    elif isCounty:
        base_region_law = 0.75
        base_designation = "County"
    else:
        base_region_law = 1.00
        # State-level labels are tagged "Country"; the `country` column below
        # likewise holds "california".
        base_designation = "Country"
    vintage_law = 0.85 if (isState or isCounty) else 0.95

    for _ in range(num):
        # randint(0, 3) is inclusive on both ends -> 1-in-4 chance of estate.
        isEstate = (rng.randint(0, 3) == 0)
        # TODO: single-vineyard labels (0.95 region threshold) are not generated yet.

        label = {
            "grape": rng.choice(ca_grape),
            "region": region,
            "vintage": rng.choice(ca_vintage),
            "estate": isEstate,
            "grape_law": 0.75,
            # Estate overrides the appellation-level threshold and designation.
            "region_law": 1.00 if isEstate else base_region_law,
            "vintage_law": vintage_law,
            "country": "california",
            "designation": "Estate" if isEstate else base_designation,
        }
        records.append(label)

# Build the table in one step, keeping the column order of the existing frame.
df = pd.DataFrame(records, columns=df.columns)


In [91]:
# Save to CSV
# Write the label table to the working directory; index=False keeps the
# positional row index out of the file.
df.to_csv(path_or_buf='CA_wines.csv', index=False)
print("California wines saved to CA_wines.csv")

California wines saved to CA_wines.csv


In [92]:
# California Grapes and Regions Analysis
# Prints plain-text row counts of `df` by grape, region, vintage and estate flag.
ca_grapes = [
    "cabernet sauvignon",
    "zinfandel",
    "merlot",
    "chardonnay",
    "pinot noir",
    "sauvignon blanc",
]

# One line per listed grape; a grape with no matching rows prints 0.
print("\nGRAPES")
for grape in ca_grapes:
    count = int((df["grape"] == grape).sum())
    print(f"{grape}: {count}")

# Per region (in order of first appearance): all labels, then non-estate labels.
print("\nREGIONS")
non_estate_mask = df['estate'] == False
for region in df['region'].unique().tolist():
    region_mask = df["region"] == region
    total = int(region_mask.sum())
    not_estate = int((region_mask & non_estate_mask).sum())
    print(f"{region}: {total}")
    print(f"{region} (not estate): {not_estate}")
    print()

# Label count per vintage, in ascending year order.
print("\nVINTAGES")
for vintage in sorted(df['vintage'].unique().tolist()):
    total = int((df["vintage"] == vintage).sum())
    print(f"{vintage}: {total}")

# Overall number of estate labels.
print("\nESTATES")
estate_total = int((df["estate"] == True).sum())
print(f"Total Estate Wines: {estate_total}")



GRAPES
cabernet sauvignon: 15
zinfandel: 31
merlot: 22
chardonnay: 17
pinot noir: 26
sauvignon blanc: 19

REGIONS
napa valley: 10
napa valley (not estate): 9

sonoma valley: 10
sonoma valley (not estate): 7

russian river valley: 10
russian river valley (not estate): 8

mendocino: 10
mendocino (not estate): 7

santa barbara: 10
santa barbara (not estate): 8

paso robles: 10
paso robles (not estate): 6

monterey county: 10
monterey county (not estate): 7

lodi: 10
lodi (not estate): 7

central coast: 10
central coast (not estate): 6

temecula valley: 10
temecula valley (not estate): 7

california: 10
california (not estate): 8

san luis obispo county: 10
san luis obispo county (not estate): 8

ca: 10
ca (not estate): 7


VINTAGES
2015: 15
2016: 13
2017: 16
2018: 12
2019: 19
2020: 6
2021: 16
2022: 15
2023: 11
2024: 7

ESTATES
Total Estate Wines: 35


In [93]:
# Display the first few rows
# Bare last expression so the notebook renders the five-row preview.
df.head(n=5)

Unnamed: 0,grape,region,vintage,estate,grape_law,region_law,vintage_law,country,designation
0,sauvignon blanc,napa valley,2019,False,0.75,0.85,0.95,california,AVA
1,sauvignon blanc,napa valley,2019,False,0.75,0.85,0.95,california,AVA
2,zinfandel,napa valley,2016,False,0.75,0.85,0.95,california,AVA
3,sauvignon blanc,napa valley,2017,False,0.75,0.85,0.95,california,AVA
4,cabernet sauvignon,napa valley,2024,False,0.75,0.85,0.95,california,AVA
