In [0]:
import pandas as pd
import numpy as np
import os
import glob
import re
import xgboost as xgb
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Configuration
pd.set_option('display.max_columns', None)
ROOT_PATH = 'ADD'
FOLDERS = ["Grocery_data", "New_York_data_food", "UberEats", "Yelp Data"]

In [0]:
import re
import pandas as pd

# 1. FIXED STATE MAP (Keys use spaces)
state_map = {
    'alabama': 'AL', 'alaska': 'AK', 'arizona': 'AZ', 'arkansas': 'AR', 'california': 'CA',
    'colorado': 'CO', 'connecticut': 'CT', 'delaware': 'DE', 'florida': 'FL', 'georgia': 'GA',
    'hawaii': 'HI', 'idaho': 'ID', 'illinois': 'IL', 'indiana': 'IN', 'iowa': 'IA',
    'kansas': 'KS', 'kentucky': 'KY', 'louisiana': 'LA', 'maine': 'ME', 'maryland': 'MD',
    'massachusetts': 'MA', 'michigan': 'MI', 'minnesota': 'MN', 'mississippi': 'MS', 'missouri': 'MO',
    'montana': 'MT', 'nebraska': 'NE', 'nevada': 'NV', 'new hampshire': 'NH', 'new jersey': 'NJ',
    'new mexico': 'NM', 'new york': 'NY', 'north carolina': 'NC', 'north dakota': 'ND', 'ohio': 'OH',
    'oklahoma': 'OK', 'oregon': 'OR', 'pennsylvania': 'PA', 'rhode island': 'RI', 'south carolina': 'SC',
    'south dakota': 'SD', 'tennessee': 'TN', 'texas': 'TX', 'utah': 'UT', 'vermont': 'VT',
    'virginia': 'VA', 'washington': 'WA', 'west virginia': 'WV', 'wisconsin': 'WI', 'wyoming': 'WY',
    'district of columbia': 'DC'
}
def extract_state(state_input):
    """
    Standardizes state input by mapping full names to 2-letter codes.
    Handles various delimiters (commas, underscores, spaces) and case sensitivity.
    """

    # Cleaning the input string
    state_input = state_input.replace('_', ' ').replace(',', ' ').strip().title()

    # Check for direct matches in the map
    for full_name, code in state_map.items():
        if full_name in state_input:
            return code

    # Fallback: check if the input is already a valid 2-letter code
    potential_code = state_input.upper()[-2:]
    if potential_code in state_map.values():
        return potential_code

    return None

Block 2: Load Data & Extract Grocery Indexes

In [0]:
data = {}

# Iterate through folders and load all CSVs
for folder in FOLDERS:
    path = os.path.join(ROOT_PATH, folder)
    if os.path.exists(path):
        data[folder] = {}
        # Glob patterns to find csvs
        for f in glob.glob(os.path.join(path, "*.csv")):
            key_name = os.path.basename(f).replace('.csv', '')
            data[folder][key_name] = pd.read_csv(f)
            print(f"Loaded: {folder}/{key_name}")
    else:
        print(f"‚ö†Ô∏è Warning: Path not found: {path}")

Loaded: Grocery_data/us_cost_of_living_data
Loaded: New_York_data_food/menu_data
Loaded: UberEats/restaurants
Loaded: Yelp Data/yelp_restaurants_california
Loaded: Yelp Data/yelp_restaurants_hawaii
Loaded: Yelp Data/yelp_restaurants_nevada
Loaded: Yelp Data/yelp_restaurants_illinois
Loaded: Yelp Data/yelp_restaurants_texas
Loaded: Yelp Data/yelp_restaurants_new_york
Loaded: Yelp Data/yelp_restaurants_michigan


In [0]:
# Load the Cost of Living data to build our Index Map
df_col = data['Grocery_data']['us_cost_of_living_data'].copy()
df_col['state_code'] = df_col['State'].apply(extract_state)

# Create lookup dictionary: {'NY': 103.5, 'TX': 91.2, ...}
grocery_index_map = dict(zip(df_col['state_code'], df_col['Grocery']))
national_avg_index = df_col['Grocery'].mean()

print(f"--- Index Map Ready ---")
print(f"States with Index Data: {len(grocery_index_map)}")
print(f"National Average Index: {national_avg_index:.2f}")

--- Index Map Ready ---
States with Index Data: 51
National Average Index: 101.26


Block 3: Prepare Restaurant Data & synthasize data

In [0]:
# ==========================================
# 4. MERGE & CLEAN (ALL FORMATS HANDLED)
# ==========================================
import numpy as np
import pandas as pd
import re

def parse_target_cost(val):
    """
    The Universal Translator:
    Converts Uber ($$), Yelp ($11-30), and NY (Float) into a continuous dollar amount.
    """
    s = str(val).strip()
    
    # CASE 1: YELP RANGES (Specific Text)
    if '11-30' in s:
        return np.round(np.random.uniform(11, 30), 2)
    if 'Under 10' in s or 'Under $10' in s:
        return np.round(np.random.uniform(5, 10), 2)
    if '31-60' in s:
        return np.round(np.random.uniform(31, 60), 2)
    if 'Above 61' in s or 'Above $61' in s:
        return np.round(np.random.uniform(61, 90), 2)
        
    # CASE 2: UBER SYMBOLS ($, $$, etc.)
    # We check length or specific characters
    if '$$$$' in s or '4' in s:
        return np.round(np.random.uniform(60, 100), 2)
    if '$$$' in s or '3' in s:
        return np.round(np.random.uniform(31, 60), 2)
    if '$$' in s or '2' in s:
        return np.round(np.random.uniform(15, 30), 2)
    if '$' in s or '1' in s:
        return np.round(np.random.uniform(8, 15), 2)

    return np.nan

# --- 1. PROCESS UBER EATS ---
print("Processing UberEats...")
uber = data['UberEats']['restaurants'].copy()
uber['source'] = 'UberEats'
uber['state'] = uber['full_address'].apply(extract_state)
uber = uber.rename(columns={'score': 'rating', 'ratings': 'review_count', 'category': 'cuisine'})

# Apply the translator
uber['target_cost'] = uber['price_range'].apply(parse_target_cost)

# --- 2. PROCESS YELP ---
print("Processing Yelp...")
yelp_dfs = []
if 'Yelp Data' in data:
    for name, df in data['Yelp Data'].items():
        df = df.copy()
        df['source'] = 'Yelp'
        df['state'] = extract_state(name)
        df = df.rename(columns={'aggregatedRating': 'rating', 'reviewCount': 'review_count'})
        
        # Check for the column we identified in EDA
        p_col = 'priceRange' if 'priceRange' in df.columns else 'price_level'
        if p_col in df.columns:
            df['target_cost'] = df[p_col].apply(parse_target_cost)
            yelp_dfs.append(df)

# --- 3. PROCESS NY MENU DATA (ACTUAL PRICES) ---
print("Processing NY Menu Data...")
ny_dfs = []
if 'New_York_data_food' in data and 'menu_data' in data['New_York_data_food']:
    df_ny = data['New_York_data_food']['menu_data'].copy()
    
    # Clean Prices (Regex to keep only numbers and dots)
    df_ny['clean_price'] = df_ny['Price'].astype(str).str.replace(r'[^\d.]', '', regex=True)
    df_ny['clean_price'] = pd.to_numeric(df_ny['clean_price'], errors='coerce')
    
    # Aggregate: Meal Cost ‚âà 2.0 * Average Item
    ny_grouped = df_ny.groupby('Restaurant')['clean_price'].mean().reset_index()
    ny_grouped['target_cost'] = ny_grouped['clean_price'] * 2.0
    
    # Add metadata
    ny_grouped['source'] = 'NY_Data'
    ny_grouped['state'] = 'NY'
    ny_grouped['rating'] = 4.0       
    ny_grouped['review_count'] = 50  
    ny_grouped['cuisine'] = 'Other' 
    
    ny_dfs.append(ny_grouped)

# --- 4. COMBINE & FINAL CLEAN ---
all_dfs = [uber] + yelp_dfs + ny_dfs
df_master = pd.concat(all_dfs, ignore_index=True)

# Drop rows where parsing failed (NaN cost) or state is unknown
df_master = df_master.dropna(subset=['target_cost', 'state'])
df_master = df_master[df_master['state'] != 'Unknown']
print(f"--- Data Merged ---")
print(f"Sources: {df_master['source'].unique()}")
print(f"Total Rows: {len(df_master)}")
print(f"Cost Stats:\n{df_master['target_cost'].describe()}")

Processing UberEats...
Processing Yelp...
Processing NY Menu Data...
--- Data Merged ---
Sources: ['UberEats' 'Yelp' 'NY_Data']
Total Rows: 53777
Cost Stats:
count    53777.000000
mean        15.122336
std          6.997311
min          5.160000
25%         10.520000
50%         13.030000
75%         17.650000
max        138.152062
Name: target_cost, dtype: float64


In [0]:
# ==========================================
# VERIFICATION: STATE BREAKDOWN
# ==========================================

print("--- üìä Row Counts by State (Top 10) ---")
print(df_master['state'].value_counts().head(10))

print("\n--- üí∞ Average Meal Cost by State (Top 10 Most Expensive) ---")
# Group by state and get the average 'target_cost'
state_costs = df_master.groupby('state')['target_cost'].mean().sort_values(ascending=False)
print(state_costs.head(10))

print("\n--- üìâ Average Meal Cost by State (Top 10 Cheapest) ---")
print(state_costs.tail(10))

--- üìä Row Counts by State (Top 10) ---
state
TX    24634
VA     7812
WA     7355
WI     3648
UT     2510
NE     1303
WV     1265
AL      952
MD      626
NY      616
Name: count, dtype: int64

--- üí∞ Average Meal Cost by State (Top 10 Most Expensive) ---
state
NY    29.831697
DC    28.490000
HI    25.161971
MI    24.854383
IL    20.496734
IA    20.193333
VT    19.590680
KS    19.105000
WY    17.382395
CO    17.340909
Name: target_cost, dtype: float64

--- üìâ Average Meal Cost by State (Top 10 Cheapest) ---
state
TN    13.808511
NH    13.540000
KY    13.513333
OR    13.205699
MA    13.104286
GA    13.083056
ND    10.515000
NM    10.497500
ME     9.525000
SC     8.410000
Name: target_cost, dtype: float64


In [0]:
# ==========================================
# 5. MULTI-SOURCE SYNTHESIS (BALANCE THE STATES)
# ==========================================

# Configuration
# We want every state to have at least this many rows to ensure the model treats them equally.
TARGET_ROWS_PER_STATE = 45000 
SAMPLE_BATCH_SIZE = 15000 # We'll pull 15k chunks from different sources

# 1. Identify "Rich" vs "Poor" Data States
state_counts = df_master['state'].value_counts()
existing_states = set(df_master['state'].unique())
all_index_states = set(grocery_index_map.keys())

# Targets = Missing States OR Existing States with < 45k rows
# (This ensures even NY with 616 rows gets boosted)
targets_missing = all_index_states - existing_states
targets_low_data = [s for s in existing_states if state_counts[s] < TARGET_ROWS_PER_STATE]
final_targets = list(targets_missing) + targets_low_data

# Sources we can steal data from (e.g. UberEats, Yelp, NY_Data)
available_sources = df_master['source'].unique()

print(f"--- Synthesis Plan ---")
print(f"Goal: Ensure every state has ~{TARGET_ROWS_PER_STATE} rows.")
print(f"Synthesizing data for {len(final_targets)} states.")

synthetic_dfs = []

for target_state in final_targets:
    # Get Cost Index for the target state (e.g. FL = 100.5)
    target_index = grocery_index_map.get(target_state, national_avg_index)
    
    # Check how many rows we already have (so we don't over-synthesize)
    current_count = state_counts.get(target_state, 0)
    needed = TARGET_ROWS_PER_STATE - current_count
    
    if needed <= 0: continue

    # Distribute the "needed" rows across available sources
    # If we need 40k rows and have 3 sources, we take ~13k from each
    rows_per_source = int(needed / len(available_sources)) + 1
    
    for source_name in available_sources:
        # 1. Get data for this source
        source_data = df_master[df_master['source'] == source_name]
        if source_data.empty: continue
            
        # 2. Pick the best "Template" within this source (State with most rows)
        best_template_state = source_data['state'].value_counts().idxmax()
        template_df = source_data[source_data['state'] == best_template_state]
        
        # 3. Calculate Price Ratio
        template_index = grocery_index_map.get(best_template_state, national_avg_index)
        ratio = target_index / template_index
        
        # 4. Sample Rows
        # Pull random meals from the template
        replace_flag = len(template_df) < rows_per_source
        sample_rows = template_df.sample(n=rows_per_source, replace=replace_flag)
        
        # 5. Create Synthetic Rows
        new_rows = sample_rows.copy()
        new_rows['state'] = target_state
        new_rows['target_cost'] = new_rows['target_cost'] * ratio
        
        # Tag as synthetic just in case we want to debug later
        # new_rows['source'] = source_name # Keep original source name so model knows it's "Yelp-style" data
        
        synthetic_dfs.append(new_rows)

# Merge
if synthetic_dfs:
    df_synthetic = pd.concat(synthetic_dfs, ignore_index=True)
    df_master = pd.concat([df_master, df_synthetic], ignore_index=True)

print(f"‚úÖ Synthesis Complete.")
print(f"Total Rows: {len(df_master)}")
print(f"New Distribution (Top 5):\n{df_master['state'].value_counts().head()}")

--- Synthesis Plan ---
Goal: Ensure every state has ~45000 rows.
Synthesizing data for 52 states.
‚úÖ Synthesis Complete.
Total Rows: 2340101
New Distribution (Top 5):
state
OR    45003
FL    45003
WI    45003
WY    45003
MI    45003
Name: count, dtype: int64


In [0]:
# ==========================================
# VERIFICATION: QUANTITY & QUALITY
# ==========================================

print("--- 1. QUANTITY CHECK (Goal: > 40k rows per state) ---")
state_counts = df_master['state'].value_counts()
min_count = state_counts.min()
max_count = state_counts.max()

print(f"Minimum Rows in any state: {min_count}")
print(f"Maximum Rows in any state: {max_count}")

if min_count < 40000:
    print(f"‚ö†Ô∏è FAIL: Some states have less than 40k rows. Run Synthesis again.")
    print(state_counts.tail())
else:
    print("‚úÖ SUCCESS: All states have sufficient data.")

print("\n--- 2. QUALITY CHECK (Goal: Different Prices per State) ---")
# We check the Average Price per state to ensure the Index Math worked.
# Expensive states (HI, NY, CA) should have higher averages than cheap states (MS, AL).

stats = df_master.groupby('state')['target_cost'].agg(['mean', 'std', 'min', 'max']).sort_values('mean', ascending=False)

print("\nTop 5 Most Expensive States (Synthesized):")
print(stats.head(5))

print("\nTop 5 Cheapest States (Synthesized):")
print(stats.tail(5))

# Sanity Check: Is Hawaii (HI) more expensive than Mississippi (MS)?
try:
    hi_price = stats.loc['HI', 'mean']
    ms_price = stats.loc['MS', 'mean']
    print(f"\nSanity Check:")
    print(f"   Hawaii Mean: ${hi_price:.2f}")
    print(f"   Mississippi Mean: ${ms_price:.2f}")
    
    if hi_price > ms_price:
        print("‚úÖ SUCCESS: Price scaling logic is working (HI > MS).")
    else:
        print("‚ö†Ô∏è FAIL: Prices look wrong. Check Index Logic.")
except KeyError:
    print("‚ö†Ô∏è Could not find HI or MS to compare.")

--- 1. QUANTITY CHECK (Goal: > 40k rows per state) ---
Minimum Rows in any state: 45001
Maximum Rows in any state: 45003
‚úÖ SUCCESS: All states have sufficient data.

--- 2. QUALITY CHECK (Goal: Different Prices per State) ---

Top 5 Most Expensive States (Synthesized):
            mean        std       min         max
state                                            
HI     28.385883  17.116002  5.730000  174.191730
AK     26.919450  15.994446  5.462161  166.049435
VT     23.325412  13.865235  4.724506  143.624752
CA     23.322645  14.220396  4.720115  143.491272
CT     23.232455  14.012382  4.698161  142.823871

Top 5 Cheapest States (Synthesized):
            mean        std       min         max
state                                            
VA     20.457340  12.638685  4.417149  134.281135
MS     20.444633  12.282769  4.140529  125.871879
UT     20.350689  12.117964  4.202000  127.740602
AR     20.178948  12.206740  4.052713  123.202274
TX     17.550978   9.991535  4.153701  1

Training model

In [0]:
# ==========================================
# 6. TRAIN XGBOOST MODEL
# ==========================================
from sklearn.model_selection import train_test_split
import xgboost as xgb
import joblib

print("--- Preparing Training Data ---")

# 1. Feature Engineering: Clean Cuisine
# Group rare cuisines into 'Other' to keep the model fast
top_cuisines = df_master['cuisine'].value_counts().nlargest(20).index
df_master['clean_cuisine'] = df_master['cuisine'].apply(lambda x: x if x in top_cuisines else 'Other')

# 2. One-Hot Encoding
# This converts 'state' and 'cuisine' into numbers the model can understand
# It creates columns like: state_AL, state_NY, clean_cuisine_Pizza, clean_cuisine_Mexican
X = pd.get_dummies(df_master[['state', 'clean_cuisine', 'rating', 'review_count']], drop_first=True)
y = df_master['target_cost']

# 3. Split Data (80% Train, 20% Test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print(f"Training on {len(X_train)} rows...")
print(f"Features: {X_train.shape[1]} columns")

# 4. Train Model
# n_estimators=200: Number of trees (higher = smarter but slower)
# learning_rate=0.05: How carefully it learns (lower = prevents overfitting)
model = xgb.XGBRegressor(n_estimators=300, max_depth=7, learning_rate=0.05, n_jobs=-1, random_state=42)
model.fit(X_train, y_train)

# 5. Evaluate
score = model.score(X_test, y_test)
print(f"‚úÖ Model Trained!")
print(f"R¬≤ Score: {score:.4f} (1.0 is perfect)")

--- Preparing Training Data ---
Training on 1872080 rows...
Features: 72 columns
‚úÖ Model Trained!
R¬≤ Score: 0.4590 (1.0 is perfect)


Block 4: The Final Predictor (With Weekly Grocery Breakup)

In [0]:
# ==========================================
# 7. PREDICTION TOOL (FIXED & COMPLETE)
# ==========================================

# 1. DEFINE ARTIFACTS (Fixes the "artifacts not defined" error)
# We pack the model and maps you just created into this dictionary.
if 'model' in locals() and 'X_train' in locals():
    artifacts = {
        "xgb_model": model,
        "model_columns": X_train.columns,
        "grocery_index_map": grocery_index_map,
        "national_avg_index": national_avg_index
    }
else:
    print("‚ö†Ô∏è Error: Model not found. Please run Block 6 (Training) first.")

# 2. USDA FOOD PLAN DATA (From your image)
GROCERY_PLANS = {
    "low":      {"male": 371.0, "female": 323.0},
    "moderate": {"male": 465.0, "female": 392.0},
    "liberal":  {"male": 566.0, "female": 499.0}
}

# 3. DEFINE THE FUNCTION
def predict_trip_breakdown(state_code, people, days, 
                           cuisine="American", vibe_rating=4.5, 
                           eating_out_per_week=4,
                           budget_level="moderate"):
    """
    Predicts vacation costs using:
    1. XGBoost Model for Restaurants (trained on 2.2M rows).
    2. USDA Food Plans for Groceries (Adjusted by State Index).
    """
    
    # --- A. RESTAURANT PREDICTION ---
    input_row = pd.DataFrame(0, index=[0], columns=artifacts['model_columns'])
    input_row['rating'] = vibe_rating
    input_row['review_count'] = 150 
    
    if f"state_{state_code}" in input_row.columns:
        input_row[f"state_{state_code}"] = 1
    if f"clean_cuisine_{cuisine}" in input_row.columns:
        input_row[f"clean_cuisine_{cuisine}"] = 1
        
    predicted_meal_price = artifacts['xgb_model'].predict(input_row)[0]
    predicted_meal_price = max(predicted_meal_price, 7.0) # Safety floor
    
    # --- B. GROCERY PREDICTION ---
    
    # 1. Determine Group Composition
    if people == 1:
        males = 1; females = 0
    else:
        males = people // 2
        females = people - males
        
    # 2. Get Base National Monthly Cost
    plan = GROCERY_PLANS.get(budget_level.lower(), GROCERY_PLANS["moderate"])
    monthly_base_national = (males * plan["male"]) + (females * plan["female"])
    
    # 3. Convert to Weekly
    weekly_base_national = monthly_base_national / 4.33
    
    # 4. APPLY STATE INDEX (The Accuracy Step)
    state_index = artifacts['grocery_index_map'].get(state_code, artifacts['national_avg_index'])
    state_multiplier = state_index / 100.0
    
    weekly_grocery_local = weekly_base_national * state_multiplier
    
    # --- C. TOTAL CALCULATIONS ---
    weeks = days / 7.0
    
    # Restaurant Total
    total_restaurant = predicted_meal_price * eating_out_per_week * weeks * people
    
    # Grocery Total (Reduce if eating out > 5 times/week)
    grocery_factor = 0.7 if eating_out_per_week > 5 else 1.0
    total_grocery = weekly_grocery_local * weeks * grocery_factor
    
    grand_total = total_restaurant + total_grocery

    # --- D. OUTPUT ---
    print(f"\nüìä PREDICTION: {days} Days in {state_code} ({budget_level.title()} Plan)")
    print(f"   ---------------------------------------------")
    print(f"   üë• Group:            {males} Male, {females} Female")
    print(f"   üèôÔ∏è  Local Index:      {state_index:.1f} (Multiplier: {state_multiplier:.3f}x)")
    print(f"   ---------------------------------------------")
    print(f"   üõí GROCERIES (Local): ${total_grocery:.2f}")
    print(f"       ‚Ü≥ National Base:  ${weekly_base_national:.2f}/week")
    print(f"       ‚Ü≥ Local Cost:     ${weekly_grocery_local:.2f}/week")
    print(f"   üçΩÔ∏è  RESTAURANTS:      ${total_restaurant:.2f}")
    print(f"       ‚Ü≥ Avg Meal:       ${predicted_meal_price:.2f} ({cuisine})")
    print(f"   =============================================")
    print(f"   üí∞ TOTAL BUDGET:      ${grand_total:.2f}")

# --- TEST IT NOW ---
predict_trip_breakdown("TX", people=1, days=7, budget_level="moderate", cuisine="Mexican")
predict_trip_breakdown("NY", people=2, days=7, budget_level="liberal", cuisine="Italian")


üìä PREDICTION: 7 Days in TX (Moderate Plan)
   ---------------------------------------------
   üë• Group:            1 Male, 0 Female
   üèôÔ∏è  Local Index:      94.6 (Multiplier: 0.946x)
   ---------------------------------------------
   üõí GROCERIES (Local): $101.59
       ‚Ü≥ National Base:  $107.39/week
       ‚Ü≥ Local Cost:     $101.59/week
   üçΩÔ∏è  RESTAURANTS:      $56.99
       ‚Ü≥ Avg Meal:       $14.25 (Mexican)
   üí∞ TOTAL BUDGET:      $158.58

üìä PREDICTION: 7 Days in NY (Liberal Plan)
   ---------------------------------------------
   üë• Group:            1 Male, 1 Female
   üèôÔ∏è  Local Index:      103.5 (Multiplier: 1.035x)
   ---------------------------------------------
   üõí GROCERIES (Local): $254.57
       ‚Ü≥ National Base:  $245.96/week
       ‚Ü≥ Local Cost:     $254.57/week
   üçΩÔ∏è  RESTAURANTS:      $126.15
       ‚Ü≥ Avg Meal:       $15.77 (Italian)
   üí∞ TOTAL BUDGET:      $380.72


In [0]:
import joblib

# 1. Bundle all necessary "Knowledge" into one dictionary
# We save X_train.columns because the laptop needs to know EXACTLY which dummy columns exist (e.g., 'state_TX', 'clean_cuisine_Pizza')
artifacts = {
    "xgb_model": model,
    "model_columns": X_train.columns.tolist(),  # The exact list of columns the model expects
    "grocery_index_map": grocery_index_map,      # The dictionary for grocery costs
    "state_map_helper": state_map                # The dictionary to convert names to codes
}

# 2. Save to a single file
filename = "food_cost_predictor_v2.pkl"
joblib.dump(artifacts, filename)

print(f"‚úÖ Successfully saved model artifacts to '{filename}'")

# If you are in Google Colab, use this to download it to your machine:
# from google.colab import files
# files.download(filename)

‚úÖ Successfully saved model artifacts to 'food_cost_predictor_v2.pkl'
