# Merging two pivoted datasets (crop yields and crop prices)

In [None]:
import pandas as pd

# Input locations: both CSVs live in the same data folder
DATA_FOLDER = '~/data/'
TRADYIELD = f"{DATA_FOLDER}tradyield_pivotcleaned.csv"
TRADPRICE = f"{DATA_FOLDER}tradprice_pivotcleaned.csv"

# Read each CSV into its own dataframe
yield_df = pd.read_csv(TRADYIELD)
price_df = pd.read_csv(TRADPRICE)

# Every column after the first is forced to a numeric dtype; any value
# that cannot be parsed as a number becomes NaN instead of raising.
for frame in (yield_df, price_df):
    for column in frame.columns[1:]:
        frame[column] = pd.to_numeric(frame[column], errors='coerce')

# Column names of each table, skipping the first column of each
yield_crops = set(yield_df.columns[1:])
price_crops = set(price_df.columns[1:])

# Columns present in both tables
common_columns = list(yield_crops.intersection(price_crops))

# Report how many columns appear in only one of the two tables
# (these are the ones dropped by restricting to the shared set)
total_columns = len(yield_crops.union(price_crops))
columns_lost = total_columns - len(common_columns)
print(f"Number of columns lost during the merge: {columns_lost}")

# Final column order: 'State' first, then the shared columns A-Z
common_columns.sort()
common_columns = ['State'] + common_columns

# Restrict both dataframes to exactly that column list
yield_common = yield_df[common_columns]
price_common = price_df[common_columns]

# Merge the dataframes on 'State'. Every shared crop column exists in both
# inputs, so each one comes out twice: '<crop>_yield' and '<crop>_price'.
merged_common_df = pd.merge(yield_common, price_common, on="State", suffixes=('_yield', '_price'))

# Build one column per crop whose cells are the list [yield, price],
# with missing values replaced by 0.
combined_data_common = {}

for col in common_columns[1:]:
    # Go through .to_numpy().tolist() rather than a row-wise apply with
    # row[0]/row[1]:
    #   * integer keys on a label-indexed Series are a deprecated positional
    #     fallback in pandas, so the values are taken by position here
    #     without relying on it;
    #   * .tolist() yields plain Python numbers, so the lists written to the
    #     CSV read "[5.0, 2.5]" rather than NumPy scalar reprs.
    pair_rows = merged_common_df[[f'{col}_yield', f'{col}_price']].to_numpy().tolist()
    pairs = [
        [0 if pd.isna(value) else value for value in pair]
        for pair in pair_rows
    ]
    # dtype=object keeps each [yield, price] list as a single cell, and the
    # merged frame's index keeps the rows aligned with 'State' below.
    combined_data_common[col] = pd.Series(pairs, index=merged_common_df.index, dtype=object)

# Assemble the final dataframe: 'State' first, then one list-valued column per crop
final_combined_common_df = pd.DataFrame(combined_data_common, index=merged_common_df.index)
final_combined_common_df.insert(0, 'State', merged_common_df['State'])

# Write the combined table next to the inputs (row index is not written)
FINAL_PATH = f"{DATA_FOLDER}combined_yield_price_common_data.csv"
final_combined_common_df.to_csv(FINAL_PATH, index=False)

# Show the first few rows as a quick sanity check
preview = final_combined_common_df.head()
print(preview)
