In [1]:
import pandas as pd
import numpy as np

In [2]:
# Location of the raw yield export; adjust if the CSV lives elsewhere.
file_path = 'Yield.csv'
try:
    # header=1: row 1 (0-based) supplies the column names; anything above it is skipped.
    df = pd.read_csv(file_path, header=1)
except Exception as load_error:
    # Best-effort load: report the problem and continue with an empty frame so
    # the later cells (which all check `.empty`) become no-ops instead of failing.
    print(f"Error loading '{file_path}': {load_error}")
    df = pd.DataFrame()
else:
    print(f"File '{file_path}' loaded successfully.")

File 'Yield.csv' loaded successfully.


In [3]:
# Discard columns in which every value is missing.
# Columns holding a single constant value are not removed; they are unlikely in yield data.
df = df.dropna(axis=1, how='all')
print(f"Shape after removing empty columns: {df.shape}")

Shape after removing empty columns: (3, 445)


In [4]:
# Product IDs to keep; edit this list to change the selection.
PRODUCT_IDS = ['21A', '22B', '23C']

df_filtered_yield = pd.DataFrame()  # Stays empty when no data was loaded
if not df.empty:
    # After loading with header=1 the first column is assumed to hold the Product ID.
    id_col = df.columns[0]
    # The second column is assumed to label the attribute ('Yield'); it is not needed for filtering.

    # Keep only the rows whose Product ID is in the requested list.
    condition = df[id_col].isin(PRODUCT_IDS)
    df_filtered_yield = df.loc[condition].copy()
    print(f"Rows filtered by Product ID. Shape: {df_filtered_yield.shape}")

    # id_col comes straight from df.columns, so testing whether it is "in" the
    # columns can never fail. The failure that can actually happen is that some
    # requested IDs are absent from the data, so report those instead.
    found_ids = set(df_filtered_yield[id_col])
    missing_ids = [pid for pid in PRODUCT_IDS if pid not in found_ids]
    if missing_ids:
        print(f"Warning: Product IDs not found in column '{id_col}': {missing_ids}")

Rows filtered by Product ID. Shape: (3, 445)


In [5]:
# Collapse exact duplicate rows, keeping the first occurrence of each.
if not df_filtered_yield.empty:
    # Rebind instead of mutating in place; copy() leaves a standalone frame
    # for the column assignments performed in the following cells.
    df_filtered_yield = df_filtered_yield.drop_duplicates(keep='first').copy()

In [6]:
if not df_filtered_yield.empty:
    # Normalise the headers: trim surrounding whitespace, then turn every
    # space, dot and hyphen into an underscore in a single pass.
    stripped_names = df_filtered_yield.columns.str.strip()
    df_filtered_yield.columns = stripped_names.str.replace(r"[ .\-]", "_", regex=True)

In [7]:
if not df_filtered_yield.empty:
    # The first two columns are assumed to be identifiers (ID, Attribute='Yield');
    # every column after them is treated as a yield value column.
    value_columns = df_filtered_yield.columns[2:]
    print(f"Attempting to convert {len(value_columns)} yield columns to numeric...")

    # Count values that are already missing so they are not blamed on the conversion.
    missing_before = df_filtered_yield[value_columns].isnull().sum().sum()

    for col in value_columns:
        # errors='coerce' turns unparseable entries into NaN instead of raising.
        df_filtered_yield[col] = pd.to_numeric(df_filtered_yield[col], errors='coerce')

    # nan_count is the total number of missing values after conversion;
    # coerced_count is the part of it that the conversion itself introduced.
    nan_count = df_filtered_yield[value_columns].isnull().sum().sum()
    coerced_count = nan_count - missing_before

    if nan_count == 0:
        print("Numeric conversion completed without NaNs.")
    else:
        if coerced_count > 0:
            print(f"Warning: {coerced_count} NaNs generated during yield numeric conversion.")
        if missing_before > 0:
            print(f"Warning: {missing_before} yield values were already missing before conversion.")
        # Missing yield is problematic. Consider an appropriate fill strategy (e.g., forward fill).
        # Example: df_filtered_yield[value_columns] = df_filtered_yield[value_columns].ffill(axis=1)

Attempting to convert 443 yield columns to numeric...
Numeric conversion completed without NaNs.


In [8]:
df_long_yield = pd.DataFrame()  # Remains empty unless the reshape below succeeds
if not df_filtered_yield.empty:
    try:
        # Positional layout after cleaning: column 0 is the ID, column 1 is
        # presumably the attribute label ('Yield') and is left out of the long
        # format, columns 2 onward are the values.
        id_col_clean, value_vars = df_filtered_yield.columns[0], df_filtered_yield.columns[2:]

        # Give the ID column its final output name.
        df_to_melt = df_filtered_yield.rename(columns={id_col_clean: 'Product_ID'})

        # Only the ID and the value columns take part in the reshape.
        cols_for_melt = ['Product_ID', *value_vars]

        # Wide -> long: one output row per (Product_ID, original column name) pair.
        df_long_yield = df_to_melt[cols_for_melt].melt(
            id_vars=['Product_ID'],
            value_vars=value_vars,
            var_name='Week_Original',
            value_name='Yield_Value',
        )
        print(f"Yield DataFrame reshaped to long format. Final shape: {df_long_yield.shape}")
    except Exception as melt_error:
        # Best-effort: report the failure and leave df_long_yield empty.
        print(f"Error during Yield melt: {melt_error}")

Yield DataFrame reshaped to long format. Final shape: (1329, 3)


In [9]:
# Destination for the long-format yield table.
output_file = "Yield_Long.csv"
if df_long_yield.empty:
    # Nothing to write: an earlier step produced no rows.
    print("Final Yield DataFrame is empty, file not saved.")
else:
    # index=False keeps the row index out of the CSV.
    df_long_yield.to_csv(output_file, index=False)
    print(f"Processed Yield data saved to '{output_file}'")

Processed Yield data saved to 'Yield_Long.csv'
