In [1]:
# Import necessary libraries
import kaggle_evaluation.mitsui_inference_server
from sklearn import *
import numpy as np
import pandas as pd
import polars as pl
import os
import warnings

# Ignore warnings
warnings.filterwarnings("ignore")

# Define the path to the input data
data_path = '/kaggle/input/mitsui-commodity-prediction-challenge/'

# Load the training data, training labels, and target pairs
train_data = pd.read_csv(data_path + 'train.csv')
train_labels = pd.read_csv(data_path + 'train_labels.csv')
target_pairs = pd.read_csv(data_path + 'target_pairs.csv')

# Generate column names for the target variables
target_columns = ['target_' + str(i) for i in range(424)]

# Fill missing values in the target columns with 0
train_labels[target_columns] = train_labels[target_columns].fillna(0)

# Select the date_id and target columns
train_labels[["date_id"] + target_columns]

# Function to calculate the rank correlation Sharpe ratio
def calculate_rank_correlation_sharpe_ratio(merged_dataframe: pd.DataFrame) -> float:
    # Identify prediction and target columns
    prediction_columns = [col for col in merged_dataframe.columns if col.startswith('prediction_')]
    target_columns = [col for col in merged_dataframe.columns if col.startswith('target_')]

    # Function to compute rank correlation for a single row
    def compute_rank_correlation(row):
        # Identify non-null target columns
        non_null_targets = [col for col in target_columns if not pd.isnull(row[col])]
        # Identify matching prediction columns
        matching_predictions = [col for col in prediction_columns if col.replace('prediction', 'target') in non_null_targets]

        # Check for non-null target values
        if not non_null_targets:
            raise ValueError('No non-null target values found')

        # Check for zero standard deviation
        if row[non_null_targets].std(ddof=0) == 0 or row[matching_predictions].std(ddof=0) == 0:
            raise ZeroDivisionError('Denominator is zero, unable to compute rank correlation.')

        # Compute and return the rank correlation
        return np.corrcoef(row[matching_predictions].rank(method='average'), row[non_null_targets].rank(method='average'))[0, 1]

    # Apply the rank correlation computation to each row
    daily_rank_correlations = merged_dataframe.apply(compute_rank_correlation, axis=1)

    # Calculate the standard deviation of the daily rank correlations
    std_dev = daily_rank_correlations.std(ddof=0)

    # Check for zero standard deviation
    if std_dev == 0:
        raise ZeroDivisionError('Denominator is zero, unable to compute Sharpe ratio.')

    # Calculate and return the Sharpe ratio
    sharpe_ratio = daily_rank_correlations.mean() / std_dev
    return float(sharpe_ratio)

# Function to calculate the score
def calculate_score(solution: pd.DataFrame, submission: pd.DataFrame) -> float:
    # Ensure the columns in solution and submission match
    assert all(solution.columns == submission.columns)

    # Rename the columns in the submission dataframe
    submission = submission.rename(columns={col: col.replace('target_', 'prediction_') for col in submission.columns})

    # Replace zeros with None in the solution dataframe
    solution = solution.replace(0, None)

    # Calculate and return the rank correlation Sharpe ratio
    return calculate_rank_correlation_sharpe_ratio(pd.concat([solution, submission], axis='columns'))

# Calculate the score for the last 90 rows of the target columns
calculate_score(train_labels[target_columns].tail(90), train_labels[target_columns].tail(90))

# Initialize an empty dataframe for test lag
test_lag = pd.DataFrame()

# Counter for tracking purposes
count = 0

# Function to make predictions
def predict(test_data, label_lags_1_batch, label_lags_2_batch, label_lags_3_batch, label_lags_4_batch):
    global target_columns, train_data, train_labels, test_lag, count

    # Convert test data to pandas dataframe
    test_data = test_data.to_pandas()

    # Concatenate test lag and test data if test lag is not empty
    if len(test_lag) > 0:
        test_data = pd.concat((test_lag, test_data))
    else:
        test_data = pd.concat((test_data, test_data))

    # Get the unique date_id from the last row of the test data
    date_id = [j for j in test_data["date_id"].tail(1).unique()]

    # Get the predictions for the date_id
    predictions = train_labels.loc[train_labels["date_id"].isin(date_id), target_columns]

    return predictions

# Set up the inference server
inference_server = kaggle_evaluation.mitsui_inference_server.MitsuiInferenceServer(predict)

# Serve the inference server or run it locally based on the environment
if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    try:
        inference_server.run_local_gateway((data_path,))
        display(pl.read_parquet('/kaggle/working/submission.parquet'))
    except Exception as e:
        print(f"An error occurred: {e}")

ModuleNotFoundError: No module named 'polars'