In [198]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_log_error
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Lazily walk the read-only Kaggle input tree and print the full path of every file found.
input_files = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_files:
    print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/tss24-competition-4/sample_submission.csv
/kaggle/input/tss24-competition-4/train.csv
/kaggle/input/tss24-competition-4/test.csv


In [199]:
# Load the competition CSVs: training set, test set, and the submission template.
# The three reads are independent, so they are unpacked from a single generator.
train, test, sample_submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/tss24-competition-4/train.csv',
        '/kaggle/input/tss24-competition-4/test.csv',
        '/kaggle/input/tss24-competition-4/sample_submission.csv',
    )
)

In [202]:
# Assuming 'Rings' is the target variable and 'id' is the identifier
X = train.drop(columns=['Rings', 'id'])
y = train['Rings']
X_test = test.drop(columns=['id'])


def rmsle(y_true, y_pred):
    """Return the root mean squared logarithmic error of `y_pred` against `y_true`.

    Both inputs must be non-negative; `mean_squared_log_error` raises
    ValueError otherwise.
    """
    return np.sqrt(mean_squared_log_error(y_true, y_pred))


# Encode categorical features using Label Encoding.
# Each encoder is fitted on the union of train and test values so that a category
# present only in the test set does not make `transform` raise ValueError.
# LabelEncoder sorts its classes, so the codes are identical to a train-only fit
# whenever the test set introduces no new category.
label_encoders = {}
for column in X.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    label_encoders[column].fit(pd.concat([X[column], X_test[column]], ignore_index=True))
    X[column] = label_encoders[column].transform(X[column])
    X_test[column] = label_encoders[column].transform(X_test[column])

# Split the training data into train and validation sets BEFORE scaling, so the
# scaler statistics are computed without looking at the validation rows.
X_train_raw, X_val_raw, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Feature scaling (fitted on the training split only, then applied everywhere)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_val = scaler.transform(X_val_raw)
X_scaled = scaler.transform(X)  # full training matrix, kept under its original name
X_test_scaled = scaler.transform(X_test)

# Train models
# NOTE(review): both models minimise squared error on the raw target while the
# score is RMSLE; fitting on np.log1p(y) and inverting with np.expm1 may align
# the objective with the metric — worth trying.
model_xgb = XGBRegressor(n_estimators=50, learning_rate=0.1, random_state=42)
model_xgb.fit(X_train, y_train)

model_rf = RandomForestRegressor(n_estimators=50, random_state=42)
model_rf.fit(X_train, y_train)

# Make predictions.
# Predictions are clipped at 0 because mean_squared_log_error rejects negative
# values and the target (a count of rings) cannot be negative anyway.
y_pred_xgb = np.clip(model_xgb.predict(X_val), 0, None)
y_pred_rf = np.clip(model_rf.predict(X_val), 0, None)
y_test_pred_xgb = np.clip(model_xgb.predict(X_test_scaled), 0, None)
y_test_pred_rf = np.clip(model_rf.predict(X_test_scaled), 0, None)

# Calculate RMSLE for each model
rmsle_xgb = rmsle(y_val, y_pred_xgb)
rmsle_rf = rmsle(y_val, y_pred_rf)
print(f'Validation RMSLE - XGBoost: {rmsle_xgb}')
print(f'Validation RMSLE - RandomForest: {rmsle_rf}')

# Score the blend as well: it is what actually gets submitted.
y_pred_blend = (y_pred_xgb + y_pred_rf) / 2
rmsle_blend = rmsle(y_val, y_pred_blend)
print(f'Validation RMSLE - Blend: {rmsle_blend}')

# Blended predictions (equal weights for simplicity)
blended_predictions = (y_test_pred_xgb + y_test_pred_rf) / 2

# Prepare the submission file.
# Predictions are in `test` row order, so verify the template lines up with it
# before overwriting the target column.
submission = sample_submission.copy()
assert len(submission) == len(blended_predictions), (
    'sample_submission and test have different numbers of rows'
)
if 'id' in submission.columns:
    assert (submission['id'].to_numpy() == test['id'].to_numpy()).all(), (
        'sample_submission ids are not in the same order as test ids'
    )
submission['Rings'] = blended_predictions

# Save the submission file
submission.to_csv('blended_submission.csv', index=False)


Validation RMSLE - XGBoost: 0.15347137131407518
Validation RMSLE - RandomForest: 0.15696626617397205
