# Chemical Bonds Prediction

## Import necessary libraries

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
import lightgbm as lgb
import xgboost as xgb
from catboost import CatBoostRegressor

## Load train and test data

In [2]:
# Read the training and test tables from CSV files in the working directory.
train_data, test_data = (
    pd.read_csv(csv_path) for csv_path in ('train.csv', 'test.csv')
)

## Data Preprocessing

In [3]:
# Fill missing AlogP values with the median computed on the training set, and
# reuse that same training median for the test set.
#
# The filled column is assigned back to the DataFrame rather than calling
# fillna(inplace=True) on train_data['AlogP']: that chained form acts on an
# intermediate Series, raises a FutureWarning on recent pandas 2.x releases,
# and leaves the DataFrame unchanged once Copy-on-Write is enabled (the
# default behaviour in pandas 3.0).
median_value = train_data['AlogP'].median()
train_data['AlogP'] = train_data['AlogP'].fillna(median_value)
test_data['AlogP'] = test_data['AlogP'].fillna(median_value)

## Splitting features and targets

In [4]:
# The two regression targets, taken straight from the training table.
y_mlm = train_data['MLM']
y_hlm = train_data['HLM']

# Feature matrices: every remaining column once the id, the SMILES string and
# (for the training table) the two target columns have been removed.
X = train_data.drop(['id', 'SMILES', 'MLM', 'HLM'], axis=1)
X_test = test_data.drop(['id', 'SMILES'], axis=1)

## Model Training and Predictions

In [5]:

# One regressor per (library, target) pair, each seeded with random_state=42
# and trained on the full training feature matrix. fit() returns the fitted
# estimator in all three libraries, so construction and training are chained.

# LightGBM
lgb_mlm = lgb.LGBMRegressor(random_state=42).fit(X, y_mlm)
lgb_pred_mlm = lgb_mlm.predict(X_test)

lgb_hlm = lgb.LGBMRegressor(random_state=42).fit(X, y_hlm)
lgb_pred_hlm = lgb_hlm.predict(X_test)

# XGBoost
xgb_mlm = xgb.XGBRegressor(random_state=42).fit(X, y_mlm)
xgb_pred_mlm = xgb_mlm.predict(X_test)

xgb_hlm = xgb.XGBRegressor(random_state=42).fit(X, y_hlm)
xgb_pred_hlm = xgb_hlm.predict(X_test)

# CatBoost (verbose=0 keeps its per-iteration training log quiet)
cat_mlm = CatBoostRegressor(random_state=42, verbose=0).fit(X, y_mlm)
cat_pred_mlm = cat_mlm.predict(X_test)

cat_hlm = CatBoostRegressor(random_state=42, verbose=0).fit(X, y_hlm)
cat_pred_hlm = cat_hlm.predict(X_test)


## Combine Predictions and Save to CSV

In [6]:

# Blend the three libraries' predictions for each target with an unweighted
# mean: sum the per-model arrays element-wise, then divide by the model count.
mlm_sum = lgb_pred_mlm + xgb_pred_mlm + cat_pred_mlm
hlm_sum = lgb_pred_hlm + xgb_pred_hlm + cat_pred_hlm
final_pred_mlm = mlm_sum / 3
final_pred_hlm = hlm_sum / 3

# Assemble the submission table: test ids next to the blended predictions.
submission_columns = {
    'id': test_data['id'],
    'MLM': final_pred_mlm,
    'HLM': final_pred_hlm,
}
submission = pd.DataFrame(submission_columns)

# Write the table to disk, leaving out the DataFrame index column.
submission.to_csv('submission.csv', index=False)
