In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, make_scorer
import joblib

# Training data; expected to hold the feature and target columns named below.
data = pd.read_csv("train.csv")

# Column groups. `features` is the model input in the order
# ['Month', 'Day', 'kingdom']; `targets` lists every column to be modelled,
# with Rain_Amount deliberately last (it is trained after the others).
numerical_features = ['Month', 'Day']
categorical_features = ['kingdom']
features = numerical_features + categorical_features
targets = [
    'Avg_Temperature',
    'Radiation',
    'Wind_Speed',
    'Wind_Direction',
    'Rain_Amount',
]

# Preprocessing: standardise the numeric columns and one-hot encode the
# categorical one. Categories unseen during fit are ignored at transform
# time instead of raising.
numeric_step = ('num', StandardScaler(), numerical_features)
categorical_step = (
    'cat',
    OneHotEncoder(handle_unknown='ignore'),
    categorical_features,
)
preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

# MAE wrapped as a scorer. greater_is_better=False makes the scorer return
# the negated error, so callers must flip the sign to report MAE.
mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)

# Scaler applied to the Rain_Amount target (fitted later, at training time).
rain_scaler = StandardScaler()

def train_and_save_model(target, X, y, model_name='XGBoost', *, passthrough_extra=False):
    """Cross-validate, fit and persist an XGBoost pipeline for one target.

    Args:
        target: Target name; used in the printed report and in the output
            file name ``best_model_{target}.pkl``.
        X: Feature frame. Must contain the columns selected by the
            module-level ``preprocessor``.
        y: Target values aligned with ``X``.
        model_name: Currently unused; kept so existing callers keep working.
        passthrough_extra: When True, columns of ``X`` that the preprocessor
            does not name are forwarded unchanged to the regressor. When
            False (default) they are dropped, which is ColumnTransformer's
            default ``remainder='drop'`` behaviour.

    Returns:
        The pipeline fitted on the full ``X``/``y``.
    """
    from sklearn.base import clone

    # Give every pipeline its own unfitted copy of the preprocessor.
    # Pipeline.fit fits its steps in place, so sharing the single
    # module-level instance would make each new fit overwrite the fitted
    # state held by previously returned pipelines.
    column_transformer = clone(preprocessor)
    if passthrough_extra:
        column_transformer.set_params(remainder='passthrough')

    model = XGBRegressor(n_estimators=200, objective='reg:squarederror', random_state=42)
    pipeline = Pipeline([
        ('preprocessor', column_transformer),
        ('regressor', model)
    ])

    # mae_scorer returns negated MAE, hence the sign flip when reporting.
    scores = cross_val_score(pipeline, X, y, cv=5, scoring=mae_scorer)
    print(f"{target}: MAE = {-np.mean(scores)}")

    # Refit on all rows before saving.
    pipeline.fit(X, y)
    joblib.dump(pipeline, f'best_model_{target}.pkl')
    return pipeline

# Train first 4 models
first_stage_targets = targets[:-1]  # Exclude Rain_Amount initially
best_models = {}
for target in first_stage_targets:
    X_target = data[features]
    y_target = data[target]
    best_models[target] = train_and_save_model(target, X_target, y_target)

# Generate predictions for first 4 targets. The columns are named
# 'Predicted_<target>' and appended in target order so the Rain_Amount model
# is trained on the same column names and order that are supplied to it at
# prediction time.
# NOTE(review): these are in-sample predictions from models fitted on the
# same rows; out-of-fold predictions would likely resemble test-time inputs
# more closely - confirm whether that matters here.
data_predictions = data[features].copy()
for target in first_stage_targets:
    data_predictions[f'Predicted_{target}'] = best_models[target].predict(data[features])

# Train Rain_Amount model with scaled target
y_rain = data['Rain_Amount'].values.reshape(-1, 1)
y_rain_scaled = rain_scaler.fit_transform(y_rain)  # Scale rain amount
# Persist the fitted scaler; it is required to map scaled predictions back
# to the original Rain_Amount units.
joblib.dump(rain_scaler, 'rain_scaler.pkl')

X_rain = data_predictions  # Now includes predictions of other targets
# passthrough_extra=True is what actually feeds the 'Predicted_*' columns to
# the regressor; without it the preprocessor would silently drop them.
# The MAE printed for Rain_Amount is in scaled (standardised) units.
best_models['Rain_Amount'] = train_and_save_model(
    'Rain_Amount', X_rain, y_rain_scaled, passthrough_extra=True)

print("Model training complete. All best models saved.")


In [None]:
import pandas as pd
import numpy as np
import joblib

# Targets predicted directly from the base features, listed in the order
# their predictions are appended as inputs for the Rain_Amount model.
first_stage_targets = ['Avg_Temperature', 'Radiation', 'Wind_Speed', 'Wind_Direction']

# Load one saved pipeline per target, then the scaler saved for Rain_Amount.
best_models = {}
for target in first_stage_targets + ['Rain_Amount']:
    best_models[target] = joblib.load(f'best_model_{target}.pkl')

rain_scaler = joblib.load('rain_scaler.pkl')

# Load test data
test_data = pd.read_csv("test.csv")

# Base feature columns fed to every first-stage model.
features = ['Month', 'Day', 'kingdom']

# Step 1: predict the first-stage targets from the base features.
X_test = test_data[features]
predictions = {}
for target in first_stage_targets:
    print(f"Making predictions for {target}...")
    predictions[target] = best_models[target].predict(X_test)

# Attach each first-stage prediction to the test frame as
# 'Predicted_<target>' and record the resulting column order for the
# Rain_Amount model input.
rain_features = list(features)
for target in first_stage_targets:
    predicted_column = f'Predicted_{target}'
    test_data[predicted_column] = predictions[target]
    rain_features.append(predicted_column)

# Step 2: predict Rain_Amount from base features plus stage-1 predictions.
print("Making predictions for Rain_Amount...")
rain_model = best_models['Rain_Amount']
scaled_rain = rain_model.predict(test_data[rain_features])

# Undo the target scaling, then floor the result at zero.
rain_amount = rain_scaler.inverse_transform(scaled_rain.reshape(-1, 1)).ravel()
predictions['Rain_Amount'] = np.clip(rain_amount, 0, None)

# Assemble one column per target, add the ID column last, and write to disk.
predictions_df = pd.DataFrame(predictions).assign(ID=test_data["ID"])
predictions_df.to_csv('predictions.csv', index=False)

print("Predictions complete. Results saved to 'predictions.csv'.")
