In [2]:
# Import Python libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from FeatureEngeneering import CreateNewFeatures
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import  StandardScaler
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
from sklearn.linear_model import Ridge, HuberRegressor


In [3]:
# Read the three competition files: training data, the features to predict
# on, and the submission template.
df = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
submission = pd.read_csv('sample_submission.csv')

# Run the same project feature-engineering step on both feature sets so the
# model sees identical columns at fit and predict time; 'id' is dropped from
# the prediction features, mirroring the training features below.
df = CreateNewFeatures(df)
test = CreateNewFeatures(test).drop(columns=['id'])

# Separate the target ('yield') from the predictors.
y = df['yield']
X = df.drop(columns=['id', 'yield'])

# Hold out 20% of the labelled rows for local validation (fixed seed so the
# split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:

# Stacked ensemble: three base regressors whose predictions are combined by a
# linear meta-model. Step/estimator labels are chosen to match the estimator
# they actually wrap, so get_params()/named_estimators_ read correctly.

# Base model 1: Ridge regression on standardized features
base_model_1 = Pipeline([
    ('scaler', StandardScaler()),
    ('ridge', Ridge())
])

# Base model 2: Random Forest Regressor (tree-based, so no scaling step)
base_model_2 = RandomForestRegressor(
    n_estimators=100,
    max_depth=19,
    min_samples_split=6,
    min_samples_leaf=10,
    max_features='sqrt',
    bootstrap=True,
    random_state=42,
    n_jobs=-1,
)

# Base model 3: single Decision Tree Regressor (tree-based, so no scaling step)
base_model_3 = DecisionTreeRegressor(
    criterion='friedman_mse',
    max_depth=19,
    min_samples_split=6,
    min_samples_leaf=10,
    max_features='sqrt',
    random_state=42,
)

# Meta-model: Huber regression fitted on the (standardized) predictions of the
# base models.
meta_model = Pipeline([
    ('scaler', StandardScaler()),
    ('huber', HuberRegressor())
])

# Stacking Regressor (base models + meta-model); estimator order is kept as
# ridge, random forest, decision tree.
stacked_model = StackingRegressor(
    estimators=[
        ('ridge', base_model_1),
        ('rf', base_model_2),
        ('tree', base_model_3),
    ],
    final_estimator=meta_model,
)

# Fit on the training split only; the hold-out split is used for evaluation.
stacked_model.fit(X_train, y_train)

# Predict on the hold-out split
y_pred = stacked_model.predict(X_test)

# Evaluate the model using Mean Absolute Error (MAE)
mae = mean_absolute_error(y_test, y_pred)
print(f"Mean Absolute Error: {mae:.2f}")


Mean Absolute Error: 245.40


In [5]:
# Predict with the fitted stack on the competition features.
sub = stacked_model.predict(test)

# Store the predictions in the submission frame's 'yield' column.
submission['yield'] = sub

# Write the submission CSV without the DataFrame index.
submission.to_csv(path_or_buf='my_submission.csv', index=False)
