In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, VotingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np
import joblib

# تحميل البيانات
file_path = '/content/Drying3.xlsx'
data = pd.read_excel(file_path, sheet_name='Sheet1')

# فصل المدخلات والمخرجات
X = data[['Drying Time (min )', 'Velocity m/s', 'Temperatures C', 'Thickness mm', 'Sample mass in (g)']]
y = data[['MR']]

# تقسيم البيانات إلى تدريب واختبار
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# تحجيم البيانات
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# إعداد النماذج الفردية
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
lgb_model = LGBMRegressor(
    n_estimators=100,
    learning_rate=0.1,
    random_state=42,
    num_leaves=31,
    max_depth=-1,
    min_gain_to_split=0,
    min_child_samples=20
)
svr_model = SVR(kernel='rbf', C=1.0, epsilon=0.1)

models = {
    'Random Forest': rf_model,
    'XGBoost': xgb_model,
    'LightGBM': lgb_model,
    'SVR': svr_model
}

# تقييم كل نموذج على حدة
results = {}
for model_name, model in models.items():
    model.fit(X_train_scaled, y_train.values.ravel())  # تدريب النموذج
    y_pred = model.predict(X_test_scaled)  # التنبؤ باستخدام بيانات الاختبار

    # حساب المقاييس
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # تخزين النتائج
    results[model_name] = {'MSE': mse, 'RMSE': rmse, 'MAE': mae, 'R²': r2}
    print(f"{model_name} - MSE: {mse:.4f}, RMSE: {rmse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}")

    # حفظ النموذج
    joblib.dump(model, f"{model_name}_model.joblib")

# إنشاء وتجميع نموذج Ensemble باستخدام VotingRegressor
ensemble_model = VotingRegressor([
    ('rf', rf_model),
    ('xgb', xgb_model),
    ('lgb', lgb_model),
    ('svr', svr_model)
])

# تدريب نموذج Ensemble وتقييمه
ensemble_model.fit(X_train_scaled, y_train.values.ravel())
y_pred_ensemble = ensemble_model.predict(X_test_scaled)

# حساب المقاييس لنموذج Ensemble
mse_ensemble = mean_squared_error(y_test, y_pred_ensemble)
rmse_ensemble = np.sqrt(mse_ensemble)
mae_ensemble = mean_absolute_error(y_test, y_pred_ensemble)
r2_ensemble = r2_score(y_test, y_pred_ensemble)

print(f"\nEnsemble Model - MSE: {mse_ensemble:.4f}, RMSE: {rmse_ensemble:.4f}, MAE: {mae_ensemble:.4f}, R²: {r2_ensemble:.4f}")

# حفظ نموذج Ensemble
joblib.dump(ensemble_model, "ensemble_model.joblib")

# طباعة النتائج النهائية
print("\nنتائج التقييم لكل نموذج:")
for model_name, metrics in results.items():
    print(f"{model_name}: MSE = {metrics['MSE']:.4f}, RMSE = {metrics['RMSE']:.4f}, MAE = {metrics['MAE']:.4f}, R² = {metrics['R²']:.4f}")
print(f"Ensemble Model: MSE = {mse_ensemble:.4f}, RMSE = {rmse_ensemble:.4f}, MAE = {mae_ensemble:.4f}, R² = {r2_ensemble:.4f}")


Random Forest - MSE: 0.0029, RMSE: 0.0540, MAE: 0.0411, R²: 0.9658
XGBoost - MSE: 0.0014, RMSE: 0.0373, MAE: 0.0269, R²: 0.9837
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.001096 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 57
[LightGBM] [Info] Number of data points in the train set: 244, number of used features: 5
[LightGBM] [Info] Start training from score 0.382336
LightGBM - MSE: 0.0024, RMSE: 0.0492, MAE: 0.0391, R²: 0.9716
SVR - MSE: 0.0040, RMSE: 0.0636, MAE: 0.0475, R²: 0.9526
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000041 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 57
[LightGBM] [Info] Number of data points in the train set: 244, number of used features: 5
[LightGBM] [Info] Start training from score 0.382336

Ensemble Model

In [None]:
import joblib

# تحميل نموذج Ensemble المحفوظ
ensemble_model = joblib.load("/content/ensemble_model.joblib")

# افتراضًا أن لديك بيانات جديدة `X_new` وتحتاج إلى تحجيمها بنفس الطريقة المستخدمة أثناء التدريب
# تأكد من تحجيم البيانات الجديدة باستخدام `scaler` الذي تم تدريبه سابقًا
X_new_scaled = scaler.transform(X_new)

# استخدام النموذج للتنبؤات
y_pred_new = ensemble_model.predict(X_new_scaled)

print("التنبؤات للبيانات الجديدة:", y_pred_new)


In [None]:
# 1. تجهيز البيانات الجديدة
# تأكد من أن بيانات `X_new` الجديدة تحتوي على نفس الأعمدة التي تم استخدامها في التدريب
X_new = pd.DataFrame({
    'Drying Time (min )': [30],
    'Velocity m/s': [1.5],
    'Temperatures C': [70],
    'Thickness mm': [5],
    'Sample mass in (g)': [100]
})

# 2. تطبيق التحجيم على البيانات الجديدة
X_new_scaled = scaler.transform(X_new)

# 3. استخدام النموذج النهائي للتنبؤات
y_pred_new = xgb_model.predict(X_new_scaled)

print("التنبؤات للبيانات الجديدة:", y_pred_new)

التنبؤات للبيانات الجديدة: [0.769963]
