In [1]:
import os
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from xgboost import XGBRegressor
import joblib


metadata_path = 'C:/Users/kingGester/Desktop/data/raw/metadata.csv'
meta = pd.read_csv(metadata_path)


def get_full_path(row):
    folder = 'train' if row['sn'] <= 12 else 'test'
    return f"C:/Users/kingGester/Desktop/data/raw/{folder}/{row['filename']}"

meta['full_path'] = meta.apply(get_full_path, axis=1)


train_data = meta[meta['sn'] .isin([4])]


def process_test_file(file_path, target_column='thrust'):
    if not os.path.exists(file_path):
        print(f"🚨 فایل {file_path} پیدا نشد.")
        return None
    try:
        df = pd.read_csv(file_path)
        if target_column not in df.columns:
            print(f"❌ ستون {target_column} در فایل {file_path} وجود ندارد.")
            return None

        df = df[['ton', target_column]].copy()
        df.dropna(inplace=True)

      
        on_duration = []
        count = 0
        for ton in df['ton']:
            count = count + 1 if ton == 1 else 0
            on_duration.append(count)
        df['on_duration'] = on_duration

  
        df['lag_thrust_1'] = df[target_column].shift(1)
        df.dropna(inplace=True)

        df['source_file'] = file_path
        return df

    except Exception as e:
        print(f"🚨 خطا در فایل {file_path}: {e}")
        return None


all_train_frames = []
for idx, row in train_data.iterrows():
    file_path = row['full_path']
    sn_value = row['sn']

    df = process_test_file(file_path)
    if df is not None:
        df['sn'] = sn_value
        all_train_frames.append(df)


df_train = pd.concat(all_train_frames, ignore_index=True)


df_train['rolling_avg_thrust'] = df_train.groupby('sn')['thrust'].rolling(window=5).mean().reset_index(drop=True)
df_train['cumulative_on_time'] = df_train.groupby('sn')['on_duration'].cumsum()


features = ['ton', 'on_duration', 'lag_thrust_1', 'rolling_avg_thrust', 'cumulative_on_time']
target = 'thrust'

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping

# پارامترها
sn = 'SN04'
SEQ_LENGTH = 10   # حافظه زمانی کوتاه
BATCH_SIZE = 16   # کوچکتر برای لپ‌تاپ
EPOCHS = 15       # تعداد کم با EarlyStopping
FEATURES = ['ton', 'on_duration', 'lag_thrust_1', 'rolling_avg_thrust', 'cumulative_on_time']


df_sn = df_train[df_train['sn'] == sn].dropna(subset=FEATURES + ['thrust']).copy()


df_sn = df_sn.iloc[:2000]


def create_sequences(df, features, target, seq_length):
    X, y = [], []
    for i in range(len(df) - seq_length):
        X.append(df[features].iloc[i:i+seq_length].values)
        y.append(df[target].iloc[i+seq_length])
    return np.array(X), np.array(y)

X, y = create_sequences(df_sn, FEATURES, 'thrust', SEQ_LENGTH)


X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# تعریف مدل LSTM ساده و بهینه
model = Sequential()
model.add(LSTM(32, activation='tanh', input_shape=(SEQ_LENGTH, len(FEATURES))))
model.add(Dense(1))
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

# آموزش با EarlyStopping
model.fit(X_train, y_train,
          epochs=EPOCHS,
          batch_size=BATCH_SIZE,
          validation_data=(X_val, y_val),
          callbacks=[EarlyStopping(patience=3, restore_best_weights=True)],
          verbose=1)


y_pred = model.predict(X_val).flatten()
mae = mean_absolute_error(y_val, y_pred)
r2 = r2_score(y_val, y_pred)

print(f"✅ MAE: {mae:.6f}")
print(f"✅ R²: {r2:.4f}")


plt.figure(figsize=(12, 4))
plt.plot(y_val[:200], label='Actual', color='blue')
plt.plot(y_pred[:200], label='Predicted (LSTM)', color='green')
plt.title(f"{sn} – Actual vs Predicted Thrust (LSTM)")
plt.xlabel("Sample")
plt.ylabel("Thrust (N)")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.