In [None]:
import pandas as pd
from google.colab import files
from functools import reduce

# Pick the input files from local storage via the Colab upload widget.
uploaded = files.upload()

# One DataFrame per successfully parsed upload.
dfs = []
for filename in uploaded.keys():
    # Compare extensions case-insensitively so "DATA.CSV" / "Prices.XLSX" are accepted.
    lowered_name = filename.lower()
    if lowered_name.endswith(".csv"):
        df = pd.read_csv(filename)
    elif lowered_name.endswith(".xlsx"):
        df = pd.read_excel(filename)
    else:
        print(f"فرمت {filename} پشتیبانی نمی‌شود!")
        continue

    print(f"{filename}: {df.shape} ردیف و ستون")
    print(df.head(), "\n")

    # Look for a likely date column.
    # str(c) guards against non-string headers (e.g. integer or datetime column
    # labels coming from Excel sheets), which have no .lower() method.
    candidates = [c for c in df.columns if 'date' in str(c).lower()]
    # Prefer a column literally called "date" over one that merely contains the
    # substring (e.g. "Update"), falling back to the first substring match.
    exact_matches = [c for c in candidates if str(c).strip().lower() == 'date']
    date_col = exact_matches or candidates
    if date_col:
        df[date_col[0]] = pd.to_datetime(df[date_col[0]])
        df = df.rename(columns={date_col[0]:'Date'})
    else:
        print(f"{filename} ستونی شبیه تاریخ نداره!")

    dfs.append(df)

# Keep only the frames that ended up with a 'Date' column.
dfs = [df for df in dfs if 'Date' in df.columns]

# Align every frame to a monthly frequency (month-end labels, monthly mean).
# - A MonthEnd offset object is passed instead of the 'M' string alias: newer
#   pandas releases deprecate that alias in favour of 'ME', while the offset
#   object is accepted by both old and new versions and produces the same bins.
# - numeric_only=True averages numeric columns only, so a stray text column is
#   dropped instead of aborting the whole resample with a TypeError.
month_end = pd.offsets.MonthEnd()
dfs = [
    frame.set_index('Date').resample(month_end).mean(numeric_only=True).reset_index()
    for frame in dfs
]

# Outer-join all frames on the 'Date' column, folding from left to right.
if dfs:
    merged = dfs[0]
    for right in dfs[1:]:
        merged = merged.merge(right, on='Date', how='outer')
    final_df = merged

    print("Shape دیتاست نهایی:", final_df.shape)
    print(final_df.head())
    # Missing-value count per column after the outer join.
    print("\nNaN ها در هر ستون:\n", final_df.isna().sum())
else:
    print("هیچ دیتافریمی با ستون Date پیدا نشد!")


Saving CMO Monthly.xlsx to CMO Monthly (3).xlsx
Saving CMOHistoricalDataMonthly (1).xlsx to CMOHistoricalDataMonthly (1) (3).xlsx
Saving urea_regional_multivariate_2000_2025 (2).csv to urea_regional_multivariate_2000_2025 (2) (3).csv
Saving weekly Henry_Hub_Natural_Gas_Spot_Price.csv to weekly Henry_Hub_Natural_Gas_Spot_Price (3).csv
Saving Weekly_Europe_Brent oil_Spot_Price_FOB.csv to Weekly_Europe_Brent oil_Spot_Price_FOB (3).csv
CMO Monthly (3).xlsx: (0, 0) ردیف و ستون
Empty DataFrame
Columns: []
Index: [] 

CMO Monthly (3).xlsx ستونی شبیه تاریخ نداره!
CMOHistoricalDataMonthly (1) (3).xlsx: (0, 0) ردیف و ستون
Empty DataFrame
Columns: []
Index: [] 

CMOHistoricalDataMonthly (1) (3).xlsx ستونی شبیه تاریخ نداره!
urea_regional_multivariate_2000_2025 (2) (3).csv: (308, 11) ردیف و ستون
         Date  Urea_Global  Urea_PersianGulf  Urea_BlackSea  Urea_China  \
0  2000-01-01        202.2             175.6          203.6       184.0   
1  2000-02-01        178.4             196.3          18

  dfs[i] = dfs[i].set_index('Date').resample('M').mean().reset_index()


In [None]:
# Share of missing values per column (a fraction between 0 and 1).
nan_ratio = final_df.isnull().mean()
print("درصد NaN در هر ستون:\n", nan_ratio)

# Columns whose missing share exceeds this cut-off are removed (here 20%).
threshold = 0.2

# Labels of the columns that are above the cut-off.
cols_to_drop = nan_ratio.loc[nan_ratio.gt(threshold)].index
print("ستون‌های حذف شده به دلیل NaN زیاد:", cols_to_drop.tolist())

# Remove the sparse columns.
final_df = final_df.drop(cols_to_drop, axis='columns')

# Fill whatever NaN is left with the per-column mean.
column_means = final_df.mean()
final_df = final_df.fillna(column_means)


درصد NaN در هر ستون:
 Date                0.0
Urea_Global         0.0
Urea_PersianGulf    0.0
Urea_BlackSea       0.0
Urea_China          0.0
Urea_US_Gulf        0.0
NaturalGas          0.0
CrudeOil_Brent      0.0
Corn                0.0
Wheat               0.0
Soybeans            0.0
dtype: float64
ستون‌های حذف شده به دلیل NaN زیاد: []


In [None]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import numpy as np

# --- Data ---
# Assumes final_df (built in the earlier cells) is ready and contains no NaN.
target = 'Urea_Global'
features = ['Urea_PersianGulf', 'Urea_BlackSea', 'Urea_China',
            'Urea_US_Gulf', 'NaturalGas', 'CrudeOil_Brent',
            'Corn', 'Wheat', 'Soybeans']

# --- Chronological split (done BEFORE scaling) ---
# shuffle=False keeps time order: the first 80% of rows train, the last 20% test.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    final_df[features], final_df[[target]], test_size=0.2, shuffle=False
)

# --- Scaling ---
# The scalers learn min/max from the training rows only. Fitting them on the
# full dataset would leak the test period's value range into training.
# Consequence: scaled test values may fall slightly outside [0, 1].
scaler_X = MinMaxScaler()
scaler_y = MinMaxScaler()

X_train = scaler_X.fit_transform(X_train_raw)
y_train = scaler_y.fit_transform(y_train_raw)
X_test = scaler_X.transform(X_test_raw)
y_test = scaler_y.transform(y_test_raw)

# Full-length scaled arrays, kept so any code that still reads X_scaled /
# y_scaled keeps working; they use the train-fitted scalers as well.
X_scaled = scaler_X.transform(final_df[features])
y_scaled = scaler_y.transform(final_df[[target]])

# --- Preparation for the LSTM ---
# Number of consecutive rows fed to the LSTM per sample (tunable).
timesteps = 6
def create_lstm_data(X, y, timesteps=1):
    """Turn aligned feature/target sequences into sliding-window samples.

    For every start position ``s`` in ``range(len(X) - timesteps)`` one sample
    is produced: the input is ``X[s:s + timesteps]`` and the label is
    ``y[s + timesteps]``, i.e. the target value right after the window.

    Args:
        X: Indexable/sliceable sequence of feature rows.
        y: Indexable sequence of targets, aligned row-by-row with ``X``.
        timesteps: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(windows, labels)`` of NumPy arrays built from the collected
        windows and labels; both are empty when ``len(X) <= timesteps``.
    """
    window_starts = range(len(X) - timesteps)
    windows = [X[start:start + timesteps] for start in window_starts]
    labels = [y[start + timesteps] for start in window_starts]
    return np.array(windows), np.array(labels)

# Build sliding-window samples separately for the train and the test portion.
X_train_lstm, y_train_lstm = create_lstm_data(X_train, y_train, timesteps=timesteps)
X_test_lstm, y_test_lstm = create_lstm_data(X_test, y_test, timesteps=timesteps)

# Report the resulting array shapes.
print("Shapes:")
for label, windowed in (
    ("X_train:", X_train_lstm),
    ("y_train:", y_train_lstm),
    ("X_test:", X_test_lstm),
    ("y_test:", y_test_lstm),
):
    print(label, windowed.shape)


Shapes:
X_train: (240, 6, 9)
y_train: (240, 1)
X_test: (56, 6, 9)
y_test: (56, 1)


In [None]:
# Sanity check: print the shape of the training feature array.
print(X_train.shape)


(246, 9)


In [None]:
# Append a trailing singleton axis: (rows, cols) -> (rows, cols, 1).
train_rows, train_cols = X_train.shape[0], X_train.shape[1]
X_train = np.reshape(X_train, (train_rows, train_cols, 1))
test_rows, test_cols = X_test.shape[0], X_test.shape[1]
X_test = np.reshape(X_test, (test_rows, test_cols, 1))


In [None]:
# Scratch check of the first LSTM layer against the current shape of X_train.
# This cell previously relied on `model` and `LSTM` leaking in from a cell
# further down the notebook, so it raised NameError on a fresh kernel
# (Restart & Run All). Import and create them here so the cell is self-contained.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM

model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))


  super().__init__(**kwargs)


In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense

# ==== Data setup ====
# Train on the sliding windows produced by create_lstm_data (X_train_lstm, ...)
# instead of re-chunking X_train into non-overlapping blocks:
#   * non-overlapping chunks keep only len(X) // timesteps samples, whereas the
#     sliding windows keep len(X) - timesteps samples;
#   * the chunked label was the target of the window's own last row, while the
#     sliding windows pair each window with the *next* row's target, matching
#     the windowing used elsewhere in this notebook;
#   * plain rebinding is idempotent, so this cell can be re-run safely.
# Local import: Input is not part of this cell's import line.
from tensorflow.keras import Input

X_train, y_train = X_train_lstm, y_train_lstm
X_test, y_test = X_test_lstm, y_test_lstm

# Window length and feature count are read from the data itself. A separate
# name (n_features) is used so the `features` column-name list is not clobbered.
timesteps = X_train.shape[1]
n_features = X_train.shape[2]

# ==== Build the LSTM model ====
# An explicit Input layer replaces `input_shape=` on the first LSTM layer,
# which recent Keras versions warn about.
model = Sequential()
model.add(Input(shape=(timesteps, n_features)))
model.add(LSTM(50, return_sequences=True))  # returns the full sequence for the next LSTM
model.add(Dropout(0.2))
model.add(LSTM(25))                         # returns only the last hidden state
model.add(Dropout(0.2))
model.add(Dense(1))                         # single regression output

model.compile(optimizer='adam', loss='mse', metrics=['mae'])
model.summary()


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# The model architecture is already built; (re)compile it with an explicit
# Adam learning rate. metrics=['mae'] is kept here: compiling without it would
# silently drop the MAE / val_MAE columns from the per-epoch training log.
model.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=['mae'])

# Train the model. validation_split holds out the last 10% of the training
# samples for per-epoch monitoring.
history = model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.1, verbose=2)

# Predict on the test data.
y_pred = model.predict(X_test)

# Evaluate. NOTE: y_test / y_pred are in the scaled target space, so MAE and
# RMSE are expressed in scaled units, not in original price units.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"R²: {r2:.4f}")
print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")


Epoch 1/50
3/3 - 5s - 2s/step - loss: 0.0148 - mae: 0.1020 - val_loss: 0.0137 - val_mae: 0.1031
Epoch 2/50
3/3 - 0s - 36ms/step - loss: 0.0032 - mae: 0.0465 - val_loss: 0.0041 - val_mae: 0.0569
Epoch 3/50
3/3 - 0s - 33ms/step - loss: 0.0078 - mae: 0.0756 - val_loss: 0.0041 - val_mae: 0.0569
Epoch 4/50
3/3 - 0s - 35ms/step - loss: 0.0074 - mae: 0.0678 - val_loss: 0.0068 - val_mae: 0.0593
Epoch 5/50
3/3 - 0s - 33ms/step - loss: 0.0047 - mae: 0.0546 - val_loss: 0.0181 - val_mae: 0.1241
Epoch 6/50
3/3 - 0s - 47ms/step - loss: 0.0047 - mae: 0.0558 - val_loss: 0.0241 - val_mae: 0.1468
Epoch 7/50
3/3 - 0s - 35ms/step - loss: 0.0055 - mae: 0.0623 - val_loss: 0.0197 - val_mae: 0.1308
Epoch 8/50
3/3 - 0s - 33ms/step - loss: 0.0060 - mae: 0.0622 - val_loss: 0.0122 - val_mae: 0.0969
Epoch 9/50
3/3 - 0s - 33ms/step - loss: 0.0043 - mae: 0.0492 - val_loss: 0.0076 - val_mae: 0.0674
Epoch 10/50
3/3 - 0s - 35ms/step - loss: 0.0035 - mae: 0.0477 - val_loss: 0.0056 - val_mae: 0.0514
Epoch 11/50
3/3 - 0s 

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

# --- Dataset ---
# Assumes final_df (built in the earlier cells) is ready and contains no NaN.
# Local import: Input is not part of this cell's import line.
from tensorflow.keras import Input

cols_to_predict = ['Urea_Global', 'Corn', 'Wheat', 'Soybeans']
results = {}

timesteps = 6  # number of past rows fed to the LSTM for each prediction

for target in cols_to_predict:
    # Feature selection: every column except 'Date' and the current target.
    features = [col for col in final_df.columns if col not in ['Date', target]]

    # Target is placed last so it can be sliced off with [:, -1] / [:, :-1].
    data = final_df[features + [target]].copy()

    # Train/test boundary, expressed in windows. There are
    # len(data) - timesteps windows in total and window k predicts row
    # k + timesteps, so the training windows only touch the first
    # split + timesteps rows of `data`.
    n_windows = len(data) - timesteps
    split = int(n_windows * 0.8)
    n_fit_rows = split + timesteps

    # Normalisation: fit the scaler on the training rows only, then apply it to
    # the whole series. Fitting on all rows would leak the test period's
    # min/max into training. Scaled test values may therefore leave [0, 1].
    scaler = MinMaxScaler()
    scaler.fit(data.iloc[:n_fit_rows])
    scaled_data = scaler.transform(data)

    # Sliding windows: rows [i - timesteps, i) of the features predict the
    # target at row i.
    X, y = [], []
    for i in range(timesteps, len(scaled_data)):
        X.append(scaled_data[i-timesteps:i, :-1])
        y.append(scaled_data[i, -1])

    X, y = np.array(X), np.array(y)

    # Chronological Train/Test split.
    X_train, X_test = X[:split], X[split:]
    y_train, y_test = y[:split], y[split:]

    # LSTM model. An explicit Input layer replaces `input_shape=` on the first
    # LSTM layer, which recent Keras versions warn about.
    model = Sequential()
    model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
    model.add(LSTM(50, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(LSTM(25))
    model.add(Dropout(0.2))
    model.add(Dense(1))

    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    # Training. The test windows are passed as validation data for monitoring
    # only: no callback or checkpoint selects a model based on them.
    model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test), verbose=0)

    # Prediction and evaluation (all values are in the scaled target space).
    y_pred = model.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    results[target] = {'r2': r2, 'mae': mae, 'rmse': rmse, 'predictions': y_pred}

# Show the results, one line per target.
for target, metrics in results.items():
    print(f"{target}: R²={metrics['r2']:.3f}, MAE={metrics['mae']:.3f}, RMSE={metrics['rmse']:.3f}")


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 282ms/step


  super().__init__(**kwargs)


[1m1/2[0m [32m━━━━━━━━━━[0m[37m━━━━━━━━━━[0m [1m0s[0m 272ms/step



[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 281ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 401ms/step


  super().__init__(**kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 281ms/step
Urea_Global: R²=0.436, MAE=0.084, RMSE=0.126
Corn: R²=-2.571, MAE=0.179, RMSE=0.264
Wheat: R²=-1.582, MAE=0.142, RMSE=0.204
Soybeans: R²=-6.475, MAE=0.170, RMSE=0.264
